In [None]:
import matplotlib.pyplot as plt
import random
from keras.utils import to_categorical

In [None]:
import keras

In [None]:
from keras import datasets
# Start with MNIST: load_data() is unpacked into (images, labels) pairs for train and test.
# NOTE(review): none of the *1 arrays are referenced by the later cells shown in this
# notebook, which switch to the CIFAR-10 arrays instead.

(X_train1, y_train1), (X_test1, y_test1) = datasets.mnist.load_data()

In [None]:
# Colour toy set: CIFAR-10, unpacked into (images, labels) pairs for train and test.
(X_train2, y_train2), (X_test2, y_test2) = datasets.cifar10.load_data()

In [None]:
# Pick the colour (CIFAR-10) arrays as the working dataset for the rest of the notebook.
X_train, y_train = X_train2, y_train2
X_test, y_test = X_test2, y_test2

In [None]:
# Report the shape of every raw array, one per output line.
shape_report = []
for label, array in (('X_train: ', X_train),
                     ('\ny_train: ', y_train),
                     ('\nX_test: ', X_test),
                     ('\ny_test: ', y_test)):
    shape_report += [label, array.shape]
print(*shape_report)

In [None]:
# Display four randomly drawn training images, one figure each.
# The grey colormap only affects single-channel images; matplotlib ignores it for RGB data.
N_PREVIEWS = 4
for _ in range(N_PREVIEWS):
    sample_image = random.choice(X_train)
    plt.imshow(sample_image, cmap='gray')
    plt.show()

In [None]:
#rescale pixels, ohe

def _as_unit_floats(images):
    """Return `images` as floats scaled by 1/255.

    Integer-typed pixel arrays are cast to float and divided by 255 (assumes
    8-bit pixel values, giving a [0, 1] range). Arrays that are already
    floating point are returned untouched, so re-running this cell does not
    divide by 255 a second time.
    """
    if images.dtype.kind in 'ui':  # still raw (unsigned) integer pixels
        return images.astype('float') / 255
    return images

X_train = _as_unit_floats(X_train)
X_test = _as_unit_floats(X_test)

# One-hot encode the integer class labels; to_categorical infers the number of columns.
y_train_onehot, y_test_onehot = to_categorical(y_train), to_categorical(y_test)

In [None]:
# Shapes after preprocessing. The label rows report the one-hot arrays themselves
# (previously the integer label arrays were printed under the *_onehot captions).
print ('X_train: ', X_train.shape,
       '\ny_train_onehot: ', y_train_onehot.shape,
       '\nX_test: ', X_test.shape,
       '\ny_test_onehot: ', y_test_onehot.shape)

In [None]:
# Take a small subset for quick experiments.
# Indexing reminder: X_train[1] is one image, X_train[1][1][1] is a single pixel
# across the 3 colour channels, X_train[1][0][0][0] is one channel value.
SUBSET_SIZE = 3000
head = slice(SUBSET_SIZE)

X_train_sub, X_test_sub = X_train[head], X_test[head]
y_train_onehot_sub, y_test_onehot_sub = y_train_onehot[head], y_test_onehot[head]

X_train_sub.shape

In [None]:
from keras.layers import Input, Conv2D, BatchNormalization, Activation
from keras.layers import MaxPooling2D, Flatten, Dense, Dropout
from keras import Model

from keras.callbacks import EarlyStopping

In [None]:
input_shape = X_train_sub.shape[1:]  # per-sample shape: the leading batch axis is dropped

def basic_cnn_model(input_shape):
    """Build (without compiling) a small two-stage CNN with a 10-way softmax output.

    Each stage is Conv2D -> BatchNormalization -> ReLU -> MaxPooling2D. The two
    stages are followed by Dropout(0.5), Flatten and a 10-unit softmax Dense
    layer. The shape of every intermediate tensor is printed while the graph
    is assembled.

    Args:
        input_shape: shape of a single sample, without the batch dimension.

    Returns:
        An uncompiled keras Model named 'basic_cnn' mapping the input layer to
        the softmax output.
    """
    def report(stage, tensor):
        # Print the tensor's shape under the given caption and hand the tensor back.
        print(stage, tensor.shape)
        return tensor

    # (number of filters, kernel size) per conv stage; the position is the layer-name suffix.
    conv_stages = [(16, (3, 3)), (25, (6, 6))]

    X_input = report('made input layer: ', Input(input_shape))

    X = X_input
    for idx, (n_filters, kernel) in enumerate(conv_stages):
        X = report('after conv2d: ', Conv2D(n_filters, kernel, name='conv%d' % idx)(X))
        # axis=3 normalises over the last axis -- presumably channels-last data; confirm.
        X = report('after BN: ', BatchNormalization(axis=3, name='bn%d' % idx)(X))
        X = report('after activation: ', Activation('relu')(X))
        X = report('after maxpool2d: ', MaxPooling2D(2, name='max_pool%d' % idx)(X))

    # Regularise before the classifier head.
    X = report('after dropout: ', Dropout(0.5)(X))
    X = report('after flatten: ', Flatten()(X))

    # An extra Dense(150, relu) + Dropout(0.5) was tried at this point and is left
    # disabled (the notes in this notebook record no improvement from it).

    # Final prediction layer.
    X = report('after dense: ', Dense(10, activation='softmax', name='final_dense')(X))

    # Only the graph is wired here: from X_input, through all layers, to the last X.
    return Model(inputs=X_input, outputs=X, name='basic_cnn')

In [None]:
from keras.backend import clear_session
# clear_session()

In [None]:
# Instantiate the CNN for the per-sample input shape; the builder prints each
# intermediate tensor shape as it wires the layers.
cnn_model = basic_cnn_model(input_shape)

In [None]:
# early_stop = EarlyStopping(monitor='accuracy', min_delta=0.0001, patience=2, verbose=1)

In [None]:
# Multi-class setup: categorical cross-entropy on the one-hot labels, Adam optimiser,
# accuracy tracked as the reported metric.
cnn_model.compile(
    loss='categorical_crossentropy',
    optimizer='adam',
    metrics=['accuracy'],
)

In [None]:
#just to test early stop

# history = cnn_model.fit(X_train_sub, y_train_onehot_sub, batch_size=32, epochs=30,
#              validation_split=0.2, callbacks=[early_stop])

In [None]:
# Full training run on all training images, holding out 20% of them for validation.
history = cnn_model.fit(
    X_train,
    y_train_onehot,
    epochs=300,
    batch_size=32,
    validation_split=0.2,
)

In [None]:
# First-epoch vs. last-epoch accuracy, for the training and the validation split.
for split, key in (('train', 'accuracy'), ('val', 'val_accuracy')):
    per_epoch = history.history[key]
    print('initial accuracy on %s: ' % split, per_epoch[0])
    print('final accuracy on %s: ' % split, per_epoch[-1])

### some notes

* initial configuration:
conv2d
batchnormalization
activation
maxpooling
flatten
dense

gave training accuracy 0.18 (1st epoch) -> 1 (last epoch), val accuracy 0.24 -> 0.37

So training goes well but validation does not (the model overfits): need to regularize and/or use more data

* adding a dropout (after maxpooling): not much improvement
* adding dropout AND training on more data: tr: 0.17 -> 0.98, val 0.15 -> 0.4
* without dropout but more data: tr 0.2 -> 1, val 0.25 -> 0.42

the higher the dropout rate, the larger the improvement (0.5 works better than 0.2)

* adding a dense layer before the final layer: not improved

* adding another conv2d layer (with batch, act, maxpooling): tr same, val 0.16 -> 0.4
but the val loss behaves better

* with more conv filters: tr same, val 0.12 -> 0.42

* with larger batch size: tr same, val 0.13 -> 0.43

* with more epochs: tr same, val 0.12 -> 0.46


In [None]:
# Learning curves for the basic CNN: one figure per metric, training vs. validation.
# The second curve comes from the validation split of fit(), so it is labelled
# 'Validation' rather than 'Test' (the test set is only scored by evaluate()).
for metric, axis_label in (('accuracy', 'Accuracy'), ('loss', 'Loss')):
    plt.plot(history.history[metric])
    plt.plot(history.history['val_' + metric])
    plt.title('Model ' + metric)
    plt.ylabel(axis_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()

In [None]:
# Score the trained CNN on the test subset (the *_sub slices, not the full test set).
# The result is indexed below as score[0] = loss and score[1] = accuracy.
score = cnn_model.evaluate(X_test_sub, y_test_onehot_sub, verbose=0)

In [None]:

# score holds the loss first and the accuracy second.
for caption, value in zip(('Test loss:', 'Test accuracy:'), score):
    print(caption, value)

## Pretrained net

In [None]:
from keras.applications.mobilenet import decode_predictions
from keras.applications.mobilenet import MobileNet
from keras.applications.inception_v3 import InceptionV3
from keras.applications.vgg16 import VGG16
from keras.applications.mobilenet import preprocess_input

In [None]:
# Load the complete MobileNet with ImageNet weights and inspect its layers.
# NOTE(review): `model` is rebound by later cells, so this instance is only used for the summary.
model = MobileNet(weights='imagenet') 
model.summary()

In [None]:
from keras.backend import clear_session
# Reset Keras' global state before loading the next pretrained network.
# NOTE(review): presumably this also invalidates models built earlier in the session -- confirm
# before reusing cnn_model after this point.
clear_session()

In [None]:
# Alternative that was tried: the full InceptionV3 network.
# model = InceptionV3(weights='imagenet') 
# model.summary()

# Load the complete VGG16 (including its original classifier) with ImageNet weights
# and inspect its layers.
model = VGG16(weights='imagenet')
model.summary()

In [None]:
# #just testing if it works on a test image

# from keras.preprocessing import image
# import numpy as np
# from pprint import pprint

# img_path = '../../data/scorpion.jpg'

# img = image.load_img(img_path, target_size=(224, 224))
# x = image.img_to_array(img)
# x = np.expand_dims(x, axis=0)
# x = preprocess_input(x)
# preds = decode_predictions(model.predict(x), top=5)[0]
# preds = [(x[1], x[2]) for x in preds]
# pprint(preds)


In [None]:
# Check the image dimensions that the pretrained base's input_shape has to match.
X_train.shape

In [None]:
# Candidate bases, all without their original classifier (include_top=False) and
# sized for 32x32 RGB inputs. Only the VGG16 line is active.
# base_model = MobileNet(weights='imagenet', include_top=False, input_shape=(32, 32, 3))

#inceptionV3 cannot use images smaller than 75x75
# base_model = InceptionV3(weights='imagenet', include_top=False, input_shape=(32, 32, 3))

# ImageNet-pretrained VGG16 convolutional base used for transfer learning below.
base_model = VGG16(weights='imagenet', include_top=False, input_shape=(32, 32, 3))

In [None]:
# List the layers of the headless pretrained base.
base_model.summary()

In [None]:
from keras.layers import GlobalAveragePooling2D

# New classification head stacked on the pretrained base:
# global spatial average pooling collapses the base's feature maps...
pooled_features = GlobalAveragePooling2D()(base_model.output)
# ...followed by one fully-connected layer...
x = Dense(1024, activation='relu')(pooled_features)
# ...and a softmax output with 10 units (one per class of the one-hot labels).
predictions = Dense(10, activation='softmax')(x)

In [None]:
# Assemble the transfer-learning model: pretrained base input -> new 10-way softmax head.
# This rebinds `model`, replacing the full pretrained classifier loaded earlier.
model = Model(inputs=base_model.input, outputs=predictions)

In [None]:
# Stage 1: freeze every layer of the pretrained base so only the new head is trained.
for base_layer in base_model.layers:
    base_layer.trainable = False

# Compile after setting the trainable flags.
model.compile(
    loss='categorical_crossentropy',
    optimizer='sgd',
    metrics=['accuracy'],
)

In [None]:
# cnn_model.compile(optimizer='adam', loss='categorical_crossentropy',
#                  metrics=['accuracy']

In [None]:
# Train the new head on the training subset, holding out 20% of it for validation.
model.fit(
    X_train_sub,
    y_train_onehot_sub,
    epochs=100,
    batch_size=32,
    validation_split=0.2,
)

In [None]:
# Index and name of every layer in the pretrained base, one per line.
print('\n'.join('%d %s' % (position, base_layer.name)
                for position, base_layer in enumerate(base_model.layers)))

In [None]:
# Index and name of every layer in the combined model (base + new head), one per line.
print('\n'.join('%d %s' % (position, model_layer.name)
                for position, model_layer in enumerate(model.layers)))

In [None]:
# Freeze/unfreeze boundary, derived from the base instead of a hard-coded 19 so it
# stays correct if base_model is swapped for another network.
# NOTE(review): with this boundary the whole pretrained base stays frozen and only the
# new head layers are trainable, i.e. no convolutional layer is actually fine-tuned.
# Lower the boundary (check the layer listing printed above for the index of the
# block you want to unfreeze) if real fine-tuning of the base is intended.
n_frozen = len(base_model.layers)
for position, layer in enumerate(model.layers):
    layer.trainable = position >= n_frozen

In [None]:
from keras.optimizers import SGD

# Recompile so that the updated trainable flags are taken into account.
# NOTE(review): the imported SGD class is not used; the string alias 'sgd' selects the
# optimiser with its default settings.
model.compile(
    loss='categorical_crossentropy',
    optimizer='sgd',
    metrics=['accuracy'],
)

# Train again on the subset with the layers currently marked trainable, keeping the
# per-epoch metrics for the summary and plots below.
mn_history = model.fit(
    X_train_sub,
    y_train_onehot_sub,
    epochs=200,
    batch_size=32,
    validation_split=0.2,
)

In [None]:
# mn_history = _

In [None]:
# Display the object returned by the second fit() call.
mn_history

In [None]:
# First-epoch vs. last-epoch accuracy of the second training pass, per split.
for split, key in (('train', 'accuracy'), ('val', 'val_accuracy')):
    per_epoch = mn_history.history[key]
    print('initial accuracy on %s: ' % split, per_epoch[0])
    print('final accuracy on %s: ' % split, per_epoch[-1])

In [None]:
# Learning curves for the pretrained-base model: one figure per metric, training vs.
# validation. The second curve comes from the validation split of fit(), so it is
# labelled 'Validation' rather than 'Test'.
for metric, axis_label in (('accuracy', 'Accuracy'), ('loss', 'Loss')):
    plt.plot(mn_history.history[metric])
    plt.plot(mn_history.history['val_' + metric])
    plt.title('Model ' + metric)
    plt.ylabel(axis_label)
    plt.xlabel('Epoch')
    plt.legend(['Train', 'Validation'], loc='upper left')
    plt.show()