# Convolution Neural Network with K-Fold

Convolution Neural Networks or CNN or Convnets are the current state of the art for most computer vision tasks.

This notebook will aply k-fold with a very simple CNN architecture.

In [10]:
# Import libraries and ignore warnings

import warnings
warnings.filterwarnings('ignore')

import pandas as pd
import numpy as np
np.random.seed(2)

from sklearn.model_selection import StratifiedKFold
from sklearn.metrics import classification_report

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, LabelEncoder
#from sklearn.metrics import confusion_matrix

from keras import models
from keras import layers
from keras import optimizers
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ReduceLROnPlateau, EarlyStopping
from keras.preprocessing import image

import os
from PIL import Image

# Data  
  
The first step is to import images and store their **pixel** values in a dataframe. This will allow us to create 10 fold by using the index of the rows.  
  
We will also **shuffle** the dataframe because the first 200 images are smilling and next 200 images are neutral.

In [11]:
# Read the annotations file that contains the label and the image file name
labels = pd.read_csv('./SMILE_Dataset/annotations.csv', header=None, names=['fname','label'])

# Shuffle data
labels = labels.sample(frac=1).reset_index()

# Use a list comprehension to loop over image file names and import one by one and store pixel values
x = np.array([image.img_to_array(image.load_img('./SMILE_Dataset/all/'+fname, target_size=(128, 128))) for fname in labels['fname']])

# Because the names are strings, the neural network only takes in numerical formats so we will one-hot encode the label
label_encoder = LabelEncoder()
integer_encoded = label_encoder.fit_transform(labels['label'])
y = integer_encoded

Now we have two variables.  
  
x: all of the values for our images  
y: all of the labels (0:1)
  
Now we have finished working with the data. Let's define an architecture for our CNN.  

# Model  

Here I defined a function that will be called when we loop over our 10 folds. This is just to keep the code cleaner later on.

In [12]:
def build_model():

    model = models.Sequential()
    model.add(layers.Conv2D(32, (3, 3), activation='relu', input_shape=(128, 128, 3)))
    model.add(layers.MaxPooling2D((2, 2)))
    
    model.add(layers.Conv2D(64, (3, 3), activation='relu'))
    model.add(layers.MaxPooling2D((2, 2)))
    
    # Switched 24 to 128, got 96.75 with 128.
    model.add(layers.Conv2D(128, (3, 3), activation='relu'))
    #model.add(layers.Dropout(0.05)) # added small dropout to help with overfitting - not good
    
    
    
    # Added following but overfitting
    
    #model.add(layers.MaxPooling2D((2, 2)))
    
    #model.add(layers.Conv2D(128, (3, 3), activation='relu'))
    #model.add(layers.MaxPooling2D((2, 2)))

    # Feed to a densily connected layer for prediction
    model.add(layers.Flatten())
    model.add(layers.Dense(64, activation='relu'))
    model.add(layers.Dense(1, activation='sigmoid'))
    
    model.compile(loss='binary_crossentropy',
              optimizer=optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0),
              metrics=['acc'])
    
    return model

# K-Fold

In [34]:
# All classification reports will be added here. When we are done we can average the f1 scores
reports = []

# Apply stratified K-fold ith 10 splits. Stratified means the same distribution of classes than the whole dataset
# In this case, 50-50
kf = StratifiedKFold(n_splits=2)

# Just for printing purposes
id = 1

for train_index, test_index in kf.split(x,y):
    print('Kfold iteration {}/10'.format(id))
    print('Total images: {} ---- Train images: {} ---- Test images: {}'.format(len(x),len(train_index),len(test_index)))

    id += 1 
    
    X_train, X_test = x[train_index], x[test_index]
    y_train, y_test = y[train_index], y[test_index]
        
    #model = build_model()
    model = loaded_model
    
    datagen = ImageDataGenerator(rescale=1./255,
                                 rotation_range=10, # randomly rotate images in the range (degrees, 0 to 180)
                                 width_shift_range=0.1, # randomly shift images horizontally (fraction of total width)
                                 height_shift_range=0.1, 
                                 shear_range=0.1,
                                 zoom_range=0.1)   
    
    datagen.fit(X_train)

    # Secret sauce to get 3-5 % accuracy more
    # Adjust the learning rate over time. (Like we saw in class!)
    # The learning rate determines the size of the steps taken during the gradient descent process.
    
    learning_rate_reduction = ReduceLROnPlateau(monitor='val_acc', 
                                            patience=3, 
                                            verbose=1, 
                                            factor=0.5, 
                                            min_lr=0.00001)
    
    # Used to prevent overfitting. 
    # es = EarlyStopping(monitor='val_loss', mode='min', verbose=1, patience=50)
    
    history = model.fit_generator(datagen.flow(X_train, y_train, batch_size = 20), epochs = 20, 
                              validation_data = (X_test,y_test), steps_per_epoch=len(X_train) / 20,
                              callbacks=[learning_rate_reduction])
    
    y_pred = model.predict(X_test)
    y_pred = [np.round(p[0]) for p in y_pred]
    
    print(classification_report(y_test, y_pred))
    reports.append(classification_report(y_test, y_pred,output_dict=True))

Kfold iteration 1/10
Total images: 400 ---- Train images: 200 ---- Test images: 200
Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20

Epoch 00008: ReduceLROnPlateau reducing learning rate to 0.0005000000237487257.
Epoch 9/20
Epoch 10/20
Epoch 11/20

Epoch 00011: ReduceLROnPlateau reducing learning rate to 0.0002500000118743628.
Epoch 12/20
Epoch 13/20
Epoch 14/20

Epoch 00014: ReduceLROnPlateau reducing learning rate to 0.0001250000059371814.
Epoch 15/20
Epoch 16/20
Epoch 17/20

Epoch 00017: ReduceLROnPlateau reducing learning rate to 6.25000029685907e-05.
Epoch 18/20
Epoch 19/20
Epoch 20/20

Epoch 00020: ReduceLROnPlateau reducing learning rate to 3.125000148429535e-05.
              precision    recall  f1-score   support

           0       0.81      1.00      0.89       100
           1       1.00      0.76      0.86       100

    accuracy                           0.88       200
   macro avg       0.90      0.88      0.88       200
weighted 

KeyboardInterrupt: 

In [None]:
model.save('my_model.h5')

In [9]:
# We loop over all reports (1 per fold) and then compute the average of all weighted f1 scores
final_f1_score = np.mean([rep['weighted avg']['f1-score'] for rep in reports])

print('Final F1-Score is: {}%'.format(np.round(final_f1_score*100,2)))

Final F1-Score is: 95.48%


In [None]:
model.load_model('my_model.h5')

In [15]:
from keras.models import model_from_json

In [16]:
# serialize model to JSON
model_json = model.to_json()
with open("model.json", "w") as json_file:
    json_file.write(model_json)
# serialize weights to HDF5
model.save_weights("model.h5")
print("Saved model to disk")

Saved model to disk


In [37]:
# load json and create model
json_file = open('model.json', 'r')
loaded_model_json = json_file.read()
json_file.close()
loaded_model = model_from_json(loaded_model_json)
# load weights into new model
loaded_model.load_weights("model.h5")
print("Loaded model from disk")

Loaded model from disk


In [38]:
# remove the last 2 dense FC layers and freeze it
loaded_model.pop()
loaded_model.pop()

In [39]:
loaded_model.summary()

Model: "sequential_13"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_37 (Conv2D)           (None, 126, 126, 32)      896       
_________________________________________________________________
max_pooling2d_25 (MaxPooling (None, 63, 63, 32)        0         
_________________________________________________________________
conv2d_38 (Conv2D)           (None, 61, 61, 64)        18496     
_________________________________________________________________
max_pooling2d_26 (MaxPooling (None, 30, 30, 64)        0         
_________________________________________________________________
conv2d_39 (Conv2D)           (None, 28, 28, 128)       73856     
_________________________________________________________________
flatten_13 (Flatten)         (None, 100352)            0         
Total params: 93,248
Trainable params: 93,248
Non-trainable params: 0
_________________________________________________

In [40]:
for layer in loaded_model.layers[:3]:
        layer.trainable = False

In [41]:
loaded_model.summary()

Model: "sequential_13"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_37 (Conv2D)           (None, 126, 126, 32)      896       
_________________________________________________________________
max_pooling2d_25 (MaxPooling (None, 63, 63, 32)        0         
_________________________________________________________________
conv2d_38 (Conv2D)           (None, 61, 61, 64)        18496     
_________________________________________________________________
max_pooling2d_26 (MaxPooling (None, 30, 30, 64)        0         
_________________________________________________________________
conv2d_39 (Conv2D)           (None, 28, 28, 128)       73856     
_________________________________________________________________
flatten_13 (Flatten)         (None, 100352)            0         
Total params: 93,248
Trainable params: 73,856
Non-trainable params: 19,392
____________________________________________

In [32]:
loaded_model.add(layers.Dense(32, activation='relu'))
loaded_model.add(layers.Dense(1, activation='sigmoid'))

loaded_model.compile(loss='binary_crossentropy',
          optimizer=optimizers.RMSprop(lr=0.001, rho=0.9, epsilon=1e-08, decay=0.0),
          metrics=['acc'])

In [33]:
loaded_model.summary()

Model: "sequential_13"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_37 (Conv2D)           (None, 126, 126, 32)      896       
_________________________________________________________________
max_pooling2d_25 (MaxPooling (None, 63, 63, 32)        0         
_________________________________________________________________
conv2d_38 (Conv2D)           (None, 61, 61, 64)        18496     
_________________________________________________________________
max_pooling2d_26 (MaxPooling (None, 30, 30, 64)        0         
_________________________________________________________________
conv2d_39 (Conv2D)           (None, 28, 28, 128)       73856     
_________________________________________________________________
flatten_13 (Flatten)         (None, 100352)            0         
_________________________________________________________________
dense_29 (Dense)             (None, 32)              