Import

In [1]:
import cv2
import pandas as pd
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPool2D, Flatten,Dropout

Read all photos

In [2]:
# Load the images newimg/img0001.jpg ... newimg/img4000.jpg, resize each one
# to 100x100 and divide the pixel values by 255, then stack them into one array.
imgs = []
for i in range(1,4001):
    num = str(i).zfill(4)
    path = 'newimg/img'+ num +'.jpg'
    img = cv2.imread(path)
    # cv2.imread does not raise on a missing/unreadable file; it returns None,
    # which would otherwise fail later inside cv2.resize with an unclear error.
    if img is None:
        raise FileNotFoundError('Could not read image: ' + path)
    img = cv2.resize(img, (100,100))
    img = img/255
    imgs.append(img)
imgs = np.array(imgs)

Read labels

In [3]:
# Read the space-separated label file (no header row) and keep its first
# column as a NumPy array of targets.
# `sep` is passed by keyword: passing it positionally is deprecated in
# pandas 1.x and raises a TypeError in pandas 2.x.
y = pd.read_csv('labels.txt', sep=' ', header=None)
y = y.iloc[:,0].values

  exec(code_obj, self.user_global_ns, self.user_ns)


Shape of input

In [4]:
# Pull the first four axis lengths out of the image array in one step:
# number of images, image height, image width, number of colour channels.
num_samples, height, width, channels = (imgs.shape[axis] for axis in range(4))
print(num_samples,height,width,channels)

4000 100 100 3


No reshape is needed here, since the array is already 4-dimensional; if it were needed, we would use this line

In [5]:
# Intentionally left disabled: the shape printed above already has the four
# axes (num_samples, height, width, channels), so reshaping would change nothing.
#imgs = imgs.reshape((num_samples, height, width, channels))

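Split the data into three groups: test = 20% of all images, validation = 20% of the remaining training images, train = the rest (i.e. 64% / 16% / 20% of the whole dataset for train / validation / test)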

In [6]:
from sklearn.model_selection import train_test_split

# Both cuts hold out the same fraction and use the same seed.
split_options = {'test_size': 0.2, 'random_state': 20}

# First cut: hold out 20% of all samples as the test set.
x_train, x_test, y_train, y_test = train_test_split(
    imgs, y, shuffle=True, **split_options
)

# Second cut: hold out 20% of the remaining training samples for validation.
x_train, x_val, y_train, y_val = train_test_split(
    x_train, y_train, **split_options
)

- Use augmentation to prevent overfitting and obtain a more robust model: it randomly rotates, shifts, zooms, and horizontally flips the photos
- Use the preprocessing tool available in Keras -> ImageDataGenerator

In [7]:
# Random transformations applied to the training images.
augmentation_settings = {
    'rotation_range': 5,        # rotate by up to 5 degrees
    'width_shift_range': 0.1,   # horizontal shift of up to 10%
    'height_shift_range': 0.1,  # vertical shift of up to 10%
    'zoom_range': 0.1,          # zoom in or out
    'horizontal_flip': True,    # mirror left/right at random
    'fill_mode': 'nearest',     # fill uncovered pixels from the nearest neighbour
}
augmentation = ImageDataGenerator(**augmentation_settings)

Adding layers and building CNN model structure

In [8]:
# CNN: three Conv2D + MaxPool2D stages, then flatten, dropout, a 256-unit
# dense layer and a single sigmoid output unit for the binary label.
model = Sequential([
    Conv2D(filters=32, kernel_size=3, activation='relu', input_shape=[100, 100, 3]),
    MaxPool2D(pool_size=2, strides=2),
    Conv2D(filters=64, kernel_size=3, activation='relu'),
    MaxPool2D(pool_size=2, strides=2),
    Conv2D(filters=64, kernel_size=3, activation='relu'),
    MaxPool2D(pool_size=2, strides=2),
    Flatten(),
    Dropout(0.2),
    Dense(256, activation='relu'),
    Dense(1, activation='sigmoid'),
])

# Adam optimizer with binary cross-entropy loss; accuracy is tracked as a metric.
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

Fit with augmentation and 40 epochs and validation set

In [9]:
# Stream augmented training batches of 32 images from the generator.
train_batches = augmentation.flow(x_train, y_train, batch_size=32)

# Train for 40 epochs, passing the (un-augmented) validation split to fit.
history = model.fit(
    train_batches,
    epochs=40,
    validation_data=(x_val, y_val),
)

Epoch 1/40
Epoch 2/40
Epoch 3/40
Epoch 4/40
Epoch 5/40
Epoch 6/40
Epoch 7/40
Epoch 8/40
Epoch 9/40
Epoch 10/40
Epoch 11/40
Epoch 12/40
Epoch 13/40
Epoch 14/40
Epoch 15/40
Epoch 16/40
Epoch 17/40
Epoch 18/40
Epoch 19/40
Epoch 20/40
Epoch 21/40
Epoch 22/40
Epoch 23/40
Epoch 24/40
Epoch 25/40
Epoch 26/40
Epoch 27/40
Epoch 28/40
Epoch 29/40
Epoch 30/40
Epoch 31/40
Epoch 32/40
Epoch 33/40
Epoch 34/40
Epoch 35/40
Epoch 36/40
Epoch 37/40
Epoch 38/40
Epoch 39/40
Epoch 40/40


Model summary

In [26]:
# Print the layer-by-layer table of the network (layer type, output shape, parameter count).
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 conv2d (Conv2D)             (None, 98, 98, 32)        896       
                                                                 
 max_pooling2d (MaxPooling2D  (None, 49, 49, 32)       0         
 )                                                               
                                                                 
 conv2d_1 (Conv2D)           (None, 47, 47, 64)        18496     
                                                                 
 max_pooling2d_1 (MaxPooling  (None, 23, 23, 64)       0         
 2D)                                                             
                                                                 
 conv2d_2 (Conv2D)           (None, 21, 21, 64)        36928     
                                                                 
 max_pooling2d_2 (MaxPooling  (None, 10, 10, 64)       0

Evaluate on the test set: we obtained a very good accuracy of 93.75%

In [15]:
# Score the trained model on the held-out test split; the cell displays
# the returned [loss, accuracy] pair.
model.evaluate(x=x_test, y=y_test)



[0.16764448583126068, 0.9375]

Save model with pickle

In [12]:
import pickle

filename = 'finalized_40epoch_smile_detection_CNN_model.sav'
# Write through a context manager so the file handle is flushed and closed
# as soon as the dump finishes (the bare open() call was never closed).
with open(filename, 'wb') as model_file:
    pickle.dump(model, model_file)

# also loading 
# NOTE: only unpickle files you created yourself; pickle.load can execute
# arbitrary code from an untrusted file.
# with open(filename, 'rb') as model_file:
#     loaded_model = pickle.load(model_file)

Keras weights file (<HDF5 file "variables.h5" (mode r+)>) saving:
...layers\conv2d
......vars
.........0
.........1
...layers\conv2d_1
......vars
.........0
.........1
...layers\conv2d_2
......vars
.........0
.........1
...layers\dense
......vars
.........0
.........1
...layers\dense_1
......vars
.........0
.........1
...layers\dropout
......vars
...layers\flatten
......vars
...layers\max_pooling2d
......vars
...layers\max_pooling2d_1
......vars
...layers\max_pooling2d_2
......vars
...metrics\mean
......vars
.........0
.........1
...metrics\mean_metric_wrapper
......vars
.........0
.........1
...optimizer
......vars
.........0
.........1
.........10
.........11
.........12
.........13
.........14
.........15
.........16
.........17
.........18
.........19
.........2
.........20
.........3
.........4
.........5
.........6
.........7
.........8
.........9
...vars
Keras model archive saving:
File Name                                             Modified             Size
config.json       