In [50]:
#Numpy and Pandas are used to 
import numpy as np
import pandas as pd

#Below libraries are used for general operations
import os
from glob import glob
import random
import shutil

#Used for selecting image files for train and test folder
from sklearn.model_selection import train_test_split

#Image Data Generator is used to load image sequentially 
from keras.preprocessing.image import ImageDataGenerator

#Loading Model Parameters from Keras
from keras.models import Sequential
from keras.layers import Conv2D, MaxPooling2D
from keras.layers import Activation, Dropout, Flatten, Dense

#Confusion Matrix used for evaluation
from sklearn.metrics import confusion_matrix

#Callback is used to save the best model by monitoring Val accuracy
from keras import callbacks

#Importing pre trained model architecture 
from keras.applications.resnet50 import ResNet50
from keras.applications.vgg16 import VGG16

#To read the sved model
from keras.models import model_from_json

#The model optimizer
from keras.optimizers import RMSprop

In [2]:
path = os.getcwd()

Creating Train and Test directories

In [3]:
try:
    os.makedirs(path +'/Binary_data/Train/Positive')
except FileExistsError:
    pass

In [4]:
try:
    os.makedirs(path +'/Binary_data/Train/Negative')
except FileExistsError:
    pass

In [5]:
try:
    os.makedirs(path +'/Binary_data/Test/Positive')
except FileExistsError:
    pass

In [6]:
try:
    os.makedirs(path +'/Binary_data/Test/Negative')
except FileExistsError:
    pass

Splitting and moving image files into Train and Test folders created above

In [7]:
train_pos, test_pos = train_test_split(os.listdir(path+'/Binary_data/Positive/'),test_size=0.1, random_state=1)
train_neg, test_neg = train_test_split(os.listdir(path+'/Binary_data/Negative/'),test_size=0.1, random_state=1)

In [8]:
mv_train_pos = [shutil.copy(path+'/Binary_data/Positive/'+x,path+'/Binary_data/Train/Positive/') for x in train_pos]

In [9]:
mv_test_pos = [shutil.copy(path+'/Binary_data/Positive/'+x,path+'/Binary_data/Test/Positive/') for x in test_pos]

In [10]:
mv_train_neg = [shutil.copy(path+'/Binary_data/Negative/'+x,path+'/Binary_data/Train/Negative/') for x in train_neg]

In [11]:
mv_test_neg = [shutil.copy(path+'/Binary_data/Negative/'+x,path+'/Binary_data/Test/Negative/') for x in test_neg]

Using ImageDataGenerator to pass image files in batches to the model

In [12]:
datagen = ImageDataGenerator(
        rotation_range=40,
        width_shift_range=0,
        height_shift_range=0,
        rescale=1./255,
        shear_range=0,
        zoom_range=0,
        horizontal_flip=False,
        validation_split=0.2,
        fill_mode='nearest')

In [13]:
batch_size = 16
train_path = path +'/Binary_data/Train'

train_generator = datagen.flow_from_directory(
        train_path,  # this is the target directory
        target_size=(150, 150),  # all images will be resized to 150x150
        batch_size=batch_size,
        class_mode='binary',
        subset="training")

Found 28800 images belonging to 2 classes.


In [14]:
validation_generator = datagen.flow_from_directory(
        train_path,  # this is the target directory
        target_size=(150, 150),  # all images will be resized to 150x150
        batch_size=batch_size,
        class_mode='binary',
        subset="validation")

Found 7200 images belonging to 2 classes.


# Custom CNN model

Defining the custom CNN model

In [15]:
model = Sequential()
model.add(Conv2D(32, (3, 3), input_shape=(150, 150,3), padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))

model.add(Conv2D(32, (3, 3),padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))

model.add(Conv2D(64, (3, 3),padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))

model.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))

model.compile(loss='binary_crossentropy',
              optimizer='rmsprop',
              metrics=['accuracy'])






Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


In [16]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv2d_1 (Conv2D)            (None, 150, 150, 32)      896       
_________________________________________________________________
activation_1 (Activation)    (None, 150, 150, 32)      0         
_________________________________________________________________
max_pooling2d_1 (MaxPooling2 (None, 75, 75, 32)        0         
_________________________________________________________________
conv2d_2 (Conv2D)            (None, 75, 75, 32)        9248      
_________________________________________________________________
activation_2 (Activation)    (None, 75, 75, 32)        0         
_________________________________________________________________
max_pooling2d_2 (MaxPooling2 (None, 38, 38, 32)        0         
_________________________________________________________________
conv2d_3 (Conv2D)            (None, 38, 38, 64)        18496     
__________

In [17]:
model_json = model.to_json()

with open("cnn_model.json", "w") as json_file:
    json_file.write(model_json)

In [18]:
checkpoint_filepath = path + '/cnn_checkpoint'
model_checkpoint_callback = callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_acc',
    mode='max',
    save_best_only=True)

In [19]:
model.fit_generator(
        train_generator,
        steps_per_epoch=2000 // batch_size,
        epochs=15,
        validation_data=validation_generator,
        validation_steps=800 // batch_size,
        callbacks=[model_checkpoint_callback])




Epoch 1/15





Epoch 2/15
Epoch 3/15
Epoch 4/15
Epoch 5/15
Epoch 6/15
Epoch 7/15
Epoch 8/15
Epoch 9/15
Epoch 10/15
Epoch 11/15
Epoch 12/15
Epoch 13/15
Epoch 14/15
Epoch 15/15


<keras.callbacks.History at 0x14f248110>

In [20]:
#model.save_weights('cnn_binary_classification.h5')  # always save your weights after training or during training

Testing the model

In [71]:
test_datagen = ImageDataGenerator(
    rotation_range=0,
    width_shift_range=0,
    height_shift_range=0,
    rescale=1./255,
    shear_range=0,
    zoom_range=0,
    fill_mode='nearest')

test_path = path + "/Binary_data/Test"
test_generator = datagen.flow_from_directory(
        test_path,  # this is the target directory
        target_size=(150, 150),  # all images will be resized to 150x150
        batch_size=batch_size,
        class_mode='binary',shuffle=False)

def test_model(model_path,model_weights):
    
    json_file = open(model_path, 'r')
    loaded_model_json = json_file.read()
    json_file.close()
    
    loaded_model = model_from_json(loaded_model_json)
    
    loaded_model.load_weights(model_weights)
    
    probabilities = loaded_model.predict_generator(test_generator,steps=len(test_generator))
    
    y_true = test_generator.classes
    y_pred = probabilities >0.5
    
    cnf_matrix = confusion_matrix(y_pred,y_true)
    
    score = (cnf_matrix[0][0] + cnf_matrix[1][1])/y_pred.shape[0]
    
    return cnf_matrix,score
    

Found 4000 images belonging to 2 classes.


In [30]:
cnf_matrix_cnn, score_cnn = test_model('cnn_model.json','cnn_checkpoint')

In [31]:
print("Accuracy with custom CNN model: {}".format(score_cnn))

Accuracy with custom CNN model: 0.981


In [33]:
cnf_matrix_cnn

array([[1978,   54],
       [  22, 1946]])

# ResNet50 Model

In [102]:
base_model = ResNet50(include_top= False,input_shape=(150,150,3),weights='imagenet')



In [103]:
'''for layers in base_model.layers[:]:
    layers.trainable = False'''

'for layers in base_model.layers[:]:\n    layers.trainable = False'

In [104]:
model = Sequential()

model.add(base_model)

model.add(Conv2D(32, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))

model.add(Conv2D(32, (3, 3),padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))

model.add(Conv2D(64, (3, 3),padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))

model.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))

In [105]:
opt = RMSprop(lr=0.0001)
model.compile(loss='binary_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

In [106]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
resnet50 (Model)             (None, 5, 5, 2048)        23587712  
_________________________________________________________________
conv2d_22 (Conv2D)           (None, 5, 5, 32)          589856    
_________________________________________________________________
activation_381 (Activation)  (None, 5, 5, 32)          0         
_________________________________________________________________
max_pooling2d_29 (MaxPooling (None, 3, 3, 32)          0         
_________________________________________________________________
conv2d_23 (Conv2D)           (None, 3, 3, 32)          9248      
_________________________________________________________________
activation_382 (Activation)  (None, 3, 3, 32)          0         
_________________________________________________________________
max_pooling2d_30 (MaxPooling (None, 2, 2, 32)          0         
__________

In [107]:
model_json = model.to_json()

with open("resnet50_model.json", "w") as json_file:
    json_file.write(model_json)

In [108]:
checkpoint_filepath = path + '/resnet50_chkpnt'
model_checkpoint_callback = callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_acc',
    mode='max',
    save_best_only=True)

In [109]:
model.fit_generator(
        train_generator,
        steps_per_epoch=2000 // batch_size,
        epochs=5,
        validation_data=validation_generator,
        validation_steps=800 // batch_size,
        callbacks=[model_checkpoint_callback])

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x196f1f790>

Testing the Resnet50 model

In [110]:
cnf_matrix_res, score_res = test_model("resnet50_model.json",'resnet50_chkpnt')

In [111]:
print("Accuracy with Resnet50 model: {}".format(score_res))

Accuracy with Resnet50 model: 0.99775


In [112]:
cnf_matrix_res

array([[1996,    5],
       [   4, 1995]])

# VGG16 Model

In [146]:
base_model = VGG16(include_top= False,input_shape=(150,150,3),weights='imagenet')

In [147]:
'''for layers in base_model.layers[:]:
    layers.trainable = False'''

'for layers in base_model.layers[:]:\n    layers.trainable = False'

In [148]:
model = Sequential()

model.add(base_model)

model.add(Conv2D(32, (3, 3), padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))

model.add(Conv2D(32, (3, 3),padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))

model.add(Conv2D(64, (3, 3),padding='same'))
model.add(Activation('relu'))
model.add(MaxPooling2D(pool_size=(2, 2),padding='same'))

model.add(Flatten())  # this converts our 3D feature maps to 1D feature vectors
model.add(Dense(64))
model.add(Activation('relu'))
model.add(Dropout(0.5))
model.add(Dense(1))
model.add(Activation('sigmoid'))

In [149]:
opt = RMSprop()
model.compile(loss='binary_crossentropy',
              optimizer=opt,
              metrics=['accuracy'])

In [150]:
model.summary()

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
vgg16 (Model)                (None, 4, 4, 512)         14714688  
_________________________________________________________________
conv2d_31 (Conv2D)           (None, 4, 4, 32)          147488    
_________________________________________________________________
activation_396 (Activation)  (None, 4, 4, 32)          0         
_________________________________________________________________
max_pooling2d_38 (MaxPooling (None, 2, 2, 32)          0         
_________________________________________________________________
conv2d_32 (Conv2D)           (None, 2, 2, 32)          9248      
_________________________________________________________________
activation_397 (Activation)  (None, 2, 2, 32)          0         
_________________________________________________________________
max_pooling2d_39 (MaxPooling (None, 1, 1, 32)          0         
__________

In [151]:
model_json = model.to_json()

with open("vgg16_model.json", "w") as json_file:
    json_file.write(model_json)

In [152]:
checkpoint_filepath = path + '/vgg16_chkpnt'
model_checkpoint_callback = callbacks.ModelCheckpoint(
    filepath=checkpoint_filepath,
    save_weights_only=True,
    monitor='val_acc',
    mode='max',
    save_best_only=True)

In [141]:
model.fit_generator(
        train_generator,
        steps_per_epoch=2000 // batch_size,
        epochs=5,
        validation_data=validation_generator,
        validation_steps=800 // batch_size,
        callbacks=[model_checkpoint_callback])

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x1df233a10>

In [142]:
cnf_matrix_vgg ,score_vgg = test_model('vgg16_model.json','vgg16_chkpnt')

In [143]:
print('Accuracy with VGG16 model: {}'.format(score_vgg))

Accuracy with VGG16 model: 0.5


In [144]:
cnf_matrix_vgg

array([[   0,    0],
       [2000, 2000]])

In [145]:
print('CNN: {} , Resnet50: {}, VGG16: {}'.format(score_cnn,score_res,score_vgg))

CNN: 0.981 , Resnet50: 0.99775, VGG16: 0.5
