In [0]:
import numpy as np
import matplotlib.pyplot as plt
import keras
from keras.models import Sequential
from keras.layers import Dense, Conv2D, MaxPooling2D, Dropout, Flatten
from keras.layers.normalization import BatchNormalization
from keras.preprocessing.image import ImageDataGenerator
from keras.callbacks import ModelCheckpoint

Using TensorFlow backend.


In [0]:
from google.colab import drive
drive.mount('/content/gdrive')
!ls

Go to this URL in a browser: https://accounts.google.com/o/oauth2/auth?client_id=947318989803-6bn6qk8qdgf4n4g3pfee6491hc0brc4i.apps.googleusercontent.com&redirect_uri=urn%3Aietf%3Awg%3Aoauth%3A2.0%3Aoob&scope=email%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdocs.test%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fdrive.photos.readonly%20https%3A%2F%2Fwww.googleapis.com%2Fauth%2Fpeopleapi.readonly&response_type=code

Enter your authorization code:
··········
Mounted at /content/gdrive
gdrive	sample_data


##Model Architecture

In [0]:
# Specify the input shape to the first convolutional layer

input_shape = (120, 160, 3)
nClasses = 2
def createModel():
    model = Sequential()
    # a convolution layer of 32 features of size 3x3 with relu activation and zero padding
    model.add(Conv2D(32, (3, 3), padding='same', activation='relu', input_shape=input_shape))
    # a convolution layer of 32 features of size 3x3 with relu activation
    model.add(Conv2D(32, (3, 3), activation='relu'))
    # a batch normalization layer
    model.add(BatchNormalization())
    # maxpooling layer of filter size 2x2
    model.add(MaxPooling2D(pool_size=(2, 2)))
    
    # a convolution layer of 64 features of size 3x3 with relu activation and zero padding
    model.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
    # a convolution layer of 64 features of size 3x3 with relu activation
    model.add(Conv2D(64, (3, 3), activation='relu'))
    # a batch normalization layer
    model.add(BatchNormalization())
    # maxpooling layer of filter size 2x2
    model.add(MaxPooling2D(pool_size=(2, 2)))
    
    # a convolution layer of 64 features of size 3x3 with relu activation and zero padding
    model.add(Conv2D(64, (3, 3), padding='same', activation='relu'))
    # a convolution layer of 64 features of size 3x3 with relu activation
    model.add(Conv2D(64, (3, 3), activation='relu'))
    # a batch normalization layer
    model.add(BatchNormalization())
    # maxpooling layer of filter size 2x2
    model.add(MaxPooling2D(pool_size=(2, 2)))
    # a dropout layer of 50%
    model.add(Dropout(0.5))
    
    # flatten the output of the previous layer
    model.add(Flatten())
    # add a dense layer that outputs 512 units and apply relu activation
    model.add(Dense(512, activation='relu'))
    # a dropout layer of 50%
    model.add(Dropout(0.5))
    # add a dense layer with a softmax activation to classify the images
    model.add(Dense(nClasses, activation='softmax'))
     
    return model

##Load the old weights file and the save new best weights to the same file.

In [0]:
model1 = createModel()
model1.compile(optimizer='rmsprop', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model1.load_weights('gdrive/My Drive/ML/my_weights.hdf5')
checkpointer = ModelCheckpoint(filepath='gdrive/My Drive/ML/my_weights.hdf5',monitor='val_acc', verbose=1, save_best_only=True) # To Checkpoint weights

Instructions for updating:
Colocations handled automatically by placer.
Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


##Use ImageDataGenerator’s parameters to create a training validation split (0.2)
We used data augmentation using the commented parameters and helped us to reach 95.69% validation accuracy.

In [0]:
gen = ImageDataGenerator(
    #featurewise_center=True,
    #featurewise_std_normalization=True,
    validation_split=0.2,
    samplewise_center=True,
    samplewise_std_normalization=True,
#     horizontal_flip=True,
#     zoom_range=0.1,
#     rotation_range=10,
#     width_shift_range=0.1,
#     height_shift_range=0.1,
#     shear_range=0.1,
)

##Use Keras’ ImageDataGenerator to read the modified dataset

In [0]:
batch_size=32
train_generator = gen.flow_from_directory(
    directory="gdrive/My Drive/ML/train",
    batch_size=batch_size,
    target_size=(120, 160),
    class_mode='binary',
    seed=1,
    subset="training"
)

Found 3098 images belonging to 2 classes.


In [0]:
validation_generator = gen.flow_from_directory(
        'gdrive/My Drive/ML/train',
        batch_size=batch_size,
        target_size=(120, 160),
        class_mode='binary',
        seed=1,
        subset="validation"
)

Found 774 images belonging to 2 classes.


##Training

####Choose the appropriate steps_per_epoch and validation_steps value. These values are related to the batch size. Justify your choice.

For example, if the generator produces batches of size 32 samples and the whole training dataset is of size 2048 samples, then we need 64 batches of those to complete one epoch (Cover the whole train dataset) because 32*64 = 2048. That's why to know the number of steps per epoch (number of batches), we divide the total size of the dataset by the batch size. Same logic is applied for validation and test epochs. 

In [0]:
model1.fit_generator(
        train_generator,
        steps_per_epoch=train_generator.samples // batch_size,
        epochs=2,
        validation_data=validation_generator, 
        validation_steps=validation_generator.samples // batch_size,
        callbacks=[checkpointer]
)

Instructions for updating:
Use tf.cast instead.
Epoch 1/2

Epoch 00001: val_acc improved from -inf to 0.94141, saving model to gdrive/My Drive/ML/my_weights.hdf5
Epoch 2/2

Epoch 00002: val_acc improved from 0.94141 to 0.94609, saving model to gdrive/My Drive/ML/my_weights.hdf5


<keras.callbacks.History at 0x7f73c9e834e0>

In [0]:
#Cell used to change the file of weights and apply it to test dataset.
model1.load_weights('gdrive/My Drive/ML/my_weights.hdf5')

#TESTING

Testing Accuracy reaches 97.1%

In [0]:
test_datagen = ImageDataGenerator(
       samplewise_center=True,
       samplewise_std_normalization=True,
)

test_generator = test_datagen.flow_from_directory(
        'gdrive/My Drive/ML/test',
        target_size=(120, 160),
        class_mode='binary',
        batch_size=batch_size,
        seed=1, 
)
accuracy = model1.evaluate_generator(generator=test_generator, 
                                     steps=test_generator.samples/batch_size
                                     )
print("Accuracy =", accuracy[1])

Found 860 images belonging to 2 classes.
Accuracy = 0.963953488372093
