In [0]:
from google.colab import drive
drive.mount('/content/drive')
## there are two zips uploaded,assignment.zip or assignment(1).zip,please use other if binary compression error occurs!!
!unzip '/content/drive/My Drive/Assignment.zip'

In [0]:
import os
import h5py
import numpy as np
from keras.preprocessing.image import ImageDataGenerator
from keras import optimizers
from keras.models import Sequential
from keras.layers import Dense,Conv2D,MaxPooling2D,Flatten,Dropout,Flatten,Reshape,ZeroPadding2D
from keras.layers import Activation, Dropout, Flatten, Dense 
from keras import backend as K
import tensorflow as tf
# TensorFlow session configuration: request zero GPU devices and enable
# on-demand growth of GPU memory. Swap in a plain `tf.ConfigProto()` to keep
# TensorFlow's default device setup instead.
# NOTE(review): this object only influences training if a session built from it
# is registered with Keras (e.g. via K.set_session) before the model is created
# -- confirm that this is actually done.
config = tf.ConfigProto(
    device_count={'GPU': 0},
    gpu_options=tf.GPUOptions(allow_growth=True),
)

In [0]:
# HDF5 file under the mounted Drive folder that holds the model weights.
weights_path = '/content/drive/My Drive/model_weights.h5'

# Width and height (in pixels) used for the network input and for resizing images.
# NOTE(review): an earlier note here said the downloaded model was trained on
# 150*150 images, which does not match the 800 used below -- confirm.
img_width = img_height = 800

In [0]:
# Dataset folders under /content/Assignment.
# Training images:
train_data_dir = '/content/Assignment/Input/Dataset/train'
# Augmented validation images created by augment.py:
validation_data_dir = '/content/Assignment/Input/Dataset/validation'

# Sample counts handed to the training call, and the number of epochs to run.
nb_train_samples, nb_validation_samples, nb_epoch = 50, 30, 100

In [0]:
# Build the model: two stacks of zero-padded 3x3 convolutions, each followed by
# 2x2 max pooling, then a small dense classifier with a 4-way softmax output.
#
# Kernel sizes are written as explicit (3, 3) tuples. The Keras 1 positional form
# Conv2D(n, 3, 3) only works through Keras 2's legacy-argument shim; in the Keras 2
# signature the third positional argument is `strides`, not the kernel width.
model = Sequential()
# Although all images are the same size, zero padding does no harm if we end up
# dealing with test images of unknown dimensions; it also helps with image warping.
# The input shape is (rows, cols, channels) = (height, width, 1) for grayscale,
# matching the target_size=(img_height, img_width) used by the data generators.
model.add(ZeroPadding2D((1, 1), input_shape=(img_height, img_width, 1)))
## first convolution stage
model.add(Conv2D(64, (3, 3), activation='relu', name='conv1_1'))
model.add(ZeroPadding2D((1, 1)))
model.add(Conv2D(64, (3, 3), activation='relu', name='conv1_2'))
model.add(MaxPooling2D((2, 2), strides=(2, 2)))
## second convolution stage
model.add(ZeroPadding2D((1, 1)))
model.add(Conv2D(256, (3, 3), activation='relu', name='conv2_1'))
model.add(ZeroPadding2D((1, 1)))
model.add(Conv2D(256, (3, 3), activation='relu', name='conv2_2'))
model.add(ZeroPadding2D((1, 1)))
model.add(Conv2D(256, (3, 3), activation='relu', name='conv2_3'))
model.add(MaxPooling2D((2, 2), strides=(2, 2)))
## flatten the feature maps so they can feed the decision-making dense layers
model.add(Flatten())
model.add(Dense(128, activation='relu'))
# dropout between the dense layers; half of the activations are dropped in training
model.add(Dropout(0.5))
## softmax because this is a multi-class problem (4 output classes)
model.add(Dense(4, activation='softmax'))


Load the weights of the VGG16 network (trained on ImageNet; it won the ILSVRC competition in 2014). Note: when there is a complete match between your model definition and your saved weights file, you can simply call `model.load_weights(filename)`.

In [0]:
## Previously saved weights are reloaded to facilitate fine-tuning.
# The weights file is written by the training cell at the end of the notebook, so
# on a first run it may not exist yet; in that case say so and continue with the
# freshly initialised weights instead of crashing.
if os.path.isfile(weights_path):
    model.load_weights(weights_path)
    print('Model loaded.')
else:
    print('No weights file found at %s; training starts from freshly initialised weights.' % weights_path)

# Matthews Correlation Coefficient
The Matthews correlation coefficient is often regarded as one of the best measures for binary classification.
Because recent versions of Keras removed this useful metric, I have written a version that follows the source code of earlier Keras releases.
A value close to 1 is regarded as best.


In [0]:
def mcor(y_true, y_pred):
    """Matthews correlation coefficient accumulated over the whole batch.

    Both tensors are clipped to [0, 1] and rounded, so every entry is turned
    into a hard 0/1 decision before the confusion counts are summed.

    Args:
        y_true: ground-truth tensor.
        y_pred: prediction tensor, multiplied element-wise with ``y_true``.

    Returns:
        ``(tp*tn - fp*fn) / (sqrt((tp+fp)(tp+fn)(tn+fp)(tn+fn)) + epsilon)``;
        the backend epsilon keeps the division defined when a factor is zero.
    """
    # Hard positive masks and their complements.
    predicted_pos = K.round(K.clip(y_pred, 0, 1))
    actual_pos = K.round(K.clip(y_true, 0, 1))
    predicted_neg = 1 - predicted_pos
    actual_neg = 1 - actual_pos

    # Confusion-matrix counts, summed over every element of the batch.
    tp = K.sum(actual_pos * predicted_pos)
    tn = K.sum(actual_neg * predicted_neg)
    fp = K.sum(actual_neg * predicted_pos)
    fn = K.sum(actual_pos * predicted_neg)

    covariance = tp * tn - fp * fn
    scale = K.sqrt((tp + fp) * (tp + fn) * (tn + fp) * (tn + fn))
    return covariance / (scale + K.epsilon())

# F1 Score metric

The F1 score is often a good measure of a model's performance.
The function below calculates precision and recall and then combines them
into the F1 score using the formula in its return statement.


In [0]:
def f1(y_true, y_pred):
    """F1 score (harmonic mean of precision and recall) for one batch.

    Only a batch-wise value is computed. Entries are clipped to [0, 1] and
    rounded before counting, so the counts are of hard 0/1 decisions.

    Args:
        y_true: ground-truth tensor.
        y_pred: prediction tensor, multiplied element-wise with ``y_true``.

    Returns:
        ``2 * precision * recall / (precision + recall + epsilon)``, where
        recall is the share of relevant items that were selected and
        precision is the share of selected items that are relevant.
    """
    # Entries that are positive in both the labels and the rounded predictions.
    hits = K.sum(K.round(K.clip(y_true * y_pred, 0, 1)))
    actual_positives = K.sum(K.round(K.clip(y_true, 0, 1)))
    flagged_positives = K.sum(K.round(K.clip(y_pred, 0, 1)))

    # The backend epsilon guards each ratio against an empty denominator.
    recall_score = hits / (actual_positives + K.epsilon())
    precision_score = hits / (flagged_positives + K.epsilon())
    return 2 * ((precision_score * recall_score) / (precision_score + recall_score + K.epsilon()))

In [0]:
# Compile the model with an Adamax optimizer and a very small learning rate.
## Adam with a rate of 1e-4 was used to train the first set of model weights; the
## model was then fine-tuned with Adamax using a decay of 0.002, which is not
## written in this code.
adamax = optimizers.Adamax(lr=1e-4)

# Report accuracy together with the custom F1 and Matthews-correlation metrics.
model.compile(
    optimizer=adamax,
    loss='categorical_crossentropy',
    metrics=['accuracy', f1, mcor],
)


In [0]:
# Data augmentation configuration for the training images: pixel values are
# multiplied by 1/255, and random shear, zoom and horizontal flips are enabled.
augmentation_settings = {
    'rescale': 1./255,
    'shear_range': 0.2,
    'zoom_range': 0.2,
    'horizontal_flip': True,
}
train_datagen = ImageDataGenerator(**augmentation_settings)


In [0]:
# Validation images are only rescaled -- no augmentation is configured for them.
test_datagen = ImageDataGenerator(rescale=1./255)

# Options shared by both directory iterators: resize to the network input size,
# batches of 32, categorical labels, single-channel (grayscale) images.
flow_options = dict(
    target_size=(img_height, img_width),
    batch_size=32,
    class_mode='categorical',
    color_mode='grayscale',
)

train_generator = train_datagen.flow_from_directory(train_data_dir, **flow_options)

validation_generator = test_datagen.flow_from_directory(validation_data_dir, **flow_options)


In [0]:
# Fine-tune the model.
#
# No bare `tf.Session(config=config)` is created here: a session that is not
# registered with Keras is never used for training and is simply leaked. To apply
# `config`, register a session with K.set_session(...) before the model is built.
#
# Keras 2 counts *batches* per epoch (steps), not samples, so the sample counts are
# converted with a ceiling division by each generator's batch size. Passing the raw
# sample counts through the Keras 1 names (samples_per_epoch / nb_val_samples)
# would be read as that many batches.
train_batch = train_generator.batch_size
val_batch = validation_generator.batch_size
steps_per_epoch = (nb_train_samples + train_batch - 1) // train_batch
validation_steps = (nb_validation_samples + val_batch - 1) // val_batch

model.fit_generator(
    train_generator,
    steps_per_epoch=steps_per_epoch,
    epochs=nb_epoch,
    validation_data=validation_generator,
    validation_steps=validation_steps)

# Persist the weights alone (reloaded at the top of the notebook for further
# fine-tuning) and the complete model, replacing any earlier file.
model.save_weights("/content/drive/My Drive/model_weights.h5")
model.save("/content/drive/My Drive/category_model.h5", overwrite=True)


As shown in the results above, the model achieves good accuracy on the training set (around 95%) and competitive performance on the validation set (which contains many stray pixels).