Mount the drive that contains the data.

In [None]:
# Mount Google Drive inside the Colab runtime at /content/drive; every data and
# output path used later in this notebook lives under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Import and load all the required packages.

In [3]:
from tensorflow.keras.models import Model, load_model
from tensorflow.keras.applications.vgg16 import VGG16, preprocess_input
from keras.preprocessing import image
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.models import Sequential
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import tensorflow as tf
from tensorflow.keras import Sequential
from tensorflow.keras.layers import Conv2D, Flatten, Dense, Input, BatchNormalization, Dropout, MaxPool2D
from sklearn.model_selection import train_test_split
from sklearn.metrics import confusion_matrix, classification_report
import seaborn as sns
import torch
import os


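Specify the path to the folder containing the images; the class label is taken from the part of each file name before the first underscore.
Split the available data into training (70%), validation (15%) and test (15%) sets, stratified by class, and reset the index of each split.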

In [4]:
# Folder on the mounted drive that holds every image. The class label is the
# part of each file name before the first underscore.
path = "/content/drive/MyDrive/Mixed Images"

# os.listdir returns entries in arbitrary order; sort them so that the seeded
# splits below are the same on every run and every machine.
filenames = sorted(os.listdir(path))

df = pd.DataFrame({'filename': filenames})
df["category"] = df["filename"].str.split('_').str[0]

# Keep only the two known classes (any other entry in the folder is dropped),
# with all 'Damaged' rows first, followed by all 'Undamaged' rows.
df = pd.concat(
    [df[df.category == label] for label in ('Damaged', 'Undamaged')],
    ignore_index=True,
    axis=0,
)

# 70 % of the images go to training; the remaining 30 % is split in half into
# validation and test. Both splits are stratified so that each subset keeps the
# class proportions of the full dataset.
train_df, validate_df = train_test_split(
    df, test_size=0.30, random_state=42, stratify=df["category"]
)
validate_df, test_df = train_test_split(
    validate_df, test_size=0.5, random_state=42, stratify=validate_df["category"]
)

# Give every split a clean 0..n-1 index.
train_df = train_df.reset_index(drop=True)
validate_df = validate_df.reset_index(drop=True)
test_df = test_df.reset_index(drop=True)

In [5]:
# Report the per-class image counts of each split, in train / validation / test order.
for split_df in (train_df, validate_df, test_df):
    print(split_df.category.value_counts())

Damaged      1074
Undamaged    1028
Name: category, dtype: int64
Damaged      230
Undamaged    220
Name: category, dtype: int64
Damaged      230
Undamaged    221
Name: category, dtype: int64


Building the convolutional layers for the CNN model.

In [6]:
# Sequential CNN: four convolutional blocks followed by a small dense classifier.
cnn = tf.keras.models.Sequential()

# Every block is Conv2D(3x3, ReLU) -> BatchNormalization -> 2x2 max-pooling ->
# Dropout(0.2). Only the number of filters changes between blocks, and only the
# first convolution declares the 224x224 RGB input shape.
for block_no, n_filters in enumerate((256, 512, 512, 256)):
    conv_kwargs = dict(filters=n_filters, kernel_size=(3, 3), activation='relu')
    if block_no == 0:
        conv_kwargs['input_shape'] = [224, 224, 3]
    cnn.add(Conv2D(**conv_kwargs))
    cnn.add(BatchNormalization())
    cnn.add(MaxPool2D(pool_size=2, strides=2))
    cnn.add(Dropout(0.2))

# Flatten the feature maps into a vector for the dense layers.
cnn.add(Flatten())
cnn.add(Dropout(0.2))

# Fully connected layer with heavier dropout.
cnn.add(Dense(128, activation='relu'))
cnn.add(Dropout(0.5))

# Output layer: one softmax unit per distinct category in the training split.
cnn.add(Dense(units=train_df.category.nunique(), activation='softmax'))

Set up image augmentation for the training set with Keras' `ImageDataGenerator` class; validation and test images are only rescaled.

In [None]:
# Options shared by all three flow_from_dataframe calls: where the images live,
# which dataframe columns hold the file name and the label, and the batch layout.
flow_options = dict(
    directory=path,
    x_col='filename',
    y_col='category',
    target_size=(224, 224),
    class_mode='categorical',
    batch_size=32,
)

# Training images are rescaled to [0, 1] and randomly augmented.
train_datagen = ImageDataGenerator(
    rescale=1./255,
    rotation_range=15,
    shear_range=0.1,
    zoom_range=0.2,
    width_shift_range=0.1,
    height_shift_range=0.1,
    horizontal_flip=True,
    vertical_flip=True,
)
train_set = train_datagen.flow_from_dataframe(train_df, **flow_options)

# Validation and test images are only rescaled, never augmented.
validation_datagen = ImageDataGenerator(rescale=1./255)
validation_set = validation_datagen.flow_from_dataframe(validate_df, **flow_options)

test_datagen = ImageDataGenerator(rescale=1./255)
# shuffle=False keeps the test batches in dataframe order, which the later
# prediction cell relies on when it writes predictions back into test_df.
test_set = test_datagen.flow_from_dataframe(test_df, shuffle=False, **flow_options)

Specify the loss function and the hyperparameters.

In [None]:
# Adam with a learning rate of 1e-4; categorical cross-entropy pairs with the
# softmax output layer and the generators' class_mode='categorical' labels.
adam = tf.keras.optimizers.Adam(learning_rate=1e-04)
cnn.compile(optimizer=adam, loss='categorical_crossentropy', metrics=['accuracy'])
cnn.summary()

Fit the model and use early stopping callback to stop training once the model performance stops improving.

In [None]:
# Stop training once the validation loss has not improved for 5 consecutive
# epochs, and roll the model back to the weights of its best epoch.
callback = tf.keras.callbacks.EarlyStopping(monitor='val_loss', mode='min', patience=5, restore_best_weights=True)

# `callbacks` is documented as a list of Callback instances, so the single
# early-stopping callback is wrapped in a list rather than passed bare.
r = cnn.fit(train_set,
            validation_data=validation_set,
            epochs=15,
            steps_per_epoch=len(train_set),
            validation_steps=len(validation_set),
            callbacks=[callback])

Plot loss and accuracy graphs to visualize the performance of the model.

In [None]:
# Training vs. validation loss per epoch, saved to the drive and shown inline.
loss_fig, loss_ax = plt.subplots()
loss_ax.plot(r.history['loss'], label='train loss')
loss_ax.plot(r.history['val_loss'], label='val loss')
loss_ax.legend()
loss_fig.savefig('/content/drive/MyDrive/CNN_ValLoss_2.png')
plt.show()

In [None]:
# Training vs. validation accuracy per epoch, saved to the drive and shown inline.
acc_fig, acc_ax = plt.subplots()
acc_ax.plot(r.history['accuracy'], label='train acc')
acc_ax.plot(r.history['val_accuracy'], label='val acc')
acc_ax.legend()
acc_fig.savefig('/content/drive/MyDrive/CNN_ValACC_2.png')
plt.show()

In [None]:
# Display the (label, index) pairs the training generator assigned to each class;
# the model's output units follow this index order.
train_set.class_indices.items()

Save the trained model on the drive.

In [None]:
# Persist the trained model to the drive as a single .h5 file so it can be reloaded later.
cnn.save('/content/drive/MyDrive/CNN.h5')

Load the saved model and make predictions on the test dataset.

In [None]:
# Reload the saved model and predict class probabilities for every test image.
cnn = tf.keras.models.load_model('/content/drive/MyDrive/CNN.h5')
# Rewind the generator so predictions start from the first test image.
test_set.reset()
pred = cnn.predict(test_set)

# Turn the arg-max class index of each prediction back into its label. The
# index -> label mapping is derived from the training generator instead of being
# hard-coded, so it always matches the class order the model was trained with.
index_to_label = {idx: label for label, idx in train_set.class_indices.items()}
test_df["pred"] = [index_to_label[idx] for idx in np.argmax(pred, axis=1)]
test_df.tail()

In [None]:
# Write the test dataframe (including the dataframe index) to a CSV file on the drive.
test_df.to_csv("/content/drive/MyDrive/test_df.csv")

In [None]:
# Evaluate the model on the test generator; reports the compiled loss and accuracy metric.
cnn.evaluate(test_set, verbose=2)

Plot a confusion matrix to understand the model predictions.

In [None]:
# Confusion matrix of true vs. predicted labels on the test set. Passing the
# label list explicitly fixes the row/column order to match the tick labels.
class_labels = ['Damaged', 'Undamaged']
cm = confusion_matrix(test_df['category'], test_df['pred'], labels=class_labels)

# fmt='d' prints the integer counts as-is; the default '.2g' format would show
# any count of 100 or more in scientific notation (e.g. 2.2e+02).
ax = sns.heatmap(cm, annot=True, fmt='d',
                 xticklabels=class_labels, yticklabels=class_labels)
# confusion_matrix puts the true labels on rows and the predictions on columns.
ax.set(xlabel='Predicted', ylabel='Actual')
plt.show()

In [None]:
# classification_report returns one multi-line string. It must be printed:
# as a bare last expression the notebook would show its repr, with every line
# break rendered as a literal "\n".
target_names = ['Damaged','Undamaged']
print(classification_report(test_df['category'], test_df['pred'], target_names= target_names))

Visualise the predictions made by the model.

In [None]:
from keras.preprocessing import image

# Draw 35 randomly chosen test images in a 5 x 7 grid; each image is captioned
# with the leading part of its file name and, in parentheses, the predicted class.
sample_test = test_df.sample(n=35).reset_index(drop=True)
plt.figure(figsize=(20, 20))
for position, (filename, pred) in enumerate(
        zip(sample_test['filename'], sample_test['pred']), start=1):
    img = image.load_img(f"{path}/{filename}", target_size=(224,224))
    plt.subplot(5, 7, position)
    plt.imshow(img)
    plt.xlabel("{}({})".format(filename.split(' ')[0], pred))
plt.tight_layout()
plt.show()

Example of Image augmentation.

In [None]:
# Image Augmentation
# Pick one random training image and feed it through the augmenting training
# generator, so every batch drawn below is a freshly transformed copy of it.
example_df = train_df.sample(n=1).reset_index(drop=True)
example_set = train_datagen.flow_from_dataframe(
    example_df,
    path,
    x_col='filename',
    y_col='category',
    target_size=(224,224),
    class_mode='categorical'
)

# Show 15 augmented variants of that image in a 5 x 3 grid.
plt.figure(figsize=(12, 12))
for i in range(0, 15):
    plt.subplot(5, 3, i+1)
    # Draw exactly one batch per subplot; the batch holds the single example image.
    X_batch, Y_batch = next(example_set)
    # Deliberately not named `image`: that would shadow the imported
    # keras.preprocessing `image` module and break any cell that later calls
    # image.load_img.
    augmented_img = X_batch[0]
    plt.imshow(augmented_img)
plt.tight_layout()
plt.show()