### Team: <br>
1. Dina Zakria 
2. Ahmed Sameh
3. Abdelrhman Amr

In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

## Imports

In [None]:
# Imports
import cv2
import os
from tqdm import tqdm
from glob import glob
import tensorflow as tf
from tensorflow import keras
from keras.utils import np_utils
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import matplotlib.image as mpimg


from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential, Model
from keras.layers import Conv2D, MaxPooling2D, Flatten, Dense, Dropout, BatchNormalization, GlobalAveragePooling2D

## Loading Dataset

In [None]:
# Load the driver/image index CSV; later cells use its 'subject' and 'classname' columns.
df = pd.read_csv("../input/state-farm-distracted-driver-detection/driver_imgs_list.csv")
# Preview the first rows.
df.head()

In [None]:
# Group the image index by driver (the 'subject' column)
by_drivers = df.groupby('subject')
unique_drivers = by_drivers.groups.keys()

# Images recorded per driver, counted on the 'classname' column
images_per_driver = round(by_drivers.count()['classname'])

print("There are: ", len(unique_drivers), " unique drivers")
print('These are the numbers of each test subject: \n', images_per_driver)

In [None]:
# Number of classes to be classified; load_train iterates over the
# training sub-folders c0 .. c{NUMBER_CLASSES - 1}.
NUMBER_CLASSES = 10

> ### Helper Functions

In [None]:
# Read with opencv
def get_cv2_image(path, img_rows, img_cols, color_type=3):
    """
    Read an image with OpenCV and resize it to ``img_rows`` x ``img_cols``.

    Args:
        path: Path of the image file to read.
        img_rows: Target height (number of rows) of the returned image.
        img_cols: Target width (number of columns) of the returned image.
        color_type: 1 loads the image as grayscale; 3 loads it in colour and
            converts OpenCV's BGR channel order to RGB.

    Returns:
        The resized image array with ``img_rows`` rows and ``img_cols`` columns.

    Raises:
        ValueError: If ``color_type`` is neither 1 nor 3.
        FileNotFoundError: If OpenCV could not read an image from ``path``.
    """
    # Validate up front: previously an unsupported value left `img` unbound.
    if color_type not in (1, 3):
        raise ValueError(
            "color_type must be 1 (grayscale) or 3 (colour), got {!r}".format(color_type))

    read_flag = cv2.IMREAD_GRAYSCALE if color_type == 1 else cv2.IMREAD_COLOR
    img = cv2.imread(path, read_flag)
    if img is None:
        # cv2.imread signals failure by returning None instead of raising.
        raise FileNotFoundError("Could not read image: {}".format(path))

    if color_type == 3:
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # Converts to RGB

    # cv2.resize takes dsize as (width, height), i.e. (columns, rows).
    return cv2.resize(img, (img_cols, img_rows))

# Loading Training dataset
def load_train(img_rows, img_cols, color_type=3):
    """
    Load every training image together with its integer class label.

    Iterates over the class folders ``c0`` .. ``c{NUMBER_CLASSES - 1}`` under
    the training directory and reads each ``*.jpg`` through ``get_cv2_image``.

    Args:
        img_rows: Row count passed to ``get_cv2_image``.
        img_cols: Column count passed to ``get_cv2_image``.
        color_type: Colour mode passed to ``get_cv2_image`` (1 or 3).

    Returns:
        A ``(train_images, train_labels)`` tuple of parallel lists: the loaded
        images and the class index (folder number) of each image.
    """
    train_images = []
    train_labels = []
    # Loop over the training folder
    for classed in tqdm(range(NUMBER_CLASSES)):
        print('Loading directory c{}'.format(classed))
        # Sort the listing: glob order is filesystem-dependent, and a stable
        # order keeps the seeded train/validation split reproducible
        # (load_test already sorts its file list the same way).
        files = sorted(glob(os.path.join(
            '../input/state-farm-distracted-driver-detection/imgs/train/c' + str(classed), '*.jpg')))
        for file in files:
            train_images.append(get_cv2_image(file, img_rows, img_cols, color_type))
            train_labels.append(classed)
    return train_images, train_labels

def read_and_normalize_train_data(img_rows, img_cols, color_type):
    """
    Load the training images, one-hot encode the labels and split 80/20.

    Note: despite the name, pixel values are NOT rescaled here; the arrays are
    returned as uint8 and callers scale them when needed.

    Args:
        img_rows: Image row count passed to ``load_train``.
        img_cols: Image column count passed to ``load_train``.
        color_type: Number of channels (1 or 3) passed to ``load_train``.

    Returns:
        ``(x_train, x_test, y_train, y_test)`` where the x arrays are uint8 with
        shape ``(-1, img_rows, img_cols, color_type)`` and the y arrays are
        one-hot label matrices with ``NUMBER_CLASSES`` columns.
    """
    X, labels = load_train(img_rows, img_cols, color_type)
    # Use the shared constant instead of a literal 10 so the label width always
    # matches the folders load_train iterated over.
    y = np_utils.to_categorical(labels, NUMBER_CLASSES)  # categorical train label
    # Fixed random_state keeps the split repeatable between runs.
    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
    x_train = np.array(x_train, dtype=np.uint8).reshape(-1, img_rows, img_cols, color_type)
    x_test = np.array(x_test, dtype=np.uint8).reshape(-1, img_rows, img_cols, color_type)

    return x_train, x_test, y_train, y_test

# Loading test dataset
def load_test(size=200000, img_rows=64, img_cols=64, color_type=3):
    """
    Load up to ``size`` images from the test folder, in sorted file-name order.

    Args:
        size: Maximum number of images to load; values <= 0 load nothing.
        img_rows: Image row count passed to ``get_cv2_image``.
        img_cols: Image column count passed to ``get_cv2_image``.
        color_type: Colour mode passed to ``get_cv2_image`` (1 or 3).

    Returns:
        ``(X_test, X_test_id)``: the list of loaded images and the parallel
        list of their base file names.
    """
    path = os.path.join('../input/state-farm-distracted-driver-detection/imgs/test', '*.jpg')
    # Slice before iterating so the progress bar reports the number of files
    # actually loaded rather than the size of the whole folder.
    files = sorted(glob(path))[:max(size, 0)]
    X_test, X_test_id = [], []
    for file in tqdm(files):
        X_test.append(get_cv2_image(file, img_rows, img_cols, color_type))
        X_test_id.append(os.path.basename(file))
    return X_test, X_test_id

def read_and_normalize_sampled_test_data(size, img_rows, img_cols, color_type=3):
    """
    Load a sample of test images as one uint8 array plus their file names.

    Delegates to ``load_test`` and stacks the result into an array of shape
    ``(-1, img_rows, img_cols, color_type)``. Pixel values are not rescaled.

    Returns:
        ``(test_data, test_ids)``: the image array and the list of file names.
    """
    images, ids = load_test(size, img_rows, img_cols, color_type)
    target_shape = (-1, img_rows, img_cols, color_type)
    stacked = np.array(images, dtype=np.uint8).reshape(target_shape)
    return stacked, ids

In [None]:
# dimension of images (every image is resized to img_rows x img_cols on load)
img_rows = 128 
img_cols = 128

# 1 = load images as single-channel grayscale (get_cv2_image also accepts 3 for RGB)
color_type = 1 # grey
# Number of test-folder images to load for the sample predictions
nb_test_samples = 200

In [None]:
# Load the labelled training images; x_test / y_test are the 20% hold-out
# split that later cells use as validation data.
x_train, x_test, y_train, y_test = read_and_normalize_train_data(img_rows, img_cols, color_type)

# Load a sample of the unlabelled test-folder images. Note that test_targets
# holds the image file names returned by the loader, not class labels.
test_files, test_targets = read_and_normalize_sampled_test_data(nb_test_samples, img_rows, img_cols, color_type)

## EDA

In [None]:
# Sizes of the train / hold-out splits and of the full test folder on disk
x_train_size, x_test_size = len(x_train), len(x_test)
_test_pattern = os.path.join('../input/state-farm-distracted-driver-detection/imgs/test', '*.jpg')
test_files_size = len(glob(_test_pattern))

> ## Statistical numbers about the data

In [None]:
# (message template, value) pairs, printed in order
_dataset_summary = [
    ('There are %s total images.', x_train_size + x_test_size + test_files_size),
    ('There are %d total training categories.', NUMBER_CLASSES),
    ('There are %d training images.', x_train_size),
    ('There are %d validation images.', x_test_size),
    ('There are %d test images.', test_files_size),
]
for _template, _value in _dataset_summary:
    print(_template % _value)

> ## Data Visualization

In [None]:
import plotly.express as px

# Histogram of the image index by its 'classname' column, one colour per class.
px.histogram(df, x="classname", color="classname", title="Number of images by categories ")

> **Comment:** As we can see from the figure above, the classes are well balanced.

In [None]:
# Number of images per driver / test subject
subject_counts = df['subject'].value_counts()
# Two-column frame: the driver id and how many images that driver has
drivers_id = subject_counts.rename_axis('driver_id').reset_index(name='Counts')
px.histogram(drivers_id, x="driver_id", y="Counts", color="driver_id", title="Number of images by subjects ")

In [None]:
# np.save('./x_train.npy',x_train)
# np.save('./y_train.npy',y_train)
# np.save('./x_test.npy',x_test)
# np.save('./y_test.npy',y_test)

# Fully connected layer

In [None]:
# x_train = np.load('../input/dl-project/x_train.npy').astype('float32')/255
# y_train = np_utils.to_categorical(np.load('../input/dl-project/y_train.npy'))
# x_val = np.load('../input/dl-project/x_test.npy').astype('float32')/255
# y_val = np_utils.to_categorical(np.load('../input/dl-project/y_test.npy'))

In [None]:
# x_train = tf.convert_to_tensor(x_train, dtype=tf.float32)/255
# y_train = tf.convert_to_tensor(y_train, dtype=tf.float32)
# x_val = tf.convert_to_tensor(x_val, dtype=tf.float32)/255
# y_val = tf.convert_to_tensor(y_val, dtype=tf.float32)

In [None]:
# Training settings shared by the fully connected and CNN models below
no_epoch = 20      # maximum number of epochs passed to fit()
batch_size = 64
# Aliases of the image dimensions, used when declaring model input shapes
img_height = img_rows
img_width = img_cols
# Channel count of the model inputs (images were loaded with color_type = 1)
channels = 1

> ## Building initial model

In [None]:
# temp
x_train_FC = x_train.reshape((x_train_size, img_rows*img_cols*1))
x_train_FC = x_train_FC.astype('float32')/255
x_val_FC = x_test.reshape((x_test_size, img_rows*img_cols*1))
x_val_FC = x_val_FC.astype('float32')/255
y_val = y_test

In [None]:
# Fully connected baseline: three ReLU hidden layers on the flattened pixels,
# followed by a 10-way softmax output.
FC_init1 = Sequential([
    Dense(512, activation='relu', name='Layer_1',
          input_shape=(img_width * img_height * channels,)),
    Dense(256, activation='relu', name='Layer_2'),
    Dense(128, activation='relu', name='Layer_3'),
    Dense(10, activation='softmax'),
])

In [None]:
# NOTE(review): 10e-3 evaluates to 0.01, not 0.001 - confirm which learning
# rate was intended (the CNN and VGG models below use 0.0001).
opt = keras.optimizers.Adam(learning_rate=10e-3)
# One-hot labels, so categorical cross-entropy is the loss; accuracy is tracked.
FC_init1.compile(optimizer=opt,
                loss='categorical_crossentropy',
                metrics=['accuracy'])

In [None]:
# Print the layer-by-layer summary of the fully connected model.
FC_init1.summary()

In [None]:
# Stop training once val_loss has not improved by at least min_delta for
# `patience` consecutive epochs, then restore the best weights seen.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.001,
    patience=3,
    verbose=1,
    mode='min',
    baseline=None,
    restore_best_weights=True
)

In [None]:
# Train the fully connected model on the flattened images, validating on the
# hold-out split; early stopping may end training before no_epoch epochs.
History = FC_init1.fit(x_train_FC,y_train, validation_data=(x_val_FC,y_val), verbose = 1, epochs = no_epoch, batch_size = batch_size,callbacks=[early_stopping])

In [None]:
# Training curves of the fully connected model
acc = History.history['accuracy']
val_acc = History.history['val_accuracy']
loss = History.history['loss']
val_loss = History.history['val_loss']

# Called epoch_range rather than `epochs`: the notebook also uses `epochs` for
# the integer epoch count passed to fit(), and the two must not be mixed up.
epoch_range = range(len(acc))

plt.plot(epoch_range, acc, 'bo', label='Training acc')
plt.plot(epoch_range, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.legend()

plt.figure()

plt.plot(epoch_range, loss, 'bo', label='Training loss')
plt.plot(epoch_range, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.legend()

plt.show()

## Predict 

In [None]:
# Sanity check: dtype of the loaded test images
test_files.dtype

In [None]:
# Sanity check: shape of a single test image
test_files[0].shape

In [None]:
# Sanity check: dtype of the training images, for comparison with the test images
x_train.dtype

In [None]:
# Number of test images loaded in memory vs. number of files in the test folder
print(len(test_files))
print(test_files_size)


In [None]:
# Flatten each test image into one row for the fully connected model. The row
# count comes from the loaded array itself, so this still works when fewer
# than nb_test_samples images were available.
test_imgs = test_files.reshape((len(test_files), -1))
# Same 1/255 scaling that was applied to the training data
test_imgs = test_imgs.astype('float32')/255


In [None]:
# Class probabilities from the fully connected model for the sampled test images
pred = FC_init1.predict(test_imgs)
# Show the softmax output for the first test image
pred[0]

## Building Baseline CNN Model

In [None]:
# temp
x_train_CNN = x_train.reshape((x_train_size, img_rows,img_cols,1))
x_train_CNN = x_train_CNN.astype('float32')/255
x_val_CNN = x_test.reshape((x_test_size, img_rows,img_cols,1))
x_val_CNN = x_val_CNN.astype('float32')/255
y_val = y_test

In [None]:
# Baseline CNN: three 3x3 convolution stages (max-pooling after the first two),
# then a small dense head ending in a 10-way softmax.
CNN_model = Sequential([
    Conv2D(32, (3, 3), activation='relu',
           input_shape=(img_height, img_width, channels)),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu'),
    Flatten(),
    Dense(64, activation='relu'),
    Dense(10, activation='softmax'),
])


In [None]:
# Adam with a small learning rate; one-hot labels, so categorical cross-entropy.
adam = tf.keras.optimizers.Adam(learning_rate=0.0001)
CNN_model.compile(optimizer=adam,
              loss='categorical_crossentropy',
              metrics=['accuracy'])

In [None]:
# Print the layer-by-layer summary of the baseline CNN.
CNN_model.summary()

In [None]:
# Train the CNN with the same epoch budget, batch size and early-stopping
# callback object as the fully connected model.
History_CNN = CNN_model.fit(x_train_CNN,y_train, validation_data=(x_val_CNN,y_val), verbose = 1, epochs = no_epoch, batch_size = batch_size,callbacks=[early_stopping])

In [None]:
# Training curves of the baseline CNN
acc = History_CNN.history['accuracy']
val_acc = History_CNN.history['val_accuracy']
loss = History_CNN.history['loss']
val_loss = History_CNN.history['val_loss']

# Called epoch_range rather than `epochs`: the notebook also uses `epochs` for
# the integer epoch count passed to fit(), and the two must not be mixed up.
epoch_range = range(len(acc))

plt.plot(epoch_range, acc, 'bo', label='Training acc')
plt.plot(epoch_range, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.legend()

plt.figure()

plt.plot(epoch_range, loss, 'bo', label='Training loss')
plt.plot(epoch_range, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.legend()

plt.show()

In [None]:
# Shape the test images as (samples, rows, cols, 1) for the CNN. -1 lets NumPy
# infer the sample count, so this still works when fewer than nb_test_samples
# images were available.
test_imgs = test_files.reshape((-1, img_rows, img_cols, 1))
# Same 1/255 scaling that was applied to the training data
test_imgs = test_imgs.astype('float32')/255


In [None]:
# Class probabilities from the CNN for the sampled test images
pred = CNN_model.predict(test_imgs)
# Show the softmax output for the first test image
pred[0]

## Data Augmentation

In [None]:
# Version 01
#
# featurewise_center / featurewise_std_normalization are intentionally not set:
# they only take effect after calling `.fit(data)` on the generator, which this
# notebook never does, so Keras skipped them and warned on every batch.
# Leaving them out keeps the data the model actually sees unchanged.
#
# NOTE(review): horizontal_flip mirrors the images. If any classes differ only
# by left vs. right hand, flipping would mislabel them - confirm against the
# dataset's class definitions.
Traindatagen = ImageDataGenerator(
      rescale = 1.0/255,
      rotation_range=20,
      shear_range=0.2,
      horizontal_flip=True,
      vertical_flip=False,
      fill_mode='nearest')


# Validation data is only rescaled. The former `validation_split` argument was
# dropped because `.flow()` is called without a `subset`, so it had no effect.
Valdatagen = ImageDataGenerator(rescale=1.0/ 255)

# Transfer learning using VGG16

In [None]:
# loading train images
x_train_vgg, x_test_vgg, y_train_vgg, y_test_vgg = read_and_normalize_train_data(img_rows, img_cols, 3)

# loading validation images
# test_files, test_targets = read_and_normalize_sampled_test_data(nb_test_samples, img_rows, img_cols, 3)

In [None]:
# Batch iterators over the in-memory arrays: augmented batches for training,
# rescaled-only batches for validation. The batch sizes here must stay in sync
# with train_bs / valdi_bs, which are used to compute the step counts for fit().
train_generator = Traindatagen.flow(x_train_vgg, y_train_vgg, batch_size = 256)
val_generator = Valdatagen.flow(x_test_vgg, y_test_vgg, batch_size = 64)

In [None]:
# Settings for the VGG16 transfer-learning run
epochs = 40      # maximum number of epochs passed to fit()
train_bs = 256   # training batch size, used to derive steps_per_epoch
valdi_bs = 64    # validation batch size, used to derive validation_steps

In [None]:
from keras.applications.vgg16 import VGG16

def classificationModel(input_shape=(128, 128, 3), num_classes=10):
    """
    Build a transfer-learning classifier on a frozen VGG16 convolutional base.

    Args:
        input_shape: Shape of one input image as (height, width, channels).
            Defaults to (128, 128, 3).
        num_classes: Number of units in the softmax output layer. Defaults to 10.

    Returns:
        An uncompiled Keras ``Model`` mapping an image batch to class
        probabilities.
    """
    inp = keras.layers.Input(shape=input_shape)
    # ImageNet-pretrained VGG16 without its dense classifier head
    vgg = VGG16(weights='imagenet',
                include_top=False,
                input_tensor=inp,
                input_shape=input_shape)
    # Freeze the convolutional base; only the new head below is trained.
    vgg.trainable = False

    # New head on top of the last pooling layer: flatten -> Dense(256) -> softmax
    x = vgg.get_layer('block5_pool').output
    x = tf.keras.layers.Flatten()(x)
    x = keras.layers.Dense(256, activation='relu')(x)
    output = keras.layers.Dense(num_classes, activation='softmax')(x)

    model = tf.keras.models.Model(inputs=inp, outputs=output)

    return model
    

In [None]:
# Reset Keras' global state before building the transfer-learning model.
keras.backend.clear_session()
model = classificationModel()
model.summary()

In [None]:
# opt = tf.keras.optimizers.Adam(learning_rate=0.0001)
# model.compile(optimizer='rmsprop',
#                 loss='categorical_crossentropy',
#                 metrics=['accuracy'])

In [None]:
# Adam with a small learning rate for the VGG16-based model; one-hot labels,
# so categorical cross-entropy is the loss.
opt = tf.keras.optimizers.Adam(learning_rate=0.0001)
model.compile(optimizer=opt,
                loss='categorical_crossentropy',
                metrics=['accuracy'])

In [None]:
# Redefine early stopping for the VGG16 run with a tighter improvement
# threshold (min_delta=0.0001); this replaces the earlier callback object.
early_stopping = keras.callbacks.EarlyStopping(
    monitor='val_loss',
    min_delta=0.0001,
    patience=3,
    verbose=1,
    mode='min',
    baseline=None,
    restore_best_weights=True
)

## CNN Visualization

In [None]:
%%capture
# Install the wandb package; %%capture hides the installer output.
!pip install wandb

In [None]:
import wandb
from wandb.keras import WandbCallback

# Authenticate this session with Weights & Biases.
wandb.login()

In [None]:
class GradCAM:
    """
    Guided Grad-CAM heatmaps for one layer of a Keras model.

    Reference:
        https://www.pyimagesearch.com/2020/03/09/grad-cam-visualize-class-activation-maps-with-keras-tensorflow-and-deep-learning/
    """

    def __init__(self, model, layerName):
        """
        Args:
            model: Keras model to explain.
            layerName: Name of the layer whose activations drive the heatmap.
        """
        self.model = model
        self.layerName = layerName

        # Auxiliary model returning both the target layer's activations and
        # the final predictions from a single forward pass.
        self.gradModel = tf.keras.models.Model(inputs=[self.model.inputs],
                                               outputs=[self.model.get_layer(self.layerName).output, self.model.output])

    def compute_heatmap(self, image, classIdx, eps=1e-8):
        """
        Compute a uint8 heatmap for ``classIdx``, resized to the input image.

        Args:
            image: Input batch; only its first item is used, and ``shape[1]`` /
                ``shape[2]`` are taken as the output height / width
                (assumes a (1, H, W, C) batch - TODO confirm with callers).
            classIdx: Index of the class whose score is differentiated.
            eps: Small constant guarding against division by zero when the
                heatmap is constant.

        Returns:
            A 2-D uint8 array with values in 0..255.
        """
        with tf.GradientTape() as tape:
            # Explicitly watch the target layer's variables (presumably so its
            # output stays differentiable when the layer is frozen - TODO confirm).
            tape.watch(self.gradModel.get_layer(self.layerName).variables)
            inputs = tf.cast(image, tf.float32)
            (convOutputs, predictions) = self.gradModel(inputs)

            # Decide binary vs. multi-class from the OUTPUT WIDTH. The previous
            # check, len(predictions) == 1, measured the batch size, so every
            # single-image batch took the binary branch and the whole softmax
            # vector was differentiated instead of the requested class score.
            if predictions.shape[-1] == 1:
                # Binary Classification
                loss = predictions[:, 0]
            else:
                loss = predictions[:, classIdx]

        grads = tape.gradient(loss, convOutputs)

        # Guided gradients: keep only positive gradients at positive activations.
        castConvOutputs = tf.cast(convOutputs > 0, "float32")
        castGrads = tf.cast(grads > 0, "float32")
        guidedGrads = castConvOutputs * castGrads * grads

        # Drop the batch dimension (first item only).
        convOutputs = convOutputs[0]
        guidedGrads = guidedGrads[0]

        # One weight per channel (mean over the spatial axes), then a weighted
        # sum of the activation maps across channels.
        weights = tf.reduce_mean(guidedGrads, axis=(0, 1))
        cam = tf.reduce_sum(tf.multiply(weights, convOutputs), axis=-1)

        # cv2.resize takes dsize as (width, height).
        (w, h) = (image.shape[2], image.shape[1])
        heatmap = cv2.resize(cam.numpy(), (w, h))

        # Min-max normalise to [0, 1], then scale to 0..255.
        numer = heatmap - np.min(heatmap)
        denom = (heatmap.max() - heatmap.min()) + eps
        heatmap = numer / denom
        heatmap = (heatmap * 255).astype("uint8")

        return heatmap

    def overlay_heatmap(self, heatmap, image, alpha=0.5, colormap=cv2.COLORMAP_HOT):
        """
        Colour-map ``heatmap`` and blend it with ``image``.

        Args:
            heatmap: uint8 heatmap as returned by ``compute_heatmap``.
            image: Image to blend with; weighted by ``alpha``.
            alpha: Weight of ``image``; the heatmap gets ``1 - alpha``.
            colormap: OpenCV colormap applied to the heatmap.

        Returns:
            ``(heatmap, output)``: the colour-mapped heatmap and the blend.
        """
        heatmap = cv2.applyColorMap(heatmap, colormap)
        output = cv2.addWeighted(image, alpha, heatmap, 1 - alpha, 0)

        return (heatmap, output)

In [None]:
class GRADCamLogger(tf.keras.callbacks.Callback):
    """
    Keras callback that logs Grad-CAM overlays to Weights & Biases each epoch.

    Args:
        validation_data: Iterable of images (without a batch dimension) to
            visualise; pixel values are multiplied by 255 before logging, so
            they are expected in [0, 1].
        layer_name: Name of the layer passed to ``GradCAM``.
    """

    def __init__(self, validation_data, layer_name):
        super(GRADCamLogger, self).__init__()
        self.validation_data = validation_data
        self.layer_name = layer_name

    def on_epoch_end(self, epoch, logs=None):
        """
        Log the sample images and their Grad-CAM overlays.

        The parameter order follows Keras' ``on_epoch_end(epoch, logs=None)``;
        the previous definition had the two names swapped.
        """
        images = []
        grad_cam = []

        # Use the model Keras attached to this callback (the one being
        # trained) instead of relying on a notebook-global `model` variable.
        cam = GradCAM(self.model, self.layer_name)

        for image in self.validation_data:
            image = np.expand_dims(image, 0)
            pred = self.model.predict(image, verbose=0)
            classIDx = np.argmax(pred[0])

            ## Compute Heatmap
            heatmap = cam.compute_heatmap(image, classIDx)

            # Drop the batch dimension and convert back to uint8 pixels.
            image = image.reshape(image.shape[1:])
            image = image*255
            image = image.astype(np.uint8)

            ## Overlay heatmap on original image
            # cv2.resize takes dsize as (width, height) = (cols, rows).
            heatmap = cv2.resize(heatmap, (image.shape[1], image.shape[0]))
            (heatmap, output) = cam.overlay_heatmap(heatmap, image, alpha=0.5)

            images.append(image)
            grad_cam.append(output)

        wandb.log({"images": [wandb.Image(image)
                              for image in images]})
        wandb.log({"gradcam": [wandb.Image(overlay)
                               for overlay in grad_cam]})

In [None]:
## Prepare sample images to run your GradCam on. 
# Take one batch (index 20) of images and labels from the validation generator.
sample_images, sample_labels = val_generator[20]
# Display the shapes of the sampled batch.
sample_images.shape, sample_labels.shape

> ## Interactive Monitoring Window <br>
You can scroll through the charts and view system utilization and the model architecture. <br>
**First Set of Panels (Charts):** <br>
> 1. Training accuracy, Validation accuracy vs Step
> 2. Training loss, Validation loss vs Step
> 3. Epochs vs Step

> **Second Set of Panels (Activation maps):** <br>
> 1. Gradient Cam activation maps per class vs Step
> 2. Images vs Step
> 3. Examples vs Step

In [None]:
# Start a Weights & Biases run in project "test" under the "team-7" entity.
wandb.init(project="test", entity="team-7")

> ## Accuracy <br>
- Transfer learning with the VGG16 convolutional base and augmented data reached a validation **accuracy** of **98.42%**.

In [None]:
# Earlier ResNet experiment, kept for reference only:
# History_Rs = resnetRs.fit(train_generator,
#          validation_data=val_generator,
#          steps_per_epoch=len(x_train_rs) // train_bs, epochs=epochs,
#           validation_steps =len(x_test_rs)//valdi_bs, verbose = 1)


# Train the VGG16-based model on the augmented generator. Step counts are the
# number of full batches in each split. Callbacks: W&B metric logging,
# Grad-CAM overlays from layer 'block5_conv3', and early stopping.
history = model.fit(train_generator,
                          validation_data=val_generator,
                          steps_per_epoch=len(x_train_vgg) // train_bs, 
                          epochs=epochs,
                          validation_steps =len(x_test_vgg)//valdi_bs, 
                          verbose = 1,
                          callbacks=[WandbCallback(data_type="image", validation_data=(sample_images, sample_labels)),
                                     GRADCamLogger(sample_images, layer_name='block5_conv3'),
                                     early_stopping])


In [None]:
# Save the trained transfer-learning model to an HDF5 file in the working directory.
model.save('./vgg_tl_model.h5')

In [None]:
# Reload the model from the file written by the save cell above
# ('./vgg_tl_model.h5'); the previous path './my_model.h5' is never created
# by this notebook, so the cell failed on a fresh run.
new_model = tf.keras.models.load_model('./vgg_tl_model.h5')

In [None]:
# Training curves of the VGG16 transfer-learning model
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']
loss = history.history['loss']
val_loss = history.history['val_loss']

# Use a separate name for the x-axis values: assigning to `epochs` here would
# overwrite the integer epoch count, and re-running the fit cell afterwards
# would then pass a range object as `epochs`.
epoch_range = range(len(acc))

plt.plot(epoch_range, acc, 'bo', label='Training acc')
plt.plot(epoch_range, val_acc, 'b', label='Validation acc')
plt.title('Training and validation accuracy')
plt.xlabel('Epoch')
plt.legend()

plt.figure()

plt.plot(epoch_range, loss, 'bo', label='Training loss')
plt.plot(epoch_range, val_loss, 'b', label='Validation loss')
plt.title('Training and validation loss')
plt.xlabel('Epoch')
plt.legend()

plt.show()