# <span style="color:#0b486b"> File Information</span>
***
File: **CNN_ImageNet.ipynb**  <br/>
Author: **Matthew Khoo**  <br/>
Last Updated: **23/07/2020**    <br/>
***

# <span style="color:#0b486b">Convolutional Neural Networks (CNN) for Image Classification </span>

In [None]:
%load_ext autoreload
%autoreload 2

In [None]:
import os
import matplotlib.pyplot as plt
plt.style.use('ggplot')
%matplotlib inline
import modules
from modules import SimplePreprocessor, AnimalsDatasetManager, DefaultModel

## <span style="color:#0b486b">Image Data Preprocessing </span>

In [None]:
# Create a dictionary from dataset directory
def create_label_folder_dict(adir):
    """Map every immediate sub-folder of ``adir`` to its absolute path.

    Args:
        adir: Path of the dataset directory to scan.

    Returns:
        dict: ``{folder_name: absolute_folder_path}``. Entries of ``adir``
        that are not directories are left out.
    """
    # Pair each entry name with its joined path once, then keep directories only.
    named_paths = ((entry, os.path.join(adir, entry)) for entry in os.listdir(adir))
    return {entry: os.path.abspath(path)
            for entry, path in named_paths
            if os.path.isdir(path)}

In [None]:
# Root folder of the Animals dataset; each sub-folder name becomes a key
# mapped to that folder's absolute path.
animals_root = "./datasets/Animals"
label_folder_dict = create_label_folder_dict(animals_root)

In [None]:
# Load the dataset folders through a SimplePreprocessor configured for 32x32.
sp = SimplePreprocessor(width=32, height=32)
data_manager = AnimalsDatasetManager([sp])
data_manager.load(label_folder_dict, verbose=100)
# NOTE(review): these two calls presumably encode the labels and populate the
# X_/y_ train, valid and test attributes read by later cells — confirm in modules.py.
data_manager.process_data_label()
data_manager.train_valid_test_split()

In [None]:
# Report the feature/label array shapes of each split, then the class names.
for split in ("train", "valid", "test"):
    features = getattr(data_manager, "X_" + split)
    targets = getattr(data_manager, "y_" + split)
    print(features.shape, targets.shape)
print(data_manager.classes)

## <span style="color:#0b486b">Default Model Use Example </span>

In [None]:
# Baseline network: the unmodified DefaultModel, using SGD with a learning
# rate of 0.5 and one output class per entry in data_manager.classes.
network1 = DefaultModel(name='network1',
                       num_classes=len(data_manager.classes),
                       optimizer='sgd',
                       batch_size= 128,
                       num_epochs = 20,
                       learning_rate=0.5)

In [None]:
# Build the default CNN and display a summary of the resulting model
network1.build_cnn()
network1.summary()

In [None]:
# Train the model on the data held by data_manager for 20 epochs.
# NOTE(review): batch_size=64 here differs from the batch_size=128 given to the
# constructor; presumably the value passed to fit() takes precedence — confirm.
network1.fit(data_manager, batch_size = 64, num_epochs = 20)

In [None]:
# Compute the accuracy of the trained model on the held-out test split
# (left as the cell's last expression so the notebook displays the result)
network1.compute_accuracy(data_manager.X_test, data_manager.y_test)

In [None]:
# Plot the training progress recorded for network1
network1.plot_progress()

In [None]:
# Predict the labels of the first 10 test images
# (left as the cell's last expression so the notebook displays the result)
network1.predict(data_manager.X_test[0:10])

In [None]:
# Visualize the prediction results for several test images; the true labels
# and the class names are passed along with the images
network1.plot_prediction(data_manager.X_test, data_manager.y_test, data_manager.classes)

## <span style="color:#0b486b">Default Model Experiment with Varying Learning Rates</span>

In [None]:
# Learning rates to compare. Every other parameter matches the default-model
# example, so the learning rate is the only thing that varies between networks.
learningRates = [0.0001, 0.001, 0.005, 0.01, 0.1]
vanilla_models = [
    DefaultModel(name='network' + str(i),
                 num_classes=len(data_manager.classes),
                 optimizer='sgd',
                 batch_size=128,
                 num_epochs=20,
                 learning_rate=rate)
    for i, rate in enumerate(learningRates)
]

In [None]:
# Build, train and plot each network in turn. zip() pairs every network with
# the learning rate it was created with, so no separate index has to be kept
# in sync by hand.
for rate, network in zip(learningRates, vanilla_models):
    print("Learning rate of", str(rate))
    network.build_cnn()
    network.fit(data_manager, batch_size=64, num_epochs=20)
    network.plot_progress()
    print("\n Next \n")

## <span style="color:#0b486b">Improving the Default Model </span>

In [None]:
from keras.preprocessing.image import ImageDataGenerator

In [None]:
class MyModel(DefaultModel):
    """Improved CNN that overrides the default model's architecture and training.

    The network is ``num_blocks`` repetitions of the layer pattern
    [conv, batch norm, activation, conv, batch norm, activation, mean pool,
    dropout], followed by a flatten layer and a softmax classifier.

    * Batch-norm layers are only inserted when ``batch_norm`` is not None.
    * Dropout layers are only inserted when ``drop_rate`` is greater than 0.
    * When ``is_augmentation`` is true, ``fit`` trains on batches produced by
      an ``ImageDataGenerator`` instead of the raw training arrays.

    All constructor arguments are forwarded positionally to ``DefaultModel``.

    NOTE(review): ``layers`` is not imported in any visible notebook cell;
    presumably ``from tensorflow.keras import layers`` (or the Keras
    equivalent used by modules.py) is required — confirm.
    """

    def __init__(self,
                 name='network1',
                 width=32, height=32, depth=3,
                 num_blocks=2,
                 feature_maps=32,
                 num_classes=4,
                 drop_rate=0.2,
                 batch_norm=None,
                 is_augmentation=False,
                 activation_func='relu',
                 optimizer='adam',
                 batch_size=10,
                 num_epochs=20,
                 learning_rate=0.0001,
                 verbose=True):
        # Input shape of the first conv layer, in (rows, cols, channels) order.
        # Stored before delegating so it is already available should the base
        # constructor ever trigger a build.
        self._input_shape = (height, width, depth)
        super(MyModel, self).__init__(name, width, height, depth, num_blocks, feature_maps,
                                      num_classes, drop_rate, batch_norm, is_augmentation,
                                      activation_func, optimizer, batch_size, num_epochs,
                                      learning_rate, verbose)

    def get_sub_block(self, first, current_block):
        """Return the layers of one [conv, batch norm, activation] sub-block.

        Args:
            first: True only for the very first sub-block of the network,
                whose conv layer must declare the input shape.
            current_block: Index of the enclosing block, used to look up the
                number of filters.

        Returns:
            list: The layers in the order they must be added to the model.
        """
        try:
            num_filters = self.feature_maps[current_block]
        except TypeError:
            # feature_maps is a plain number (callers pass e.g. 32 or 16):
            # use the same filter count in every block.
            num_filters = self.feature_maps

        conv_kwargs = {'strides': (1, 1), 'padding': 'same'}
        if first:
            conv_kwargs['input_shape'] = self._input_shape

        # The conv layer is left linear: the activation is applied exactly
        # once, by the dedicated Activation layer after the optional batch
        # norm, which is the order the block pattern describes.
        sub_block = [layers.Conv2D(num_filters, (3, 3), **conv_kwargs)]
        if self.batch_norm is not None:
            sub_block.append(layers.BatchNormalization())
        sub_block.append(layers.Activation(self.activation_func))
        return sub_block

    def build_cnn(self):
        """Add all layers to ``self.model`` and compile it.

        NOTE(review): ``self.model`` is not created here; presumably
        ``DefaultModel.__init__`` creates an empty model — confirm. Calling
        this method twice would append a second copy of every layer.
        """
        first = True
        for block_index in range(self.num_blocks):
            # Two [conv, batch norm, activation] sub-blocks per block.
            for _ in range(2):
                for layer in self.get_sub_block(first, block_index):
                    self.model.add(layer)
                first = False

            # Mean pooling, followed by dropout only when a rate is set.
            self.model.add(layers.AveragePooling2D(pool_size=(2, 2), strides=(2, 2), padding='same'))
            if self.drop_rate > 0.0:
                self.model.add(layers.Dropout(self.drop_rate))

        # Classifier head (same as modules.py): flatten, then softmax.
        self.model.add(layers.Flatten())
        self.model.add(layers.Dense(self.num_classes, activation='softmax'))
        self.model.compile(optimizer=self.optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

    def fit(self, data_manager, batch_size=None, num_epochs=None):
        """Train the model and store the training history in ``self.history``.

        Args:
            data_manager: Object exposing ``X_train``, ``y_train``, ``X_valid``
                and ``y_valid``.
            batch_size: Batch size; defaults to ``self.batch_size`` when None.
            num_epochs: Number of epochs; defaults to ``self.num_epochs`` when
                None.
        """
        batch_size = self.batch_size if batch_size is None else batch_size
        num_epochs = self.num_epochs if num_epochs is None else num_epochs
        validation = (data_manager.X_valid, data_manager.y_valid)

        self.model.compile(optimizer=self.optimizer, loss='sparse_categorical_crossentropy', metrics=['accuracy'])

        if not self.is_augmentation:
            self.history = self.model.fit(x=data_manager.X_train, y=data_manager.y_train,
                                          validation_data=validation,
                                          epochs=num_epochs, batch_size=batch_size,
                                          verbose=self.verbose)
            return

        # Random augmentations applied to each training image.
        data_aug = ImageDataGenerator(rotation_range=15,
                                      width_shift_range=0.15,
                                      height_shift_range=0.15,
                                      shear_range=0.15,
                                      zoom_range=0.15,
                                      horizontal_flip=True,
                                      fill_mode='nearest')
        batches = data_aug.flow(x=data_manager.X_train,
                                y=data_manager.y_train,
                                batch_size=batch_size)
        # fit_generator is deprecated in TensorFlow 2.1+ (fit accepts generators
        # directly); keep using it where it exists and fall back to fit otherwise.
        train_on_generator = getattr(self.model, 'fit_generator', None) or self.model.fit
        self.history = train_on_generator(batches,
                                          validation_data=validation,
                                          epochs=num_epochs, verbose=self.verbose)

In [None]:
# Benchmark: fit MyModel with neither dropout nor batch normalization
no_drop_out_network = MyModel(name='network1',
                     feature_maps=32,
                     num_classes=len(data_manager.classes),
                     num_blocks=4,
                     drop_rate= 0.0, # 0.0 means build_cnn adds no Dropout layers
                     batch_norm=None,     
                     optimizer='adam',
                     learning_rate= 0.001)
no_drop_out_network.build_cnn()
no_drop_out_network.fit(data_manager)   # no overrides: uses the instance's batch_size / num_epochs (MyModel defaults are 10 and 20)
no_drop_out_network.plot_progress()

### <span style="color:#0b486b">Experiment 1: Dropout Rates </span>

In [None]:
# Dropout rates to compare. Every other parameter matches the no-dropout
# benchmark, so the dropout rate is the only thing that varies between networks.
dropRates = [0.2, 0.3, 0.4, 0.5]
drop_models = [
    MyModel(name='dropout_network' + str(i),
            feature_maps=32,
            num_classes=len(data_manager.classes),
            num_blocks=4,
            drop_rate=rate,
            batch_norm=None,
            optimizer='adam',
            learning_rate=0.001)
    for i, rate in enumerate(dropRates)
]

In [None]:
# Build, train and plot each network in turn. zip() pairs every network with
# the dropout rate it was created with, so no separate index has to be kept
# in sync by hand.
for rate, network in zip(dropRates, drop_models):
    print("Drop rate of", str(rate))
    network.build_cnn()
    network.fit(data_manager)   # no overrides: uses the instance's batch_size / num_epochs (MyModel defaults are 10 and 20)
    network.plot_progress()
    print("\n Next \n")

### <span style="color:#0b486b">Experiment 2: Batch Normalization </span>

In [None]:
# Same architecture as the no-dropout benchmark, with batch normalization
# switched on and dropout left off.
# NOTE(review): learning_rate is 0.01 here versus 0.001 in the benchmark, so
# this comparison changes two settings at once — confirm that is intended.
batch_norm_network = MyModel(name='batch_norm_network',
                     feature_maps=32,
                     num_classes=len(data_manager.classes),
                     num_blocks=4,
                     drop_rate= 0, 
                     batch_norm=True, # any non-None value makes MyModel add BatchNormalization layers
                     optimizer='adam',
                     learning_rate= 0.01)
batch_norm_network.build_cnn()
batch_norm_network.summary()

In [None]:
# Train the batch-norm network, then plot its training progress
batch_norm_network.fit(data_manager)   # no overrides: uses the instance's batch_size / num_epochs (MyModel defaults are 10 and 20)
batch_norm_network.plot_progress()

### <span style="color:#0b486b">Experiment 3: Hyperparameter Tuning </span>

In [None]:
# Hyperparameters tuned for fast training and best accuracy.
# Compared with the earlier experiments this uses fewer blocks (3), fewer
# feature maps (16), a dropout rate of 0.23, no batch norm and a lower learning rate.
bestModel = MyModel(name='network_best',
                     feature_maps=16,
                     num_classes=len(data_manager.classes),
                     num_blocks=3,
                     drop_rate= 0.23,  
                     batch_norm=None,    
                     optimizer='adam',
                     learning_rate= 0.0003)
bestModel.build_cnn()

In [None]:
# Display the summary of the tuned model
bestModel.summary()

In [None]:
# Train with an explicit batch size of 16 (num_epochs is not overridden, so the
# instance's own value is used), then plot the training progress
bestModel.fit(data_manager, batch_size = 16)
bestModel.plot_progress()

### <span style="color:#0b486b">Experiment 4: Data Augmentation </span>

In [None]:
# Same configuration as bestModel, but with is_augmentation=True so that
# MyModel.fit trains on batches produced by its ImageDataGenerator.
augmentation_network = MyModel(name='aug_network',
                     feature_maps=16,
                     num_classes=len(data_manager.classes),
                     num_blocks=3,
                     drop_rate= 0.23, # i.e. keep probability of 1 - 0.23
                     batch_norm=None,
                     is_augmentation= True,
                     optimizer='adam',
                     learning_rate= 0.0003)
augmentation_network.build_cnn()

In [None]:
# Display the summary of the augmentation model
augmentation_network.summary()

In [None]:
# Train on augmented batches of size 16 (num_epochs is not overridden, so the
# instance's own value is used), then plot the training progress
augmentation_network.fit(data_manager, batch_size = 16)
augmentation_network.plot_progress()

### <span style="color:#0b486b">Adversarial Attacks: PGD and FGM</span>

In [None]:
import numpy as np
from sklearn.metrics import accuracy_score
import cleverhans
from cleverhans.future.tf2.attacks import projected_gradient_descent, fast_gradient_method

In [None]:
# Plot images and their labels
def plot_images(row, col, images, labels):
    """Show images in a ``row`` x ``col`` grid, each captioned with its label.

    Args:
        row: Number of grid rows.
        col: Number of grid columns.
        images: Sequence of image arrays. Each is displayed as
            ``(image + 1.0) / 2`` — assumes pixel values lie in [-1, 1];
            TODO confirm against the preprocessing in modules.py.
        labels: Sequence of captions, parallel to ``images``.

    At most ``row * col`` images are drawn. When fewer images or labels are
    supplied, the remaining grid cells are simply left empty.
    """
    # Create the figure directly. Calling plt.clf() first acts on the current
    # figure and creates one when none exists, which leaves a stray empty
    # figure in the notebook output.
    fig = plt.figure(figsize=(2 * col, 2 * row))
    shown = min(row * col, len(images), len(labels))
    for i in range(shown):
        ax = fig.add_subplot(row, col, i + 1)
        ax.imshow((images[i] + 1.0) / 2)
        ax.set_xlabel(labels[i])
        ax.grid(False)
        ax.tick_params(axis="x", which="both", bottom=False, labelbottom=False)  # remove x ticks
        ax.tick_params(axis="y", which="both", left=False, labelleft=False)      # remove y ticks
    plt.show()

In [None]:
# Model that the adversarial attacks are run against. Apart from its name it
# has the same configuration as augmentation_network.
current_model = MyModel(name='network_attack',
                     feature_maps=16,
                     num_classes=len(data_manager.classes),
                     num_blocks=3,
                     drop_rate= 0.23, # i.e. keep probability of 1 - 0.23
                     batch_norm=None,
                     is_augmentation= True,
                     optimizer='adam',
                     learning_rate= 0.0003)
current_model.build_cnn()
# NOTE(review): training is commented out, so unless fit() is run elsewhere the
# attacks in the next cell target a model that has been built but never
# trained — confirm this is intended.
#current_model.fit(data_manager, batch_size = 16)

In [None]:
""" Note: All of the attacks, both PGD and FGSM are untargeted.
    References:
     - https://colab.research.google.com/github/andantillon/cleverhans/blob/master/tutorials/future/tf2/notebook_tutorials/mnist_fgsm_tutorial.ipynb#scrollTo=DlQ833TumOUC
"""
# The attacks are evaluated on images taken from the test split
xTest = data_manager.X_test
yTest = data_manager.y_test

# Draw 20 test images at random. np.random.randint samples independently on
# every iteration, so the same image can be selected more than once.
original_images = []
original_labels = []
for _ in range(20):
    random_index = np.random.randint(xTest.shape[0])
    original_images.append(xTest[random_index])
    original_labels.append(data_manager.classes[yTest[random_index]])   # store the class name, not the integer label

plot_images(5, 4, original_images, original_labels)   # original images and true labels plot

# Attacking procedure starts here
# Predicted labels and adversarial images, one entry per original image
pgd_labels_pred = []
pgd_images = []

fgsm_labels_pred = []
fgsm_images = []

# Hyperparameters
eps = 0.0313   # passed as `eps` to both attacks
eta = 0.005    # passed as `eps_iter` to the PGD attack
k = 20         # passed as `nb_iter` to the PGD attack

# Attack for each image
for image in original_images:
    # Add a leading batch dimension of 1 (image shape is hard-coded to 32x32x3).
    # NOTE(review): `tf` is not imported in any visible cell; this line
    # presumably needs `import tensorflow as tf` — confirm.
    x_tensor = tf.convert_to_tensor(image.reshape((1,32,32,3)))

    # PGD attack (no labels are passed to the attack)
    adv_image = projected_gradient_descent(current_model.get_model(), x_tensor, eps = eps, 
                                           eps_iter = eta , nb_iter= k, norm = np.inf, targeted = False)
    adv_label_pred = current_model.predict(adv_image)
    pgd_labels_pred.append(adv_label_pred[0])
    pgd_images.append(np.reshape(adv_image, (32,32,3)))   # drop the batch dimension again

    # FGSM attack (no labels are passed to the attack)
    adv2_image = fast_gradient_method(current_model.get_model(), x_tensor, eps = eps, norm = np.inf, targeted = False)
    adv2_label_pred = current_model.predict(adv2_image)
    fgsm_labels_pred.append(adv2_label_pred[0])
    fgsm_images.append(np.reshape(adv2_image, (32,32,3)))   # drop the batch dimension again

# Map the predicted labels to their class names
pgd_labels_classes = [data_manager.classes[pred] for pred in pgd_labels_pred]
fgsm_labels_classes = [data_manager.classes[pred2] for pred2 in fgsm_labels_pred]

plot_images(5, 4, pgd_images, pgd_labels_classes)   # PGD adversarial images with predicted classes
plot_images(5, 4, fgsm_images, fgsm_labels_classes)   # FGSM adversarial images with predicted classes

In [None]:
# Accuracy of the model's predictions on the adversarial images, measured
# against the true class names of the original images (one score per attack)
pgd_accuracy = accuracy_score(original_labels, pgd_labels_classes)
fgsm_accuracy = accuracy_score(original_labels, fgsm_labels_classes)
print("PGD accuracy: " + str(pgd_accuracy))   # label fixed: the attack is PGD, not "PGS"
print("FGSM accuracy: " + str(fgsm_accuracy))