# **Project AML**

# **Part A:** Fine-tuning based on VGG16


<h2>1. Importing Libraries</h2>

In [0]:
import numpy

from keras.callbacks import EarlyStopping, ModelCheckpoint
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Sequential
from keras.layers.core import Flatten, Dense, Dropout
from keras.layers.convolutional import Convolution2D, MaxPooling2D, ZeroPadding2D
from keras.optimizers import Adam, SGD

import matplotlib.pyplot as plt
import datetime

In [0]:
# Seed NumPy's global random generator for reproducibility; the same value is
# also handed to the test iterator built by test_generator().
seed = 42
numpy.random.seed(seed=seed)

In [0]:
# Mount Google Drive inside the Colab runtime at /content/gdrive/.
from google.colab import drive

drive.mount('/content/gdrive/')

<h2>2. Pre-Processing: Data Augmentation</h2>


In [0]:
# General data-loading helper: builds the ImageDataGenerator WITHOUT any augmentation arguments.

def data_augmentation(path):

    """
    Build a shuffled batch iterator over the images found under ``path``.

    Despite its name, this variant performs no augmentation yet: the only
    transformation configured is multiplying the pixel values by 1/255.

    :param path: directory containing the training images

    :return: the iterator returned by ``flow_from_directory`` (224x224 target
             size, batches of 32, categorical labels, shuffled)

    """

    # TODO: add the augmentation arguments to ImageDataGenerator.
    flow_options = {
        'target_size': (224, 224),
        'batch_size': 32,
        'class_mode': 'categorical',
        'shuffle': True,
    }

    return ImageDataGenerator(rescale=1. / 255).flow_from_directory(path, **flow_options)


# Actual data-augmentation helper: builds the ImageDataGenerator WITH augmentation arguments.
  

def data_augmentation1(path):

    """
    Build a shuffled, randomly augmented batch iterator over the training images.

    :param path: directory containing the training images

    :return: the iterator returned by ``flow_from_directory`` (224x224 target
             size, batches of 32, categorical labels, shuffled)

    """

    augmentation = {
        'rescale': 1. / 255,             # multiply pixel values by 1/255
        'rotation_range': 30,            # random rotations, range of 30 degrees
        'height_shift_range': 0.2,       # random vertical shift, fraction 0.2 of the height
        'width_shift_range': 0.2,        # random horizontal shift, fraction 0.2 of the width
        'horizontal_flip': True,         # random horizontal mirroring
        'brightness_range': (0.1, 1.2),  # random brightness factor picked from this range
        'fill_mode': 'wrap',             # pixels exposed by a transform are filled by wrapping the image
    }
    generator = ImageDataGenerator(**augmentation)

    return generator.flow_from_directory(path,
                                         target_size=(224, 224),
                                         batch_size=32,
                                         class_mode='categorical',
                                         shuffle=True)


def data_generator(path):

    """
    Build an unshuffled batch iterator over the images found under ``path``.

    Only the 1/255 rescaling is applied; no augmentation is configured.

    :param path: directory containing the images

    :return: the iterator returned by ``flow_from_directory`` (224x224 target
             size, batches of 32, categorical labels, original order)
    """

    flow_options = {
        'target_size': (224, 224),
        'batch_size': 32,
        'class_mode': 'categorical',
        'shuffle': False,
    }
    rescaler = ImageDataGenerator(rescale=1. / 255)
    return rescaler.flow_from_directory(path, **flow_options)
  
  
def test_generator(path):

    """
    Build an unshuffled, one-image-per-batch iterator over the test images.

    Only the 1/255 rescaling is applied. The module-level ``seed`` is passed
    through to ``flow_from_directory``.

    :param path: directory containing the test images

    :return: the iterator returned by ``flow_from_directory`` (224x224 target
             size, batch size 1, categorical labels, original order)
    """

    flow_options = {
        'target_size': (224, 224),    # every image is resized to 224x224
        'batch_size': 1,              # one image per batch
        'class_mode': 'categorical',  # labels are yielded together with the images
        'shuffle': False,             # keep the directory order so outputs can be matched to filenames
        'seed': seed,
    }
    rescaler = ImageDataGenerator(rescale=1. / 255)

    return rescaler.flow_from_directory(path, **flow_options)


<h2>3. Modelling</h2>

1) Import the structure and weights of VGG16

2) Edit the structure of VGG16: delete some layers and add new ones.

3) Define Training

4) Define Testing

In [0]:
def vgg16(weights_path=None):

    """
    Assemble the VGG16 layer stack and optionally load weights into it.

    The network is five convolutional stages (each convolution preceded by a
    one-pixel zero padding, each stage closed by 2x2 max pooling) followed by
    Flatten, two Dense(4096)+Dropout(0.5) pairs and a 1000-way softmax.

    :param weights_path: path of the weights file; nothing is loaded when falsy

    :return: the Sequential model, with weights loaded if a path was given

    """

    # (number of filters, number of padded 3x3 convolutions) for each stage.
    conv_stages = ((64, 2), (128, 2), (256, 3), (512, 3), (512, 3))

    model = Sequential()

    # Only the very first layer declares the input shape.
    padding_kwargs = {'input_shape': (224, 224, 3)}
    for filters, conv_count in conv_stages:
        for _ in range(conv_count):
            model.add(ZeroPadding2D((1, 1), **padding_kwargs))
            padding_kwargs = {}
            model.add(Convolution2D(filters, (3, 3), activation='relu'))
        model.add(MaxPooling2D((2, 2), strides=(2, 2)))

    # Fully connected classifier.
    model.add(Flatten())
    for _ in range(2):
        model.add(Dense(4096, activation='relu'))
        model.add(Dropout(0.5))
    model.add(Dense(1000, activation='softmax'))

    if weights_path:
        model.load_weights(weights_path)

    return model


def vgg16_edit(path):

    """
    Turn the VGG16 model into a 102-class fine-tuning model.

    The first 32 layers are frozen, the last five layers are removed, and a
    new Dense/Dropout head ending in a 102-way softmax is appended.

    :param path: path of the weights handed to ``vgg16``

    :return: the modified model
    """

    model = vgg16(path)

    # Freeze the first 32 layers so their weights are not updated by training.
    for frozen_layer in model.layers[:32]:
        frozen_layer.trainable = False

    # Drop the last five layers of the original network.
    layers_to_remove = 5
    for _ in range(layers_to_remove):
        model.pop()

    # Append the new classification head.
    new_head = (
        Dense(2884, activation='relu'),
        Dropout(0.25),
        Dense(1482, activation='relu'),
        Dropout(0.25),
        Dense(102, activation='softmax'),
    )
    for head_layer in new_head:
        model.add(head_layer)

    return model


def training(model, loss, optimizer, train, validation, epoch,
             steps_per_epoch=5, validation_steps=5, callbacks=None):

    """
    Compile the model, fit it on the training data, print the execution time and
    the last-epoch metrics, and plot the accuracy and loss curves.

    :param model: CNN model
    :param loss: loss function
    :param optimizer: optimizer function
    :param train: train data
    :param validation: validation data
    :param epoch: maximum number of epochs
    :param steps_per_epoch: batches drawn from ``train`` per epoch
                            (default 5, the value that used to be hard-coded)
    :param validation_steps: batches drawn from ``validation`` per evaluation
                             (default 5, the value that used to be hard-coded)
    :param callbacks: list of callbacks for ``fit_generator``; when None the
                      module-level ``earlystopper`` and ``checkpointer`` are used

    :return: model trained on train data
    """

    if callbacks is None:
        # Default to the callbacks defined in the hyper-parameter cell.
        callbacks = [earlystopper, checkpointer]

    # Compile model
    model.compile(loss=loss,
                  optimizer=optimizer,
                  metrics=['accuracy'])

    # Start timer
    time_start = datetime.datetime.now()

    # Fit model
    result = model.fit_generator(train,
                                 validation_data=validation,
                                 validation_steps=validation_steps,
                                 steps_per_epoch=steps_per_epoch,
                                 callbacks=callbacks,
                                 epochs=epoch)

    # Stop timer
    time_stop = datetime.datetime.now()

    # Print time
    print("Execution time:", (time_stop - time_start).total_seconds(), "secondi")

    history = result.history

    # Depending on the Keras release, the accuracy metric is logged either as
    # 'acc' or as 'accuracy'; use whichever key is present.
    acc_key = 'acc' if 'acc' in history else 'accuracy'
    val_acc_key = 'val_' + acc_key

    # Metrics of the last epoch that was actually run.
    print('Loss on train set:', round(history['loss'][-1], 3))
    print('Loss on validation set:', round(history['val_loss'][-1], 3))

    print('Accuracy on train set:', round(history[acc_key][-1], 3))
    print('Accuracy on validation set:', round(history[val_acc_key][-1], 3))

    # Show Plot
    show_history(result, acc_key, val_acc_key, 'accuracy', 'epoch', 'train', 'validation', 1, epoch)
    show_history(result, 'loss', 'val_loss', 'loss', 'epoch', 'train', 'validation', None, epoch)

    return model


def show_history(result, measure1='', measure2='', metrics='', unit='', set1='', set2='', acc=None, epochs=100):

    """
    Plot two series of a training history on the same axes and show the figure.

    :param result: object whose ``history`` mapping holds the recorded series
    :param measure1: key of the first series, e.g. 'acc'
    :param measure2: key of the second series, e.g. 'val_acc'
    :param metrics: label of the y axis, e.g. 'accuracy'
    :param unit: label of the x axis, e.g. 'epoch'
    :param set1: legend entry of the first series, e.g. 'train'
    :param set2: legend entry of the second series, e.g. 'validation'
    :param acc: upper limit of the y axis; passed to ``set_ylim`` as is (None by default)
    :param epochs: upper limit of the x axis

    :return: None
    """

    for series_key in (measure1, measure2):
        plt.plot(result.history[series_key])

    axes = plt.gca()
    axes.set_xlim([0, epochs])
    axes.set_ylim([0, acc])
    axes.set_ylabel(metrics)
    axes.set_xlabel(unit)
    axes.legend([set1, set2], loc='upper left')

    plt.show()



def testing(model, test, batch):
    
    """
    This function testing the test data and give in output the result
    
    :param model: model trained in training.
    :param test: data to test
    :param batch: dimension of batch

    """

    score = model.evaluate_generator(test, batch)

    print(score[0], 'loss')
    print(score[1], 'accuracy')


<h2>4. Experiment</h2>

In this part:

1) Define path of data

2) Define hyper-parameters

3) Run Pre-processing Part

4) Run Model Part

In [0]:
# Google Drive locations of the dataset splits, the VGG16 weights and the
# best-model checkpoint.
_COLAB_DIR = '/content/gdrive/My Drive/Colab Notebooks/'
_DATASET_DIR = _COLAB_DIR + 'dataset/aml-project/'

path_train = _DATASET_DIR + 'TrainingSet/'
path_validation = _DATASET_DIR + 'ValidationSet/'
path_test = _DATASET_DIR + 'TestSet/'

path_weights = _COLAB_DIR + 'model/vgg16_weights_tf_dim_ordering_tf_kernels.h5'
path_best_model = _COLAB_DIR + 'output_file.hdf5'

In [0]:
# Hyper-parameters of the experiment.
# NOTE: the generator helpers in this file hard-code a batch size of 32 (1 for
# the test set) and do not read `batch_size`.
batch_size = 32
epochs = 300
loss = 'categorical_crossentropy'
lr = 0.00005
decay = lr / epochs
adam = Adam(lr=lr, beta_1=0.9, beta_2=0.999, epsilon=None, decay=decay, amsgrad=False)
optimizer = adam

# Save the model to `path_best_model` whenever the validation loss improves.
# The path is taken from the path cell instead of being spelled out a second
# time, so the checkpoint and the (optional) reload always use the same file.
checkpointer = ModelCheckpoint(monitor='val_loss', filepath=path_best_model, verbose=1, save_best_only=True)
# Stop training after 30 epochs without an improvement of the validation loss.
earlystopper = EarlyStopping(monitor='val_loss', patience=30, verbose=1)

In [0]:
''' Pre-Processing'''
# Build the three batch iterators: augmented training data, plain validation
# data and one-image-per-batch test data.
train_generator = data_augmentation1(path_train)
validation_generator = data_generator(path_validation)
# NOTE(review): this rebinds the name `test_generator` from the helper function
# to the iterator it returns. Running this cell again without first re-running
# the cell that defines the helper would call the iterator object instead of
# the helper. Renaming the variable also requires updating the testing cell,
# which reads `test_generator`.
test_generator = test_generator(path_test)

In [0]:
# Build the fine-tuning model from the VGG16 weights and display its structure.
model = vgg16_edit(path_weights)
# summary() prints the layer table itself and returns None, so wrapping it in
# print() only added a stray "None" line to the output.
model.summary()

In [0]:
# Train the model with the hyper-parameters and iterators defined above.
training(model=model,
         loss=loss,
         optimizer=optimizer,
         train=train_generator,
         validation=validation_generator,
         epoch=epochs)

In [0]:
# Evaluate the model on the test set and time the evaluation.
print("Testing")
time_start = datetime.datetime.now()
# Uncomment to evaluate the best checkpoint instead of the last-epoch weights:
# model.load_weights(path_best_model)

# Evaluate model: one step per batch of the test iterator. The iterator is
# built with batch_size=1, so its length replaces the former hard-coded 6148
# and stays correct if the test set changes.
testing(model, test_generator, batch=len(test_generator))
# Stop timer
time_stop = datetime.datetime.now()

# Print time
print("Execution time:", (time_stop - time_start).total_seconds(), "secondi")
