# Project AML

---


## Parte B: Feature Extraction based on Inception-V3 + SVM for Prediction

## Google Colab Setting

---



In [None]:
# Fix the NumPy random seed so that repeated runs draw the same numbers.
import numpy

seed = 42
numpy.random.seed(seed)

In [0]:
# Mount Google Drive inside the Colab runtime so the dataset paths resolve.
from google.colab import drive

drive.mount('/content/gdrive/')

## Import Library

---




In [None]:
""" Library """
import numpy as np
import math
from keras.preprocessing.image import ImageDataGenerator
from keras.models import Model
from keras.applications.inception_v3 import InceptionV3
from sklearn.svm import SVC
import matplotlib.pyplot as plt
import time
from pyGPGO.GPGO import GPGO
from pyGPGO.covfunc import squaredExponential
from pyGPGO.acquisition import Acquisition
from pyGPGO.surrogates.RandomForest import RandomForest

# A) Pre-Processing Part

---
In questa sezione vengono definite le funzioni per:

- Eseguire la data augmentation sui dati di training
- Estrarre le feature utilizzando la tecnica del Transfer Learning chiamata Feature Extraction
- Suddividere le feature estratte in modalità supervisionata, ovvero Data e Label

In [0]:
def train_data(path):

    """
    Build the training features and labels for the images found under ``path``.

    The images are read with random augmentation, passed once through the
    pre-trained network and returned together with their integer class index.

    Relies on the module-level names ``model`` (network used for the feature
    extraction) and ``batch_size`` being defined before the call.

    :param path: directory handed to ``flow_from_directory``

    :return: tuple ``(features, labels)`` -- the output of
             ``model.predict_generator`` and the generator's ``classes`` array
    """

    # Data: augmentation applied while the training images are read.
    generator_data = ImageDataGenerator(
        rescale=1. / 255,  # Multiply every pixel value by 1/255
        rotation_range=30,  # Random rotation of up to 30 degrees
        height_shift_range=0.1,  # Random vertical shift of up to 10% of the height
        width_shift_range=0.1,  # Random horizontal shift of up to 10% of the width
        horizontal_flip=True,  # Random mirroring of the image
        brightness_range=(0.1, 1.2),  # Random brightness factor picked in [0.1, 1.2]
        fill_mode='wrap')  # Pixels uncovered by a shift/rotation are filled by wrapping the image

    # shuffle=False keeps the files in directory order, so the rows of the
    # extracted features line up with ``classes`` below.
    data_train_generator = generator_data.flow_from_directory(path,
                                                              target_size=(224, 224),
                                                              batch_size=batch_size,
                                                              class_mode=None,
                                                              shuffle=False)

    nb_train_samples = len(data_train_generator.filenames)

    # Round up so that the last, possibly partial, batch is processed too.
    predict_size_train = int(math.ceil(nb_train_samples / batch_size))

    features = model.predict_generator(data_train_generator, predict_size_train)

    print('training data created')

    # Label: the directory iterator records the class index of every file in
    # ``classes`` even with class_mode=None, so the labels are taken from the
    # same scan as the features instead of listing the directory a second time.
    labels = data_train_generator.classes

    print('training label created')

    return features, labels

In [0]:
def validation_data(path):

    """
    Build the validation features and labels for the images found under ``path``.

    The images are only rescaled (no augmentation), passed once through the
    pre-trained network and returned together with their integer class index.

    Relies on the module-level names ``model`` (network used for the feature
    extraction) and ``batch_size`` being defined before the call.

    :param path: directory handed to ``flow_from_directory``

    :return: tuple ``(features, labels)`` -- the output of
             ``model.predict_generator`` and the generator's ``classes`` array
    """

    # Data: only the 1/255 rescaling is applied to the validation images.
    generator_data = ImageDataGenerator(rescale=1. / 255)

    # shuffle=False keeps the files in directory order, so the rows of the
    # extracted features line up with ``classes`` below.
    data_validation_generator = generator_data.flow_from_directory(path,
                                                                   target_size=(224, 224),
                                                                   batch_size=batch_size,
                                                                   class_mode=None,
                                                                   shuffle=False)

    nb_validation_samples = len(data_validation_generator.filenames)

    # Round up so that the last, possibly partial, batch is processed too.
    predict_size_validation = int(math.ceil(nb_validation_samples / batch_size))

    features = model.predict_generator(data_validation_generator, predict_size_validation)

    print('validation data created')

    # Label: the directory iterator records the class index of every file in
    # ``classes`` even with class_mode=None, so the labels are taken from the
    # same scan as the features instead of listing the directory a second time.
    labels = data_validation_generator.classes

    print('validation label created')

    return features, labels

In [0]:
def test_data(path):

    """
    Build the test features and labels for the images found under ``path``.

    The images are only rescaled (no augmentation), passed once through the
    pre-trained network and returned together with their integer class index.

    Relies on the module-level names ``model`` (network used for the feature
    extraction) and ``batch_size`` being defined before the call.

    :param path: directory handed to ``flow_from_directory``

    :return: tuple ``(features, labels)`` -- the output of
             ``model.predict_generator`` and the generator's ``classes`` array
    """

    # Data: only the 1/255 rescaling is applied to the test images.
    generator_data = ImageDataGenerator(rescale=1. / 255)

    # shuffle=False keeps the files in directory order, so the rows of the
    # extracted features line up with ``classes`` below.
    data_test_generator = generator_data.flow_from_directory(path,
                                                             target_size=(224, 224),
                                                             batch_size=batch_size,
                                                             class_mode=None,
                                                             shuffle=False)

    nb_test_samples = len(data_test_generator.filenames)

    # Round up so that the last, possibly partial, batch is processed too.
    predict_size_test = int(math.ceil(nb_test_samples / batch_size))

    features = model.predict_generator(data_test_generator, predict_size_test)

    print('test data created')

    # Label: the directory iterator records the class index of every file in
    # ``classes`` even with class_mode=None, so the labels are taken from the
    # same scan as the features instead of listing the directory a second time.
    labels = data_test_generator.classes

    print('test label created')

    return features, labels


# B) Model Part

---
Viene importato un modello pre-trained presente su keras, ovvero Inception-V3 e vengono successivamente eliminati i layer da non utilizzare.


In [0]:
def model():

    """
    Build the Inception-V3 feature extractor.

    This function
    - loads Inception-V3 with the ImageNet weights
    - ends the network at the global average pooling layer
    - prints the summary of the network

    :return: Keras model whose output is the globally average-pooled
             Inception-V3 feature map of each input image
    """

    # include_top=False with pooling='avg' stops the network at the global
    # average pooling, i.e. the same features the 'avg_pool' layer of the full
    # model produces, without building and loading the 1000-class prediction
    # layer that would be discarded anyway.
    # NOTE(review): some Keras releases fix the input size to 299x299 when
    # include_top=True, while the generators in this notebook feed 224x224
    # images; the headless network carries no such constraint -- confirm
    # against the installed Keras version.
    feature_extractor = InceptionV3(weights='imagenet', include_top=False, pooling='avg')
    feature_extractor.summary()

    return feature_extractor

In [0]:
def objective(C, gamma):

    """
    Objective function handed to the optimiser: fit an RBF-kernel SVM with the
    given hyper-parameters and score it on the validation set.

    Reads the module-level arrays ``x_train``, ``y_train``, ``x_validation``
    and ``y_validation``.

    :param C: penalty parameter of the error term
    :param gamma: kernel coefficient

    :return: score of the fitted classifier on the validation data
    """

    # Fit the classifier on the training features.
    classifier = SVC(kernel='rbf', C=C, gamma=gamma)
    classifier.fit(x_train, y_train)

    # The validation score is the value being optimised.
    return classifier.score(x_validation, y_validation)

# C) Ottimizzazione con SMBO

Viene definita la fase di ottimizzazione bayesiana.


In [0]:
def SMBO(model, acquisition):

    """
    Run the SMBO procedure over the SVM hyper-parameters ``C`` and ``gamma``.

    The elapsed time, a plot of the optimiser history and the optimiser's
    result are printed/shown; nothing is returned.

    :param model: surrogate model
    :param acquisition: acquisition function

    """

    # Re-seed NumPy so that every call starts from the same random state.
    np.random.seed(42)

    started_at = time.time()

    # Continuous search ranges of the two hyper-parameters.
    search_space = {
        'C': ('cont', [0.1, 1000]),
        'gamma': ('cont', [0.001, 1]),
    }

    optimizer = GPGO(model, acquisition, objective, search_space, n_jobs=1)
    optimizer.run(max_iter=120, init_evals=80)

    # Total wall-clock time of the optimisation.
    elapsed = time.time() - started_at
    print("--- %s seconds ---" % elapsed)

    # History recorded by the optimiser.
    plt.plot(optimizer.history)
    plt.title('Accuracy through iteration')
    plt.xlabel('Iteration')
    plt.ylabel('Accuracy')
    plt.show()

    print(optimizer.getResult())

# Main


---

In questa fase viene eseguita:

- La fase di Pre-processing
- La fase di Modelling
- La fase di ottimizzazione bayesiana.



In [0]:
# Number of images per batch requested from the data generators.
batch_size = 16

In [0]:
# Dataset folders on the mounted Google Drive, one per split.
_dataset_root = '/content/gdrive/My Drive/Colab Notebooks/PROJECT_AML/Dataset/'
path_train = _dataset_root + 'TrainingSet/'
path_validation = _dataset_root + 'ValidationSet/'
path_test = _dataset_root + 'TestSet/'

In [0]:
# Build the feature extractor.
# The assignment rebinds the name ``model`` from the builder function to the
# network it returns, so a second run of this cell would call the network
# itself with no arguments and fail. The guard makes the cell safe to re-run:
# the builder is only called while ``model`` is not yet a Keras ``Model``.
if not isinstance(model, Model):
    model = model()

In [0]:
# Extract the features and labels of the training set.
x_train, y_train = train_data(path_train)

# Store them on disk so the extraction does not have to be repeated.
np.save(file='x_train.npy', arr=x_train)
np.save(file='y_train.npy', arr=y_train)

In [0]:
# Extract the features and labels of the validation set.
x_validation, y_validation = validation_data(path_validation)

# Store them on disk so the extraction does not have to be repeated.
np.save(file='x_validation.npy', arr=x_validation)
np.save(file='y_validation.npy', arr=y_validation)

In [0]:
# Extract the features and labels of the test set.
x_test, y_test = test_data(path_test)

# Store them on disk so the extraction does not have to be repeated.
np.save(file='x_test.npy', arr=x_test)
np.save(file='y_test.npy', arr=y_test)

In [0]:
# Alternative to the extraction cells: reload the features and labels that
# were previously saved as .npy files.

# Training split
x_train = np.load(file='x_train.npy')
y_train = np.load(file='y_train.npy')

# Validation split
x_validation = np.load(file='x_validation.npy')
y_validation = np.load(file='y_validation.npy')

# Test split
x_test = np.load(file='x_test.npy')
y_test = np.load(file='y_test.npy')

In [0]:
# Manual tuning: fit an RBF-kernel SVM with hand-picked hyper-parameters and
# report its score on the validation and test features.
C, gamma = 100, 0.01

svm_model_linear = SVC(kernel='rbf', C=C, gamma=gamma)
svm_model_linear.fit(x_train, y_train)

score = svm_model_linear.score(x_validation, y_validation)
score1 = svm_model_linear.score(x_test, y_test)

print('Accuracy on validation', score)
print('Accuracy on Testing', score1)

In [0]:
# Optimization phase: run SMBO with a random-forest surrogate and the
# probability-of-improvement acquisition function.
# The surrogate is kept under its own name: binding it to ``model`` would
# overwrite the global Keras network that train_data / validation_data /
# test_data read, and break any later re-run of the feature-extraction cells.
surrogate = RandomForest()
acquisition = Acquisition(mode='ProbabilityImprovement')
SMBO(surrogate, acquisition)

In [0]:
# Refit the SVM with the hyper-parameters hard-coded below and score it on
# the test features.
C, gamma = 28.867940037741008, 0.0010092866796091596

svm_model_linear = SVC(kernel='rbf', C=C, gamma=gamma)
svm_model_linear.fit(x_train, y_train)

score = svm_model_linear.score(x_test, y_test)
print('Accuracy on Testing', score)