<!--NOTEBOOK_INFORMATION-->
<img id="r-1060983" data-claire-element-id="1061343" src="http://www.siteduzero.com/favicon.ico" alt="Image utilisateur">
    <p>
        **<font color='#D2691E'size="6">Projet n°7 : Indexations automatiques d'images, ANNEXE (1/2)</font>**.
    </p>
    <p>
        Ce notebook aborde la classification des images du projet n°7 avec l'approche des réseaux de neurones de convolution.
    Il fait appel à la technique du <b>transfer learning</b>, mise en application sur le <b>VGG16</b>, avec <b>extraction des features des couches de convolution</b> et <b>entraînement des couches fully connected</b>.
    On y fait varier différents paramètres afin de comprendre la sensibilité du réseau à ces derniers.
    </p>

<p>
    <center>
        **<font color='#D2691E' size="6">PLAN</font>**
    </center>
</p>

<p>
        **<font color='#D2691E'size="4">0) Libraries and functions import</font>**
</p>
<p>
        **<font color='#D2691E'size="4">I) CNN parameters calibration</font>**
</p>
<p>
        **<font color='#D2691E'size="4">II) Looping over the CNN parameters</font>**
</p>

<p>
        **<font color='#D2691E'size="4">0) Libraries and functions import</font>**
</p>

In [1]:
import pickle 
import pandas as pd
import numpy as np
import random
import time

In [2]:
from context import datasources_path, pickles_path, temp_files_path

In [3]:
from functions_tailored import select_N_random_races
from functions_tailored import build_train_and_test_datasets

In [4]:
# Switch read by the evaluation-loading cell below: True reloads the results
# saved in 'df_evaluation_learning_rates.csv', False starts from an empty frame.
RELOAD_EVALUATION = True

In [5]:
# Column schema of the evaluation log: one row is recorded per trained model.
L_evaluation_cols = [
    # dataset description
    'races', 'training_len', 'testing_len',
    # position of the run inside the parameter sweep
    'iter_number', 'n_races', 'random_selection_number',
    # hyper-parameters
    'batch_size', 'learning_rate',
    # wall-clock timings
    'fitting_time', 'prediction_time',
    # per-epoch training history
    'epochs_losses', 'epochs_accuracies',
    'epochs_val_losses', 'epochs_val_accuracies',
    # final scores on the test set
    'test_loss', 'test_accuracy',
    # run metadata
    'is_augmented', 'optimizer', 'run',
]

In [6]:
# Either resume from the results of previous runs or start from an empty frame.
# A plain truthiness test with an `else` branch guarantees that
# `df_evaluation_lr` is always defined, whatever value the flag holds.
if RELOAD_EVALUATION:
    # The results CSV is written together with its index, which read_csv brings
    # back as an 'Unnamed: 0' column. errors='ignore' keeps this cell working
    # if the file was saved without that column.
    df_evaluation_lr = (
        pd.read_csv('df_evaluation_learning_rates.csv')
        .drop(columns='Unnamed: 0', errors='ignore')
    )
else:
    df_evaluation_lr = pd.DataFrame(columns=L_evaluation_cols)
df_evaluation_lr

Unnamed: 0,races,training_len,testing_len,iter_number,n_races,random_selection_number,batch_size,learning_rate,fitting_time,prediction_time,epochs_losses,epochs_accuracies,epochs_val_losses,epochs_val_accuracies,test_loss,test_accuracy,is_augmented,optimizer,learning_rate_categ,run
0,"['afghan_hound', 'boxer', 'cairn', 'collie', '...",1637,552,0,12,0,100,0.000046,3077,125,"[11.177390171632402, 5.857689602159346, 2.6551...","[0.18174409032013714, 0.5069274632252613, 0.74...","[15.50204170040968, 15.00998678440001, 12.2796...","[0.0, 0.029268292483033205, 0.2048780423838918...",3.816269,0.722826,False,sgd,"[1e-5,5e-5]",run_1
1,"['afghan_hound', 'boxer', 'cairn', 'collie', '...",1637,552,1,12,0,100,0.000089,3454,122,"[10.441518563610224, 5.4293057353105985, 3.884...","[0.23553382233088835, 0.5973920026443407, 0.71...","[15.783933965171256, 12.326112328506097, 12.78...","[0.002439024335727459, 0.19268292479398774, 0....",3.838663,0.733696,False,sgd,"[5e-5,1e-4]",run_1
2,"['afghan_hound', 'boxer', 'cairn', 'collie', '...",1637,552,2,12,0,100,0.000443,2209,122,"[9.8417969667144, 5.32107679107467, 4.79954731...","[0.31621842245406606, 0.6479217584966739, 0.68...","[12.235719762197355, 11.661158957132479, 11.52...","[0.21219512674866653, 0.2682926887419166, 0.27...",7.138068,0.543478,False,sgd,"[1e-4,5e-4]",run_1
3,"['afghan_hound', 'boxer', 'cairn', 'collie', '...",1637,552,3,12,0,100,0.000753,1477,122,"[10.426884437054275, 7.350310457364854, 9.7848...","[0.29910350730604723, 0.5264873691662211, 0.38...","[16.118097258777155, 16.118097258777155, 16.11...","[0.0, 0.0, 0.0, 0.0]",10.832995,0.327899,False,sgd,"[5e-4,1e-3]",run_1
4,"['afghan_hound', 'boxer', 'cairn', 'collie', '...",1637,552,4,12,0,100,0.004071,1475,122,"[14.32642020184331, 14.633708820949265, 14.633...","[0.09942950298000179, 0.09209453961838526, 0.0...","[16.118097258777155, 16.118097258777155, 16.11...","[0.0, 0.0, 0.0, 0.0]",15.008516,0.068841,False,sgd,"[1e-3,5e-3]",run_1
5,"['afghan_hound', 'boxer', 'cairn', 'collie', '...",1637,552,5,12,0,100,0.005730,1477,122,"[14.377608573621332, 14.35784860733766, 14.357...","[0.10268948660721697, 0.10920945234359913, 0.1...","[16.118097258777155, 16.118097258777155, 16.11...","[0.0, 0.0, 0.0, 0.0]",14.804120,0.081522,False,sgd,"[5e-3,1e-2]",run_1
6,"['brabancon_griffon', 'cairn', 'cocker_spaniel...",1510,510,6,12,1,100,0.000027,1452,113,"[12.859274773210181, 9.429001272356553, 6.2099...","[0.10424028090843042, 0.26855123606461095, 0.4...","[15.520961927989173, 15.956519313590237, 16.03...","[0.00793650775911316, 0.0, 0.0, 0.002645502586...",6.095660,0.547059,False,sgd,"[1e-5,5e-5]",run_1
7,"['brabancon_griffon', 'cairn', 'cocker_spaniel...",1510,510,7,12,1,100,0.000094,1780,113,"[10.854263379801289, 5.383011704198885, 3.2654...","[0.2049469942812785, 0.5927561940658219, 0.738...","[16.11809730529785, 16.115518347926873, 16.118...","[0.0, 0.0, 0.0, 0.0, 0.0]",5.329826,0.633333,False,sgd,"[5e-5,1e-4]",run_1
8,"['brabancon_griffon', 'cairn', 'cocker_spaniel...",1510,510,8,12,1,100,0.000462,1361,112,"[9.503772049826363, 6.354559300223846, 6.12258...","[0.34098940186018234, 0.5697879856551072, 0.60...","[16.11809730529785, 16.11809730529785, 16.1180...","[0.0, 0.0, 0.0, 0.0]",8.239412,0.484314,False,sgd,"[1e-4,5e-4]",run_1
9,"['brabancon_griffon', 'cairn', 'cocker_spaniel...",1510,510,9,12,1,100,0.000946,1362,113,"[10.594130981094846, 9.908407009111276, 10.101...","[0.2985865757488004, 0.3780918724755938, 0.370...","[16.11809730529785, 16.11809730529785, 16.1180...","[0.0, 0.0, 0.0, 0.0]",12.452109,0.227451,False,sgd,"[5e-4,1e-3]",run_1


<p>
        **<font color='#D2691E'size="4">I) CNN parameters calibration</font>**
</p>

In [7]:
from keras import backend as K
from keras.applications.vgg16 import VGG16
from keras.callbacks import EarlyStopping
from keras.layers import Flatten,Dense,Dropout
from keras.models import Model
from keras.optimizers import SGD, Adam

Using TensorFlow backend.


In [8]:
# Batch sizes explored by the sweep.
L_batch_sizes = [200, 300]

# Learning-rate intervals as [low, high]; the training loop draws one value
# uniformly inside each interval.
dict_lr_ranges = {
    0: [5e-6, 1e-5],
    1: [1e-5, 5e-5],
    2: [5e-5, 1e-4],
}

# Upper bound on the number of epochs; early stopping on the validation loss
# (patience of 3 epochs) is passed to every fit as a callback.
EPOCHS = 100
nn_callbacks = EarlyStopping(
    monitor='val_loss',
    min_delta=0,
    patience=3,
    verbose=0,
    mode='auto',
)

# Flag forwarded to the dataset builder and stored in the results.
AUGMENTED = False
# Label stored with every result row of this sweep.
RUN = 'run_2'

<p>
        **<font color='#D2691E'size="4">II) Looping over the CNN parameters</font>**
</p>

In [9]:
def _build_vgg16_classifier(n_classes, fc_units=4096, dropout_rate=0.2):
    """Build a VGG16-based classifier with a new fully-connected head.

    The ImageNet-pretrained VGG16 is loaded without its fully-connected layers
    and all of its layers are frozen. On top of it are stacked:
    Flatten -> Dense(relu) -> Dropout -> Dense(relu) -> Dropout -> Dense(softmax).

    Parameters
    ----------
    n_classes : int
        Number of units of the final softmax layer.
    fc_units : int
        Number of units of each hidden fully-connected layer.
    dropout_rate : float
        Dropout rate applied after each hidden fully-connected layer.

    Returns
    -------
    keras.models.Model
        The uncompiled model.
    """
    # Load VGG16 pretrained on ImageNet, without the fully-connected layers
    base_model = VGG16(weights="imagenet", include_top=False, input_shape=(224, 224, 3))

    # None of the pretrained layers is updated by training
    for layer in base_model.layers:
        layer.trainable = False

    x = Flatten()(base_model.output)
    x = Dense(fc_units, activation='relu')(x)
    x = Dropout(dropout_rate)(x)
    x = Dense(fc_units, activation='relu')(x)
    x = Dropout(dropout_rate)(x)
    # One output unit per class
    predictions = Dense(n_classes, activation='softmax')(x)

    return Model(inputs=base_model.input, outputs=predictions)


def _make_optimizer(name, learning_rate):
    """Return the Keras optimizer called `name` ('adam' or 'sgd').

    Raises
    ------
    ValueError
        If `name` is not one of the supported optimizers.
    """
    if name == 'adam':
        return Adam(lr=learning_rate, beta_1=0.9, beta_2=0.999, epsilon=None, decay=0.0, amsgrad=False)
    if name == 'sgd':
        return SGD(lr=learning_rate, momentum=0.9)
    raise ValueError('unknown optimizer : %s' % name)


iter_start_time = time.time()
ITER_NUMBER = 0
RACE_NUMBER = 10
N_RANDOM_SELECTIONS = 30

for RANDOM_SELECTION_INCR in range(N_RANDOM_SELECTIONS):

    print('total_time : %s seconds'% (time.time() - iter_start_time))
    print('random_selection n° %s'%RANDOM_SELECTION_INCR)
    print()
    L_filtered_races = select_N_random_races(RACE_NUMBER)
    print('filtered_races : %s'%L_filtered_races)
    print()
    print('building the train and test datasets ...')
    print()
    dict_data = build_train_and_test_datasets(L_filtered_races,'label_encoder_learning_rate',AUGMENTED)

    X_train = dict_data['X_train']
    X_test = dict_data['X_test']
    y_train = dict_data['y_train']
    y_test = dict_data['y_test']

    training_len = X_train.shape[0]
    testing_len = X_test.shape[0]

    print('looping over the vgg16 parameters ...')
    print()

    for BATCH_SIZE in L_batch_sizes:
        for lr_interval in dict_lr_ranges.values():
            # One learning rate is drawn per interval and shared by both optimizers
            LEARNING_RATE = random.uniform(lr_interval[0], lr_interval[1])
            print()
            print()
            print("#############################################################################################")
            print("training the model with params :")
            print("Batch size = %s | Learning rate = %s"%(BATCH_SIZE,LEARNING_RATE))

            for optimizer in ['adam','sgd']:
                # Timer restarted for every model so that 'training_time' covers
                # this model only, not the previous optimizer's run as well.
                init_training_time = time.time()

                # A new network is built for every configuration: release the
                # backend state of the previous one, otherwise it keeps
                # accumulating across the whole sweep.
                K.clear_session()

                new_model = _build_vgg16_classifier(RACE_NUMBER)

                # Compile the model
                nn_optimizer = _make_optimizer(optimizer, LEARNING_RATE)
                new_model.compile(loss="categorical_crossentropy", optimizer=nn_optimizer, metrics=["accuracy"])

                init_fitting_time = time.time()
                print('fitting the model ...')
                print('optimizer : %s'%optimizer)
                print()
                # Train on the training data (X_train, y_train).
                # NOTE(review): Keras takes the validation_split samples from the
                # END of X_train/y_train, before any shuffling. This presumably
                # relies on build_train_and_test_datasets returning shuffled
                # data - TODO confirm (the 0.0 validation accuracies in earlier
                # results could be a symptom of class-ordered data).
                history = new_model.fit(X_train,
                                        y_train,
                                        validation_split=0.25,
                                        epochs=EPOCHS,
                                        callbacks = [nn_callbacks],
                                        batch_size=BATCH_SIZE,
                                        verbose=1)

                fitting_time = int(time.time() - init_fitting_time)
                print('fitting_time : %s seconds'%fitting_time)
                print()

                init_prediction_time = time.time()
                print('computing the accuracy on the test data ...')
                print()
                performances = new_model.evaluate(X_test, y_test, verbose=0)
                test_loss = performances[0]
                test_accuracy = performances[1]
                print("test_loss : %s | test_accuracy : %s"%(test_loss,test_accuracy))
                prediction_time = int(time.time() - init_prediction_time)
                print('prediction_time : %s seconds'%prediction_time)
                print()

                # Depending on the Keras version, the accuracy metric is logged
                # as 'acc' or as 'accuracy'.
                epochs_history = history.history
                if 'acc' in epochs_history:
                    epochs_accuracies = epochs_history['acc']
                    epochs_val_accuracies = epochs_history['val_acc']
                else:
                    epochs_accuracies = epochs_history['accuracy']
                    epochs_val_accuracies = epochs_history['val_accuracy']

                dict_evaluation = {
                    'training_len':training_len,
                    'testing_len':testing_len,
                    'iter_number':ITER_NUMBER,
                    'n_races':RACE_NUMBER,
                    'races':L_filtered_races,
                    'random_selection_number':RANDOM_SELECTION_INCR,
                    'batch_size':BATCH_SIZE,
                    'learning_rate':LEARNING_RATE,
                    'fitting_time':fitting_time,
                    'prediction_time':prediction_time,
                    'epochs_losses':epochs_history['loss'],
                    'epochs_accuracies':epochs_accuracies,
                    'epochs_val_losses':epochs_history['val_loss'],
                    'epochs_val_accuracies':epochs_val_accuracies,
                    'test_loss':test_loss,
                    'test_accuracy':test_accuracy,
                    'is_augmented':AUGMENTED,
                    'optimizer':optimizer,
                    'run':RUN
                }

                # DataFrame.append no longer exists in recent pandas; concat of a
                # one-row frame gives the same result. The CSV is rewritten after
                # every model so an interrupted sweep keeps its finished runs.
                df_evaluation_lr = pd.concat(
                    [df_evaluation_lr, pd.DataFrame([dict_evaluation])],
                    ignore_index=True,
                    sort=False,
                )
                df_evaluation_lr.to_csv('df_evaluation_learning_rates.csv', header=True)

                print('CSV written !')
                print()
                ITER_NUMBER+=1
                print('training_time : %s seconds'% (time.time() - init_training_time))
                print()

total_time : 0.0 seconds
random_selection n° 0

filtered_races : ['afghan_hound', 'boston_bull', 'cocker_spaniel', 'dandie_dinmont', 'dhole', 'doberman', 'english_setter', 'german_short_haired_pointer', 'lhasa', 'walker_hound']

building the train and test datasets ...

looping over the vgg16 parameters ...



#############################################################################################
training the model with params :
Batch size = 200 | Learning rate = 7.2683810231957375e-06
fitting the model ...
optimizer : adam

Train on 960 samples, validate on 320 samples
Epoch 1/100


KeyboardInterrupt: 