|Nom | Prénom|
|---|---|
| AHOUNOU | Méryl |
| KEVORKIAN | Amandine |

## Imports

In [1]:
import datetime
import pathlib
import pickle
import shutil

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn as sns
import tensorflow as tf

from sklearn import svm
from sklearn.decomposition import PCA
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import classification_report, accuracy_score, precision_score, recall_score
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.models import Sequential, load_model
from tensorflow.keras.layers import Conv2D, MaxPool2D, Dense, Flatten, Dropout
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, EarlyStopping
from yaml import load, Loader
from PIL import Image
from skimage.transform import resize
from skimage.io import imread

# Yaml file

In [None]:
# Read the notebook configuration (directories, target name, image size).
# The context manager closes the file as soon as it has been parsed.
# NOTE(review): yaml.load with the full Loader can build arbitrary Python
# objects; acceptable for a trusted local file, use SafeLoader otherwise.
with open("app.yaml", 'r') as yaml_file:
    yaml_content = load(yaml_file, Loader=Loader)

## Constantes

In [None]:
# Directories read from the YAML configuration.
DATA_DIR = pathlib.Path(yaml_content["DATA_DIR"])
# MODELS_DIR is kept as the raw configured value (it is not wrapped in pathlib.Path).
MODELS_DIR = yaml_content["MODELS_DIR"]
WEIGHTS_DIR = pathlib.Path(yaml_content["WEIGHTS_DIR"])

# Name of the target column and of the text file listing the training images.
TARGET_NAME = yaml_content["TARGET_NAME"]
TARGET_NAME_TXT = f'images_{TARGET_NAME}_train.txt'

# Image dimensions used when resizing images and declaring model inputs.
IMAGE_WIDTH = yaml_content["IMAGE_WIDTH"]
IMAGE_HEIGHT = yaml_content["IMAGE_HEIGHT"]
IMAGE_DEPTH = yaml_content["IMAGE_DEPTH"]

# Fonctions

In [None]:
def build_image_database(path, target, images_dir='../data/dataset/data/images'):
    """Build a pandas dataframe with target class and access path to images.

    Each line of the input file is split on its first space: the text before
    it is the image id, everything after it is the target value (which may
    itself contain spaces).

    Parameters
    ----------
    path (Path): path to the text file containing images information.
    target (str): name of the target column.
    images_dir (str or Path): directory holding the ``<image_id>.jpg`` files.
        Defaults to the location previously hard-coded in this function.

    Returns
    -------
    A pandas dataframe with the columns ``image_id``, ``<target>`` and ``path``.
    """
    images_dir = pathlib.Path(images_dir)

    # Read every line as a single string column: ids are not int but string.
    _df = pd.read_csv(path, sep='\t', names=['all'], dtype={'all': str})

    # str.partition(' ') cuts on the first space only.
    _df['image_id'] = _df['all'].apply(lambda line: line.partition(' ')[0])
    _df[target] = _df['all'].apply(lambda line: line.partition(' ')[2])

    # The path column holds the access path to the image file.
    _df['path'] = _df['image_id'].apply(lambda image_id: images_dir / (image_id + '.jpg'))

    return _df.drop(columns=['all'])

In [None]:
def build_x_y(df: pd.DataFrame, target: str, images: str, type_model, stratify=None):
    """Build x tensor and y tensor for model fitting.

    Parameters
    ----------
    df (pd.DataFrame): dataframe containing images and target
    target (str): name of target column
    images (str): name of images column
    type_model (int): type of model
                    0 Keras model
                    1 SVM
    stratify: unused, kept only so existing callers keep working

    Returns
    -------
    x (np.array/pd.DataFrame): tensor of x values / DataFrame with one column per pixel position
    y (np.array/pd.Series): one-hot tensor (Keras) / integer category codes (SVM)

    Raises
    ------
    ValueError: if type_model is neither 0 nor 1
    """
    # Fail early with a clear error instead of printing and then crashing on
    # unbound x / y at the return statement.
    if type_model not in (0, 1):
        raise ValueError('The type of model is not correct. Please insert 0 or 1.\nPS:\n0 Keras model\n1 SVM')

    # Integer code of each class, in the order of the column's categories.
    codes = df[target].astype('category').cat.codes
    pixels = np.array(df[images].to_list())

    if type_model == 0:
        return pixels, to_categorical(codes)
    return pd.DataFrame(pixels), codes

In [None]:
def build_classification_model(df, target: str, images: str):
    """Build a TF model using information from target and images columns in dataframe

    Parameters
    ----------
    df (pd.DataFrame): dataframe with target and images columns
    target(str): column name for target variable
    images(str): column name for images

    Returns
    -------
    TF model built & compiled
    """
    nb_classes = df[target].nunique()  # Number of classes for the output layer
    size = df[images].iloc[0].shape  # Shape of the first image, used as input shape

    model = Sequential()
    # Declare the input shape at the very start of the model. It was previously
    # passed as `input_shape` to the first Conv2D, which is not the first layer
    # of the stack, so the model stayed unbuilt until the first call to fit().
    model.add(keras.Input(shape=size))

    # Data augmentation and rescaling of pixel values with (x / 127.5) - 1.
    model.add(layers.RandomFlip("horizontal"))
    model.add(layers.RandomRotation(0.1))
    model.add(layers.Rescaling(scale=1 / 127.5, offset=-1))

    # First convolutional block
    model.add(Conv2D(filters=32, kernel_size=(5, 5), activation='relu'))
    model.add(Conv2D(filters=32, kernel_size=(5, 5), activation='relu'))
    model.add(MaxPool2D(pool_size=(2, 2)))
    model.add(Dropout(rate=0.25))

    # Second convolutional block
    model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
    model.add(Conv2D(filters=64, kernel_size=(3, 3), activation='relu'))
    model.add(MaxPool2D(pool_size=(2, 2)))
    model.add(Dropout(rate=0.25))

    # Classification head
    model.add(Flatten())
    model.add(Dense(256, activation='relu'))
    model.add(Dropout(rate=0.25))
    model.add(Dense(nb_classes, activation='softmax'))  # Output layer with nb_classes units

    # Compilation of the model
    model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

    return model

In [None]:
def classify_images(images, model, type_model=0, classes_names=None):
    """Classify images through a Keras model or an SVM.

    Parameters
    ----------
    images (np.array): set of images to classify
    model (model): TF/ Keras model / SVM
    type_model (int): type of model
                      0 Keras model
                      1 SVM
    classes_names: indexable collection of class names, indexed by class code;
                   when None the integer class codes are returned

    Returns
    -------
    predicted classes (codes, or names when classes_names is given)

    Raises
    ------
    ValueError: if type_model is neither 0 nor 1
    """
    # Previously an invalid type (or the SVM branch without classes_names)
    # reached the return statement with `classes` unbound.
    if type_model not in (0, 1):
        raise ValueError('The type of model is not correct. Please insert 0 or 1.\nPS:\n0 Keras model\n1 SVM')

    results = model.predict(images)
    if type_model == 0:
        # np.argmax returns the index of the max value per row
        classes = np.argmax(results, axis=1)
    else:
        # the SVM prediction is used directly as the class codes
        classes = results

    if classes_names is not None:
        classes = np.array(classes_names[classes])
    return classes

In [None]:
def evaluation_metrics(df_test, target, y_pred, y_test, images, model, type_model, ret):
    """Give some metrics for model evaluation

    Parameters:
    -----------
    df_test (pd.DataFrame): DataFrame which contains the test set
    target (str): the target variable
    y_pred (): the model prediction (per-class scores for a Keras model,
               class codes for an SVM)
    y_test (): true class codes; ignored for a Keras model, where they are
               recomputed from df_test[target]
    images (np.array): set of images to classify
    model (model): TF/ Keras model / SVM
    type_model (int): type of model
                    0 Keras model
                    1 SVM
    ret (int): 1 to print the metrics and plot the confusion heatmap,
               0 to return the metrics
    Returns:
    --------
    (accuracy, precision, recall) as percentage strings when ret == 0,
    None otherwise
    """
    if type_model == 0:
        # Turn per-class scores into class codes and rebuild the true codes
        # from the test dataframe.
        y_pred = np.argmax(y_pred, axis=1)
        y_test = df_test[target].astype('category').cat.codes
    elif type_model != 1:
        print('The type of model is not correct. Please insert 0 or 1.\nPS:\n0 Keras model\n1 SVM')
        return None

    if ret not in (0, 1):
        return None

    # scikit-learn metrics take (y_true, y_pred). The previous (y_pred, y_true)
    # order swapped the reported precision and recall.
    accuracy = f'{accuracy_score(y_test, y_pred)*100:.2f}%'
    precision = f'{precision_score(y_test, y_pred, average="macro")*100:.2f}%'
    recall = f'{recall_score(y_test, y_pred, average="macro")*100:.2f}%'

    if ret == 0:
        return accuracy, precision, recall

    print(f'accuracy: {accuracy}')
    print(f'precision: {precision}')
    print(f'recall: {recall}')
    print(classification_report(y_test, y_pred))

    # Row-normalised confusion matrix between true and predicted class names.
    fig, ax = plt.subplots(figsize=(15, 10))
    sns.heatmap(pd.crosstab(df_test[target],
                            classify_images(images, model, type_model, df_test[target].astype('category').cat.categories),
                            normalize='index'),
                cmap='vlag',
                ax=ax)
    return None

In [None]:
def create_target_class_liss(df, path, target):
    """Generate a txt file that contains the different target classes

    The file is named ``<target>_classes.txt`` and holds one class per line.

    Parameters:
    ----------
    df (pd.DataFrame): df which contains the variable whose classes we want to know
    path (str or Path): directory in which the txt file is saved
    target (str): the target variable
    Returns:
    -------
    None
    """
    # pathlib accepts both str and Path; string concatenation only worked for str.
    classes_file = pathlib.Path(path) / f'{target}_classes.txt'
    with open(classes_file, 'w') as f:
        f.writelines(f'{name}\n' for name in df[target].astype('category').cat.categories)
    return

In [None]:
def load_resize_image(path, type_model, height, width):
    """Load an image and resize it to the target size

    Parameters:
    ----------
    path (Path): access path to the image file
    type_model (int): type of model
                    0 Keras model
                    1 SVM
    height (int): resize image to this height
    width (int): resize to this width

    Returns
    --------
    np.array containing the resized image (Keras) / the flattened, 0-1 scaled
    resized image (SVM); None when type_model is not recognised
    """
    if type_model == 0:
        # PIL takes the size as (width, height); the context manager closes the file.
        with Image.open(path) as img:
            return np.array(img.resize((width, height)))
    elif type_model == 1:
        # skimage takes output_shape as (rows, cols), i.e. (height, width).
        # The previous (width, height) order was only right for square targets;
        # the flattened length is unchanged, but models fitted on non-square
        # images with the old order must be refitted.
        return resize(imread(path) / 255, (height, width)).flatten()
    else:
        print('The type of model is not correct. Please insert 0 or 1.\nPS:\n0 Keras model\n1 SVM')

In [None]:
def plot_history(history):
    """Draw the training curves of a fitted Keras model.

    Two figures are produced: training vs validation accuracy, then training
    vs validation loss, each against the epoch index.

    Parameters:
    ----------
    history (keras.callbacks.History) : the model training history

    Returns:
    --------
    None
    """
    logs = history.history
    train_acc, valid_acc = logs['accuracy'], logs['val_accuracy']
    train_loss, valid_loss = logs['loss'], logs['val_loss']
    epoch_axis = range(len(train_acc))

    # Accuracy curves go on the current figure.
    for values, colour, label in ((train_acc, 'b', 'Training acc'),
                                  (valid_acc, 'r', 'Validation acc')):
        plt.plot(epoch_axis, values, colour, label=label)
    plt.title('Training and validation accuracy')
    plt.legend()

    # Loss curves go on a new figure.
    plt.figure()
    for values, colour, label in ((train_loss, 'b', 'Training loss'),
                                  (valid_loss, 'r', 'Validation loss')):
        plt.plot(epoch_axis, values, colour, label=label)
    plt.title('Training and validation loss')
    plt.legend()
    plt.show()

In [None]:
def save_model(model, type_model, basename, target):
    """Save a Keras model (.h5) or a scikit-learn object (pickle)

    The file is written to ``<basename>/<target>_<suffix>`` where the suffix
    depends on type_model.

    Parameters
    ----------
    model (model): TF/ Keras model / SVM / PCA
    type_model (int): type of model
                      0 CNN neural network (cnn.h5)
                      1 SVM (svm.p)
                      2 Xception (xception.h5)
                      3 VGG19 (vgg19.h5)
                      4 PCA (pca.p)
    basename: directory in which the model file is saved
    target (str): the target variable name.
    """
    if type_model == 0:
        model.save(f'{basename}/{target}_cnn.h5')
    elif type_model == 1:
        # `with` closes the pickle file; it was previously left open.
        with open(f'{basename}/{target}_svm.p', 'wb') as f:
            pickle.dump(model, f)
    elif type_model == 2:
        model.save(f'{basename}/{target}_xception.h5')
    elif type_model == 3:
        model.save(f'{basename}/{target}_vgg19.h5')
    elif type_model == 4:
        with open(f'{basename}/{target}_pca.p', 'wb') as f:
            pickle.dump(model, f)
    else:
        print('The type of model is not correct. Please insert a number between 0-4.\nPS:\n0 CNN neural network\n1 SVM\n2 Xception\n3 VGG19\n4 PCA')
    return

In [None]:
def show_image(df, row, target):
    """Print the class of one image of the database and display the image.

    Parameters
    ----------
    df (pd.DataFrame): images definition dataframe
    row (int): row index in df of image to be displayed
    target (str): name of the target column

    Returns
    -------
    None
    """
    columns = df.columns
    assert target in columns, 'Missing target column in dataframe'
    assert 'path' in columns, 'Missing image path in dataframe'

    # Select the record by position, then read the two fields from it.
    record = df.iloc[row]
    print(record[target])
    picture = plt.imread(record['path'])
    plt.imshow(picture)
    return

# Reading files (train & test, images and class information)

## Verify data

In [None]:
df = build_image_database(DATA_DIR / TARGET_NAME_TXT, TARGET_NAME)

In [None]:
df.isna().sum()

In [None]:
assert df['image_id'].isna().sum() ==0, 'Valeur manquante dans image'
assert df[TARGET_NAME].isna().sum() ==0, 'Valeur manquante dans image'

In [None]:
df

In [None]:
df[TARGET_NAME].value_counts(dropna=False)

In [None]:
show_image(df, 42, TARGET_NAME)

In [None]:
df['shape_image'] = df['path'].apply(lambda p: plt.imread(p).shape)

In [None]:
df.shape_image

In [None]:
# Distribution du nombre de lignes
df.shape_image.apply(lambda x: x[0]).value_counts()

In [None]:
# Distribution du nombre de colonnes
df.shape_image.apply(lambda x: x[1]).value_counts()

In [None]:
df['resized_image'] = df.apply(lambda r: load_resize_image(r['path'], 0, IMAGE_HEIGHT, IMAGE_WIDTH), axis =1)

In [None]:
df[TARGET_NAME].value_counts()

## Generate a txt file of the different classes

In [None]:
# Write the classes of the target column to a txt file under MODELS_DIR.
create_target_class_liss(df, MODELS_DIR, TARGET_NAME)

# CNN neural network

## Build train and test set

In [None]:
# Load train & test dataset.
# The test set is read from its own split file; it was previously rebuilt from
# the training file, so the model was validated on the data it was fitted on.
# NOTE(review): assumes images_<TARGET_NAME>_test.txt sits next to the train
# file in DATA_DIR — confirm against the dataset layout.
train_df = build_image_database(DATA_DIR / TARGET_NAME_TXT, TARGET_NAME)
test_df = build_image_database(DATA_DIR / f'images_{TARGET_NAME}_test.txt', TARGET_NAME)

# Load & resize images
train_df['resized_image'] = train_df.apply(lambda r: load_resize_image(r['path'], 0,
                                                                       IMAGE_HEIGHT, IMAGE_WIDTH),
                                           axis=1)
test_df['resized_image'] = test_df.apply(lambda r: load_resize_image(r['path'], 0,
                                                                     IMAGE_HEIGHT, IMAGE_WIDTH),
                                         axis=1)

# Build tensors for training & testing.
# Class codes are computed separately per dataframe, so both splits must
# contain the same set of classes for the codes to match.
X_train, y_train = build_x_y(train_df, TARGET_NAME, 'resized_image', 0)
X_test, y_test = build_x_y(test_df, TARGET_NAME, 'resized_image', 0)

## Train model

In [None]:
model = build_classification_model(train_df, TARGET_NAME, 'resized_image')

In [None]:
# Load the TensorBoard notebook extension
%load_ext tensorboard

!del -rf ./logs
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

In [None]:
%%time
# Add checkpoint function to save best model
checkpoint = ModelCheckpoint(f'{WEIGHTS_DIR}/best_model_cnn.hdf5', 
                             monitor = 'val_accuracy', 
                             verbose = 0,
                             save_best_only=True,
                             save_weights_only=True,
                             mode = 'max')

# Add LR scheduler 
lr_scheduler = ReduceLROnPlateau(monitor='val_loss',
                              factor=0.2, #let's reduce LR 5 times
                              patience=3, # if no improvement after 3 epoch - reduce LR
                              min_lr=0.0001,
                              verbose=1,
                              mode='auto')

# Add early stop
earlystop = EarlyStopping(monitor = 'val_accuracy',
                          patience = 10,
                          restore_best_weights = True)

callbacks_list = [earlystop, lr_scheduler, tensorboard_callback, checkpoint]

epochs = 60
history = model.fit(X_train, y_train, batch_size=16, epochs=epochs, 
                    validation_data=(X_test, y_test),
                    callbacks=[callbacks_list]
                    # callbacks=[tensorboard_callback]
                    )

In [None]:
# Layer-by-layer description of the CNN.
model.summary()

In [None]:
# Accuracy and loss curves of the training run.
plot_history(history)

In [None]:
# Open TensorBoard on the logs written during training.
%tensorboard --logdir logs/fit

## Saving the model

In [None]:
# Reload the weights written by the ModelCheckpoint callback.
model.load_weights(f'{WEIGHTS_DIR}/best_model_cnn.hdf5')

In [None]:
# Type 0: saved as <MODELS_DIR>/<TARGET_NAME>_cnn.h5
save_model(model, 0, MODELS_DIR, TARGET_NAME)

# SVM

## Build train and test set

In [None]:
# Load & resize images (type 1: scaled to 0-1 and flattened for the SVM)
train_df['resized_image_svm'] = train_df.apply(lambda r: load_resize_image(r['path'], 1,
                                                                           IMAGE_HEIGHT, IMAGE_WIDTH),
                                               axis=1)
test_df['resized_image_svm'] = test_df.apply(lambda r: load_resize_image(r['path'], 1,
                                                                         IMAGE_HEIGHT, IMAGE_WIDTH),
                                             axis=1)

# Build tensors for training & testing.
# Use the configured target instead of the hard-coded 'manufacturer' column,
# so the SVM follows TARGET_NAME like the other models.
X_train_svm, y_train_svm = build_x_y(train_df, TARGET_NAME, 'resized_image_svm', 1)
X_test_svm, y_test_svm = build_x_y(test_df, TARGET_NAME, 'resized_image_svm', 1)

## PCA

In [None]:
# Project the flattened images onto 2000 principal components.
pca = PCA(n_components=2000)

In [None]:
# The projection is fitted on the training set only.
pca.fit(X_train_svm)

In [None]:
# Share of the total variance kept by the retained components.
pca.explained_variance_ratio_.sum()

In [None]:
X_train_svm = pca.transform(X_train_svm)

In [None]:
X_train_svm.shape

In [None]:
# The test set is transformed with the projection fitted on the training set.
X_test_svm = pca.transform(X_test_svm)

In [None]:
X_test_svm.shape

### Save model

In [None]:
# Type 4: pickled as <MODELS_DIR>/<TARGET_NAME>_pca.p
save_model(pca, 4, MODELS_DIR, TARGET_NAME)

## Train model

In [None]:
# Linear-kernel SVM with probability estimates enabled, fitted on the PCA features.
svc = svm.SVC(kernel = 'linear', probability=True)
model_svm = svc.fit(X_train_svm, y_train_svm)

## Tuning hyperparameters

In [None]:
# param_grid = {'C': [0.1, 1, 10, 100], 
#               'gamma': [1, 0.1, 0.01, 0.001],
#               'kernel': ['rbf', 'poly', 'sigmoid', 'linear']
#               }

# model_svm_grid=GridSearchCV(svc,param_grid)
# model_svm_grid.fit(X_train_svm,y_train_svm)

In [None]:
# model_svm_grid.best_params_

## Saving the model

In [None]:
# Type 1: pickled as <MODELS_DIR>/<TARGET_NAME>_svm.p
save_model(model_svm, 1, MODELS_DIR, TARGET_NAME)

In [None]:
# save_model(model_svm_grid, 1, MODELS_DIR, TARGET_NAME)

# Transfer learning

## Xception

### Train model

In [None]:
nb_classes = train_df[TARGET_NAME].nunique()
base_model = keras.applications.Xception(
    weights="imagenet",  # Load weights pre-trained on ImageNet.
    input_shape=(IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_DEPTH),
    include_top=False,  # Do not include the ImageNet classifier at the top.
    classifier_activation="softmax"
)

# Freeze the base_model
base_model.trainable = False

data_augmentation = keras.Sequential(
    [layers.RandomFlip("horizontal"), layers.RandomRotation(0.1),]
)


# Create new model on top
inputs = keras.Input(shape=(IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_DEPTH))
x = data_augmentation(inputs)  # Apply random data augmentation

# Pre-trained Xception weights requires that input be scaled
# from (0, 255) to a range of (-1., +1.), the rescaling layer
# outputs: `(inputs * scale) + offset`
scale_layer = keras.layers.Rescaling(scale=1 / 127.5, offset=-1)
x = scale_layer(x)

# The base model contains batchnorm layers. We want to keep them in inference mode
# when we unfreeze the base model for fine-tuning, so we make sure that the
# base_model is running in inference mode here.
x = base_model(x, training=False)
x = keras.layers.GlobalAveragePooling2D()(x)
x = keras.layers.Dropout(0.2)(x)  # Regularize with dropout
# The model is compiled with loss='categorical_crossentropy', which expects
# probabilities: the head needs a softmax (it used to output raw logits).
outputs = keras.layers.Dense(nb_classes, activation='softmax')(x)
model_xception = keras.Model(inputs, outputs)

model_xception.summary()

In [None]:
%%time
model_Xception.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# # Add checkpoint function to save best model
checkpoint = ModelCheckpoint(f'{WEIGHTS_DIR}/best_model_xception.hdf5', 
                             monitor = 'val_accuracy', 
                             verbose = 0,
                             save_best_only=True,
                             save_weights_only=True,
                             mode = 'max')

# Add LR scheduler 
lr_scheduler = ReduceLROnPlateau(monitor='val_loss',
                              factor=0.2, #let's reduce LR 5 times
                              patience=3, # if no improvement after 3 epoch - reduce LR
                              min_lr=0.0001,
                              verbose=1,
                              mode='auto')

# Add early stop
earlystop = EarlyStopping(monitor = 'val_accuracy',
                          patience = 10,
                          restore_best_weights = True)

callbacks_list = [earlystop, lr_scheduler, checkpoint]

epochs = 40
history = model_Xception.fit(X_train, y_train, batch_size=32, epochs=epochs, 
                    validation_data=(X_test, y_test),
                   callbacks = callbacks_list)

In [None]:
plot_history(history)

In [None]:
# Load the TensorBoard notebook extension
%load_ext tensorboard

!del -rf ./logs_x
log_dir = "logs_x/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

In [None]:
# Unfreeze the base_model. Note that it keeps running in inference mode
# since we passed `training=False` when calling it. This means that
# the batchnorm layers will not update their batch statistics.
# This prevents the batchnorm layers from undoing all the training
# we've done so far.
%%time
base_model.trainable = True
model_Xception.summary()

model_Xception.compile(
    optimizer=keras.optimizers.Adam(1e-5),  
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)


# # Add checkpoint function to save best model
checkpoint = ModelCheckpoint(f'{WEIGHTS_DIR}/best_model_xception.hdf5', 
                             monitor = 'val_accuracy', 
                             verbose = 0,
                             save_best_only=True,
                             save_weights_only=True,
                             mode = 'max')

# Add LR scheduler 
lr_scheduler = ReduceLROnPlateau(monitor='val_loss',
                              factor=0.1, 
                              patience=3, # if no improvement after 3 epoch - reduce LR
                              min_lr=0.000001, 
                              verbose=1,
                              mode='auto')

# Add early stop
earlystop = EarlyStopping(monitor = 'val_accuracy',
                          patience = 10,
                          restore_best_weights = True)

callbacks_list = [earlystop, lr_scheduler, tensorboard_callback, checkpoint]


epochs = 50
history = model_Xception.fit(X_train, y_train, batch_size=64, epochs=epochs, 
                    validation_data=(X_test, y_test),
                   callbacks = callbacks_list)

In [None]:
# Accuracy and loss curves of the fine-tuning run.
plot_history(history)

In [None]:
# Open TensorBoard on the logs written during fine-tuning.
%tensorboard --logdir logs_x/fit

### Save model

In [None]:
# Reload the weights written by the ModelCheckpoint callback.
model_xception.load_weights(f'{WEIGHTS_DIR}/best_model_xception.hdf5')

In [None]:
# Type 2: saved as <MODELS_DIR>/<TARGET_NAME>_xception.h5
save_model(model_xception, 2, MODELS_DIR, TARGET_NAME)

## VGG19

### Train model

In [None]:
nb_classes = train_df[TARGET_NAME].nunique()
base_model_VGG19 = keras.applications.VGG19(
    weights="imagenet",  # Load weights pre-trained on ImageNet.
    input_shape=(IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_DEPTH),
    include_top=False,
    classifier_activation="softmax"
)  # Do not include the ImageNet classifier at the top.

# Freeze the base_model
base_model_VGG19.trainable = False

data_augmentation = keras.Sequential(
    [layers.RandomFlip("horizontal"), layers.RandomRotation(0.1),]
)


# Create new model on top
inputs = keras.Input(shape=(IMAGE_HEIGHT, IMAGE_WIDTH, IMAGE_DEPTH))
x = data_augmentation(inputs)  # Apply random data augmentation


# Rescale pixel values with (inputs * scale) + offset, i.e. (x / 127.5) - 1.
# NOTE(review): the ImageNet VGG19 weights are normally paired with
# keras.applications.vgg19.preprocess_input rather than a [-1, 1] rescaling —
# confirm this preprocessing is intended.
scale_layer = keras.layers.Rescaling(scale=1 / 127.5, offset=-1)
x = scale_layer(x)

# The base model is called with training=False, so it runs in inference mode
# even after it is unfrozen for fine-tuning.
# NOTE(review): this was copied from the Xception cell, where it protects the
# batchnorm layers; VGG19 presumably has none — confirm.
x = base_model_VGG19(x, training=False)
x = keras.layers.GlobalAveragePooling2D()(x)
x = keras.layers.Dropout(0.2)(x)  # Regularize with dropout
outputs = keras.layers.Dense(nb_classes, activation='softmax')(x)
model_VGG19 = keras.Model(inputs, outputs)

model_VGG19.summary()

In [None]:
%%time
model_VGG19.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

# Checkpoint callback: keep only the weights of the best epoch (validation accuracy)
checkpoint = ModelCheckpoint(f'{WEIGHTS_DIR}/best_model_vgg19.hdf5', 
                             monitor = 'val_accuracy', 
                             verbose = 0,
                             save_best_only=True,
                             save_weights_only=True,
                             mode = 'max')

# Learning-rate scheduler
lr_scheduler = ReduceLROnPlateau(monitor='val_loss',
                              factor=0.2, # divide the learning rate by 5
                              patience=3, # after 3 epochs without improvement
                              min_lr=0.0001,
                              verbose=1,
                              mode='auto')

# Early stopping on validation accuracy, restoring the best weights
earlystop = EarlyStopping(monitor = 'val_accuracy',
                          patience = 10,
                          restore_best_weights = True)

callbacks_list = [earlystop, lr_scheduler, checkpoint]

epochs = 40
history = model_VGG19.fit(X_train, y_train, batch_size=12, epochs=epochs, 
                    validation_data=(X_test, y_test),
                    callbacks=callbacks_list)

In [None]:
plot_history(history)

In [None]:
# Load the TensorBoard notebook extension
%load_ext tensorboard

!del -rf ./logs_v
log_dir = "logs_v/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

In [None]:
# Unfreeze the base_model. Note that it keeps running in inference mode
# since we passed `training=False` when calling it. This means that
# the batchnorm layers will not update their batch statistics.
# This prevents the batchnorm layers from undoing all the training
# we've done so far.
%%time
base_model_VGG19.trainable = True
model_VGG19.summary()

model_VGG19.compile(
    optimizer=keras.optimizers.Adam(1e-5),  # Low learning rate
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)


# # Add checkpoint function to save best model
checkpoint = ModelCheckpoint(f'{WEIGHTS_DIR}/best_model_vgg19.hdf5', 
                             monitor = 'val_accuracy', 
                             verbose = 0,
                             save_best_only=True,
                             save_weights_only=True,
                             mode = 'max')

# Add LR scheduler 
lr_scheduler = ReduceLROnPlateau(monitor='val_loss',
                              factor=0.1, 
                              patience=3, # if no improvement after 3 epoch - reduce LR
                              min_lr=0.000001, 
                              verbose=1,
                              mode='auto')

# Add early stop
earlystop = EarlyStopping(monitor = 'val_accuracy',
                          patience = 10,
                          restore_best_weights = True)

callbacks_list = [earlystop, lr_scheduler, tensorboard_callback, checkpoint]



epochs = 40
history = model_VGG19.fit(X_train, y_train, batch_size=24, epochs=epochs, 
                    validation_data=(X_test, y_test),
                    callbacks=callbacks_list)

In [None]:
# Accuracy and loss curves of the fine-tuning run.
plot_history(history)

In [None]:
# Open TensorBoard on the logs written during fine-tuning.
%tensorboard --logdir logs_v/fit

### Save model

In [None]:
model_vgg19.load_weights(f'{WEIGHTS_DIR}/best_model_vgg19.hdf5')

In [None]:
save_model(model_vgg19, 3, MODELS_DIR, TARGET_NAME)

# Models evaluation

In [None]:
# One row per model. The scalar values (input shape and the empty metric
# placeholders) are repeated on every row; the metrics are filled in below.
dict_evaluation = {"Model": ["CNN neural network", "SVM", "Xception", "VGG19"], 
                   "Input shape": f"{IMAGE_WIDTH}x{IMAGE_HEIGHT}x{IMAGE_DEPTH}",
                   "Accuracy":"",
                   "Precision":"",
                   "Recall":""
                   }
evaluation_df = pd.DataFrame(dict_evaluation)

## CNN neural network

In [None]:
y_pred = model.predict(X_test)

In [None]:
y_pred[0]

In [None]:
evaluation_metrics(test_df, TARGET_NAME, y_pred, y_test, X_test, model, 0, 1)

In [None]:
evaluation_df['Accuracy'][0] = evaluation_metrics(test_df, TARGET_NAME, y_pred, y_test, X_test, model, 0, 0)[0]
evaluation_df['Precision'][0] = evaluation_metrics(test_df, TARGET_NAME, y_pred, y_test, X_test, model, 0, 0)[1]
evaluation_df['Recall'][0] = evaluation_metrics(test_df, TARGET_NAME, y_pred, y_test, X_test, model, 0, 0)[2]

## SVM

In [None]:
y_pred_svm = model_svm.predict(X_test_svm)

In [None]:
y_pred_svm[0]

In [None]:
evaluation_metrics(test_df, TARGET_NAME, y_pred_svm, y_test_svm, X_test_svm, model_svm, 1, 1)

In [None]:
evaluation_df['Accuracy'][1] = evaluation_metrics(test_df, 'manufacturer', y_pred_svm, y_test_svm, X_test_svm, model_svm, 1, 0)[0]
evaluation_df['Precision'][1] = evaluation_metrics(test_df, 'manufacturer', y_pred_svm, y_test_svm, X_test_svm, model_svm, 1, 0)[1]
evaluation_df['Recall'][1] = evaluation_metrics(test_df, 'manufacturer', y_pred_svm, y_test_svm, X_test_svm, model_svm, 1, 0)[2]

In [None]:
# y_pred_svm_grid = model_svm_grid.predict(X_test_svm)

In [None]:
# y_pred_svm_grid[0]

In [None]:
# evaluation_metrics(test_df, TARGET_NAME, y_pred_svm_grid, y_test_svm, X_test_svm, model_svm_grid, 1, 1)

In [None]:
# evaluation_df['Accuracy'][1] = accuracy_score(y_pred_svm_grid,y_test)
# evaluation_df['Precision'][1] = accuracy_score(y_pred_svm_grid,y_test)
# evaluation_df['Recall'][1] = accuracy_score(y_pred_svm_grid,y_test)

## Xception

In [None]:
y_pred_Xception = model_Xception.predict(X_test)

In [None]:
y_pred_Xception[0]

In [None]:
evaluation_metrics(test_df, TARGET_NAME, y_pred_Xception, y_test, X_test, model_Xception, 0, 1)

In [None]:
evaluation_df['Accuracy'][2] = evaluation_metrics(test_df, TARGET_NAME, y_pred_Xception, y_test, X_test, model_Xception, 0, 0)[0]
evaluation_df['Precision'][2] = evaluation_metrics(test_df, TARGET_NAME, y_pred_Xception, y_test, X_test, model_Xception, 0, 0)[1]
evaluation_df['Recall'][2] = evaluation_metrics(test_df, TARGET_NAME, y_pred_Xception, y_test, X_test, model_Xception, 0, 0)[2]

## VGG19

In [None]:
y_pred_VGG19 = model_VGG19.predict(X_test)

In [None]:
y_pred_VGG19[0]

In [None]:
evaluation_metrics(test_df, TARGET_NAME, y_pred_VGG19, y_test, X_test, model_VGG19, 0, 1)

In [None]:
evaluation_df['Accuracy'][3] = evaluation_metrics(test_df, TARGET_NAME, y_pred_VGG19, y_test, X_test, model_VGG19, 0, 0)[0]
evaluation_df['Precision'][3] = evaluation_metrics(test_df, TARGET_NAME, y_pred_VGG19, y_test, X_test, model_VGG19, 0, 0)[1]
evaluation_df['Recall'][3] = evaluation_metrics(test_df, TARGET_NAME, y_pred_VGG19, y_test, X_test, model_VGG19, 0, 0)[2]

# Best model

In [None]:
evaluation_df[evaluation_df["Accuracy"] == evaluation_df["Accuracy"].max()]

In [None]:
evaluation_df

In [None]:
evaluation_df.to_csv(f'{MODELS_DIR}/evaluation_df2.csv')

# Génération du fichier requirements.txt

In [2]:
# Write the `pip freeze` lines matching the listed package names to ../requirements.txt.
# NOTE(review): `findstr` is a Windows command, so this cell presumably only works on Windows — confirm.
!pip freeze | findstr "imread= keras= matplotlib= numpy= pandas= pickle= pil= pyyaml= PyYAML= scikit-learn= scikit-image= seaborn= streamlit= tensorflow= yaml=" > ../requirements.txt

In [2]:
# !pip freeze | findstr "==" > ../requirements.txt