# CNN Transfer learning avec ResNet

## Import des données

In [None]:
!git clone -b dev https://github.com/Finweto/Projet-deep-learning-2022.git
!ls 

## Utilisation d'un GPU

In [None]:
!nvidia-smi

## Import des librairies nécessaires

In [None]:
import itertools
import os
import pathlib
import random
import typing

import cv2
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import seaborn
import sklearn.utils
import sklearn.metrics
import tensorflow as tf
import keras

from keras.models import Sequential
from keras.layers import RandomFlip, RandomZoom, RandomRotation

## Préparation des données



In [None]:
# Class names of the dataset. The position of a name in this list is used
# elsewhere in the notebook as its integer class id, so the order matters.
label_names = [
    "convolvulaceae",
    "monimiaceae",
    "amborella",
    "castanea",
    "desmodium",
    "eugenia",
    "laurus",
    "litsea",
    "magnolia",
    "rubus",
    "ulmus",
]

def get_images(filename, root="Projet-deep-learning-2022/", size=(224, 224)):
  """Load and resize every image listed in a labelled CSV file.

  The rows of the CSV are shuffled before loading, so the order of the
  returned lists is random and differs between calls.

  Args:
    filename: Path of a CSV file with an ``img_path`` and a ``label`` column.
    root: Directory that the ``img_path`` entries are relative to.
    size: Target size handed to ``cv2.resize`` for every image.

  Returns:
    A ``(images, labels)`` tuple of two equally long lists: the resized
    images as returned by ``cv2.resize`` and the matching ``label`` values.

  Raises:
    FileNotFoundError: If ``cv2.imread`` cannot read one of the images.
  """
  df = pd.read_csv(filename)
  # Shuffle the rows and rebuild a clean 0..n-1 index.
  df = df.sample(frac=1).reset_index(drop=True)

  images = []
  labels = []

  for rel_path, label in zip(df["img_path"], df["label"]):
    path = os.path.join(root, rel_path)

    # cv2.imread signals failure by returning None instead of raising, which
    # would otherwise surface as an obscure error inside cv2.resize.
    image = cv2.imread(path)
    if image is None:
      raise FileNotFoundError(f"Could not read image: {path}")

    images.append(cv2.resize(image, size))
    labels.append(label)

  return images, labels

# Reverse lookup: class name -> position of that name in label_names.
label_to_index = dict(zip(label_names, range(len(label_names))))

In [None]:
# Load the labelled training and test sets described by the two CSV files of
# the cloned repository.
images, labels = get_images(
    "Projet-deep-learning-2022/data_train_labeled_v3.csv"
)
images_test, labels_test = get_images(
    "Projet-deep-learning-2022/data_test_labeled_v3.csv"
)

In [None]:
from sklearn.model_selection import train_test_split

# Hold out 20 % of the training data for validation. The split is stratified
# on the labels and is done before the model is built.
(images,
 images_validation,
 labels,
 labels_validation) = train_test_split(
    images,
    labels,
    test_size=0.2,
    stratify=labels,
)

In [None]:
# Turn the Python lists produced by the loading/splitting cells into NumPy
# arrays, one split at a time.
images = np.array(images)
labels = np.array(labels)

images_test = np.array(images_test)
labels_test = np.array(labels_test)

images_validation = np.array(images_validation)
labels_validation = np.array(labels_validation)

# Encode every class name as its position in label_names.
labels_int = np.array([label_names.index(name) for name in labels])

labels_test_int = np.array([label_names.index(name) for name in labels_test])

labels_validation_int = np.array(
    [label_names.index(name) for name in labels_validation]
)

In [None]:
from keras.preprocessing.image import ImageDataGenerator

# The generator is created without any option: no augmentation or rescaling
# is configured here, it only serves shuffled batches of 30 samples.
datagen = ImageDataGenerator()

train_augm = datagen.flow(
    x=images,
    y=labels_int,
    batch_size=30,
    shuffle=True,
)
valid_augm = datagen.flow(
    x=images_validation,
    y=labels_validation_int,
    batch_size=30,
    shuffle=True,
)


## Affichage des données

In [None]:
# Bar chart of how many training samples carry each label.
seaborn.countplot(x=labels).set(
    title="Décomptes des différents labels",
    ylabel="Décompte",
    xlabel="Label",
)
plt.show()

In [None]:
import random

# Draw up to 15 distinct indices among the first 21 training images (fewer
# when the dataset is smaller). random.sample guarantees distinct values, so
# no manual de-duplication is needed.
candidate_count = min(21, len(images))
random_indexes = random.sample(range(candidate_count),
                               k=min(15, candidate_count))

# Show every sampled image together with its label, one figure per image.
for random_index in random_indexes:
  plt.figure()
  # The images were read with cv2.imread, which stores channels as BGR, while
  # imshow interprets them as RGB: reverse the channel axis for display only.
  plt.imshow(images[random_index][..., ::-1])
  plt.title(f'Exemple [{random_index}] {labels[random_index]}')
  plt.axis('off')

plt.show()

## Modèle de transfer learning avec ResNet101


In [None]:
from keras.applications import ResNet101
from keras import layers
from sklearn.model_selection import KFold

# Upper bound on the number of training epochs; also read by the training cell.
epochs=200

# NOTE(review): an unfinished K-fold scaffold stood here. It used the undefined
# names `num_folds`, `inputs` and `targets` and ended with a `for` statement
# that had no body, so the cell could not even be parsed. It has been removed;
# cross-validation has to be reintroduced as a complete loop around the model
# construction and training if it is wanted.

# ImageNet-pretrained ResNet101 without its classification head, wrapped in a
# Sequential so it can be frozen as a single unit.
# NOTE(review): the wrapper is named "densenet" although it holds a ResNet101;
# the name is kept as-is because it is part of the saved model structure.
pretrained = keras.Sequential([
    ResNet101(input_shape=(224,224,3), weights="imagenet", include_top=False)
],name="densenet")

# Freeze the backbone: only the layers added below are trained.
pretrained.trainable = False

# NOTE(review): images reach the backbone as loaded by cv2, with no call to
# keras.applications.resnet.preprocess_input — confirm whether that is intended.

# Classification head: pooled backbone features, two dense layers with
# dropout, then one softmax output per class (11 classes).
model = keras.Sequential([
    pretrained,
    layers.GlobalAveragePooling2D(),
    layers.Dropout(0.5),
    layers.Dense(128,activation = "relu"),
    layers.Dropout(0.5),
    layers.Dense(256,activation = "relu"),
    layers.Dropout(0.5),
    # 'softmax' (lower case) is the documented activation identifier.
    layers.Dense(units=11, activation='softmax')
], name="model")

In [None]:
# Integer class ids are used as targets, hence the sparse cross-entropy loss.
# The metric is registered under the short name 'acc', which is the name the
# training history and the checkpoint monitor ('val_acc') rely on.
model.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss="sparse_categorical_crossentropy",
    metrics=['acc'],
)

In [None]:
import math
from keras.callbacks import EarlyStopping, ReduceLROnPlateau, ModelCheckpoint

# Stop training once val_loss has not improved for 10 % of the epoch budget.
early = EarlyStopping(monitor="val_loss",
                      patience=math.floor(epochs*0.1))

# Multiply the learning rate by 0.3 after 2 epochs without val_loss
# improvement, never going below 1e-6.
learning_rate_reduction = ReduceLROnPlateau(monitor="val_loss",
                                             patience=2,
                                             verbose=1,
                                             factor=0.3,
                                             min_lr=0.000001)

# Keep, under the path 'model', only the weights of the epoch with the
# highest validation accuracy.
model_checkpoint_callback = ModelCheckpoint(
    filepath='model',
    save_weights_only=True,
    monitor='val_acc',
    mode='max',
    save_best_only=True)


# The learning-rate scheduler was created above but never registered; it is
# now passed to fit() together with the two other callbacks.
training = model.fit(
    train_augm,
    validation_data=valid_augm,
    epochs=epochs,
    callbacks=[early, learning_rate_reduction, model_checkpoint_callback]
)

## Affichage de l'entraînement

In [None]:
# Plot the training metrics
def _plot_train_val(history, key, title, ylabel, legend_loc) -> None:
  """Plot one metric and, when recorded, its ``val_`` counterpart."""
  legend = []
  for name, legend_label in ((key, "Entraînement"),
                             ("val_" + key, "Validation")):
    # A curve is only drawn when the history actually contains it, e.g. there
    # is no validation curve when fit() ran without validation data.
    if name in history:
      plt.plot(history[name])
      legend.append(legend_label)
  plt.title(title)
  plt.ylabel(ylabel)
  plt.xlabel("Epoch")
  plt.legend(legend, loc=legend_loc)
  plt.show()


def plot_metrics(history) -> None:
  """Plot the accuracy and loss curves of a training run.

  Args:
    history: Either the mapping of metric name to per-epoch values (the
      ``.history`` attribute of the object returned by ``model.fit``) or that
      object itself.
  """
  # Accept the History object as well as its underlying dict.
  history = getattr(history, "history", history)

  # The accuracy key follows the metric name given to compile(): this notebook
  # uses 'acc', other code commonly uses 'accuracy'.
  acc_key = "acc" if "acc" in history else "accuracy"

  _plot_train_val(history, acc_key, "Accuracy du modèle", "Accuracy",
                  "upper left")
  _plot_train_val(history, "loss", "Perte du modèle", "Perte", "upper right")

In [None]:
# Draw the accuracy and loss curves recorded during training.
plot_metrics(history=training.history)

## Évaluation des performances sur l'ensemble de test


In [None]:
# Model as it stands at the end of training, scored on the test set.

model.evaluate(x=images_test, y=labels_test_int)

In [None]:
# Best checkpoint of the training run (weights saved under 'model'), scored
# on the test set.
#
# NOTE(review): `modelopti` is only a second name for the `model` object, no
# copy is made. Loading the checkpoint therefore also replaces the weights
# seen through `model` in every later cell.

modelopti = model

modelopti.load_weights('model')

modelopti.compile(
    optimizer=keras.optimizers.Adam(learning_rate=0.001),
    loss="sparse_categorical_crossentropy",
    metrics=['acc'],
)

modelopti.evaluate(images_test,labels_test_int)

## Analyse d'erreur

On affiche la matrice de confusion pour voir quels labels sont les mieux prédits.

In [None]:
def analyze_preds(preds, labels):
  """Plot a row-normalised confusion matrix and the predicted class counts.

  Args:
    preds: Predicted integer class ids, one per sample.
    labels: True integer class ids, in the same order as ``preds``.
  """
  # Always build a full len(label_names) x len(label_names) matrix so the tick
  # labels stay aligned even when a class never occurs in the data.
  class_ids = list(range(len(label_names)))

  # scikit-learn expects (y_true, y_pred): rows are the true classes and
  # normalize="true" makes every row sum to 1.
  confusion_matrix = sklearn.metrics.confusion_matrix(labels,
                                                      preds,
                                                      labels=class_ids,
                                                      normalize="true")
  seaborn.heatmap(confusion_matrix,
                  cmap="rocket_r",
                  xticklabels=label_names,
                  yticklabels=label_names)
  plt.title("Matrice de confusion")
  plt.xlabel("Classe prédite")
  plt.ylabel("Classe réelle")
  plt.show()

  # Distribution of the predicted classes, shown by class name.
  seaborn.countplot(x=[label_names[pred] for pred in preds])
  plt.title("Décomptes des classes prédites")
  plt.ylabel("Décompte")
  plt.xlabel("Class")
  plt.show()


# The predicted class of each test image is the index of its largest output
# score; the predictions are then compared with the true test labels.
test_pred = model.predict(images_test).argmax(axis=-1)
analyze_preds(preds=test_pred, labels=labels_test_int)