In [None]:
## Colab only:
## remember to check that the GPU runtime is enabled.

# Mount Google Drive so that the notebook can access the files stored on it.
from google.colab import drive

drive.mount(mountpoint = '/content/drive')

In [None]:
## Modules nécessaires
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import sys, os
from pathlib import Path
import glob

from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.applications import EfficientNetB0
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau, TerminateOnNaN, EarlyStopping

In [None]:
## Check whether TensorFlow sees a GPU.
# The device name reported by TensorFlow is shown as the cell output.
gpu_device = tf.test.gpu_device_name()
gpu_device

In [None]:
## Copie locale des fichiers (tous dans le même dossier, changer le chemin d'accès)
#!cp -r "/content/drive/MyDrive/Leukopy/Data_PBC/Data_PBC.zip" .
#!unzip Data_PBC.zip

## Copie locale des fichiers (sous-répertoires training et testing => c'était utilisé avec flow_from_directory)
!cp -r "/content/drive/MyDrive/Leukopy/Data/Data.zip" .
!unzip Data.zip

In [None]:
## Alternative (disabled, kept commented out): all the data in one single folder.
## The label would be taken from the third '/'-separated component of each path.
# liste = glob.glob('./Data_PBC/*/*.jpg')
# liste = list(map(lambda x : [x, x.split('/')[2]], liste))
# df = pd.DataFrame(liste, columns = ['path', 'label'])

## Disabled split of `df` into test, then training and validation DataFrames:
#df, df_test = train_test_split(df, test_size = 0.2)
#df_train, df_valid = train_test_split(df, test_size = 0.12)

In [None]:
## Data already split into two folders, training and testing.
## Builds df_train / df_valid / df_test, each with a 'path' and a 'label' column.

SEED = 42  # fixed seed so that the train/validation split is the same on every run


def _list_labelled_images(pattern):
    """Return a sorted list of [path, label] pairs for the files matching `pattern`.

    The label is the name of the folder that directly contains the image,
    i.e. `<label>` in './Data/<split>/<label>/<file>.jpg'.

    Args:
        pattern: glob pattern selecting the image files.

    Returns:
        A list of two-element lists [path, label], sorted by path.

    Raises:
        FileNotFoundError: if no file matches `pattern` (e.g. the archive was not unzipped).
    """
    # glob returns files in arbitrary order: sorting makes the seeded split below reproducible.
    paths = sorted(glob.glob(pattern))
    if not paths:
        raise FileNotFoundError(f"No image matches {pattern!r}: was the archive copied and unzipped?")
    # Taking the parent folder name does not depend on how many components precede it in the path.
    return [[p, Path(p).parent.name] for p in paths]


# Image paths with their labels
liste_train = _list_labelled_images('./Data/training/*/*.jpg')
liste_test = _list_labelled_images('./Data/testing/*/*.jpg')

# DataFrames
df_train = pd.DataFrame(liste_train, columns = ['path', 'label'])
df_test = pd.DataFrame(liste_test, columns = ['path', 'label'])

# Validation set: 12 % of the training images, drawn with a fixed seed.
# The split is stratified on the label so that every class keeps the same proportion in both
# parts; stratification needs at least two images per class, otherwise fall back to a plain split.
train_labels = df_train['label']
can_stratify = train_labels.value_counts().min() >= 2
df_train, df_valid = train_test_split(df_train,
                                      test_size = 0.12,
                                      random_state = SEED,
                                      stratify = train_labels if can_stratify else None)

In [None]:
# Preprocessing
batch_size = 32
img_height = 224
img_width = 224

# Augmentation on the training set only. No rescaling here: EfficientNet normalises its inputs itself.
train_generator = ImageDataGenerator(rotation_range = 90,
                                     horizontal_flip = True,
                                     vertical_flip = True)
valid_generator = ImageDataGenerator()
test_generator = ImageDataGenerator()

# One explicit class list shared by the three sets. When `classes` is left to None, each set
# infers its own label -> index mapping, and the mappings silently differ as soon as one
# class is missing from one of the splits. Sorting reproduces the order Keras infers itself.
class_names = sorted(df_train['label'].unique())


def _flow_from(generator, dataframe, shuffle):
    """Create the batch iterator reading the images listed in `dataframe`.

    Args:
        generator: the ImageDataGenerator applying the (optional) augmentation.
        dataframe: DataFrame with a 'path' column (image file) and a 'label' column (class name).
        shuffle: whether the images are reshuffled at each epoch.

    Returns:
        The iterator returned by `generator.flow_from_dataframe`, yielding batches of
        `batch_size` RGB images resized to (img_height, img_width) with one-hot labels.
    """
    return generator.flow_from_dataframe(dataframe,
                                         directory = None,      # 'path' already holds the file path
                                         x_col = 'path',
                                         y_col = 'label',
                                         target_size = (img_height, img_width),
                                         color_mode = 'rgb',
                                         classes = class_names, # same mapping for every set
                                         class_mode = 'categorical',
                                         batch_size = batch_size,
                                         shuffle = shuffle)


# Batches, resizing, image loading.
# Only the training set is shuffled: keeping validation and test in a fixed order leaves the
# metrics unchanged and keeps predictions aligned with `.classes` / `.filenames`.
training_set = _flow_from(train_generator, df_train, shuffle = True)
validation_set = _flow_from(valid_generator, df_valid, shuffle = False)
testing_set = _flow_from(test_generator, df_test, shuffle = False)

In [None]:
# Index <-> label mapping of each set (alphanumerical order):
for split_name, split_set in (('Train :', training_set),
                              ('Valid :', validation_set),
                              ('Test  :', testing_set)):
    print(split_name, split_set.class_indices)

# The mapping of the training set is kept as the reference.
label_map = training_set.class_indices

In [None]:
# Callbacks

# Stops the training as soon as the loss becomes NaN.
TON = TerminateOnNaN()

# Saves the weights (not the full model) at the end of an epoch, only when 'val_loss' improved.
# NOTE(review): with Keras 3 the path presumably has to end in '.weights.h5' when
# save_weights_only is True - confirm against the TensorFlow version used on Colab.
save_model = ModelCheckpoint(
    filepath = '/content/drive/MyDrive/Leukopy/EfficientNetModel/B0_augment',
    monitor = 'val_loss',
    save_weights_only = True,
    save_best_only = True,
    save_freq = 'epoch',
)

# Multiplies the learning rate by 0.1 after 3 epochs without improvement of 'val_loss'.
control_lr = ReduceLROnPlateau(
    monitor = 'val_loss',
    mode = 'min',
    factor = 0.1,
    patience = 3,
    verbose = 1,
)

# Ends the training after 6 epochs without improvement of 'val_loss', then restores the best weights.
early_stopping = EarlyStopping(
    monitor = "val_loss",
    mode = 'min',
    patience = 6,
    restore_best_weights = True,
)

In [None]:
## Model: EfficientNetB0 used as a frozen feature extractor (transfer learning)

# Size of the output layer, read from the training iterator instead of being hard-coded,
# so that the network always matches the labels actually produced by the data pipeline.
n_classes = len(training_set.class_indices)

base_model = EfficientNetB0(include_top = False,
                            weights = 'imagenet',
                            input_shape = (img_height, img_width, 3),  # same size as the generators' target_size
                            pooling = 'avg')

# Freeze the whole pre-trained backbone: only the dense head added below is trained.
base_model.trainable = False

# Classification head on top of the pooled EfficientNet features
model = Sequential()
model.add(base_model)
model.add(layers.Dense(units = 1280, activation = 'relu'))
model.add(layers.Dense(units = 640, activation = 'relu'))
model.add(layers.Dense(units = n_classes, activation = 'softmax'))

# Compilation
model.compile(optimizer = 'adam',
              loss = "categorical_crossentropy",
              metrics = ["accuracy"])

# Training. `epochs` is an upper bound: the callbacks can end the training earlier.
epochs = 30
history = model.fit(training_set,
                    epochs = epochs,
                    validation_data = validation_set,
                    callbacks = [save_model, control_lr, TON, early_stopping])

In [None]:
# Training curve: accuracy on the training and validation sets, epoch by epoch
training_accuracy = history.history['accuracy']
validation_accuracy = history.history['val_accuracy']

# The callbacks can stop the training before `epochs` is reached, in which case the history
# holds fewer than `epochs` values: the x axis is therefore built from the history itself.
epochs_run = np.arange(len(training_accuracy))

fig, ax = plt.subplots()
ax.plot(epochs_run, training_accuracy, label = 'Training Set')
ax.plot(epochs_run, validation_accuracy, label = 'Validation Set')
ax.set_title('Training curve')
ax.set_xlabel('Epochs')
ax.set_ylabel('Accuracy')
# Show every recorded epoch; max(..., 1) avoids identical limits after a single epoch.
ax.set_xlim([0, max(len(epochs_run) - 1, 1)])
ax.set_ylim([0.5, 1])
ax.legend()
plt.show()

In [None]:
# Evaluation on the test data.
# The value returned by `evaluate` is displayed as the cell output.
model.evaluate(x = testing_set)