In [None]:
# Mount Google Drive into the Colab runtime at /content/drive
from google.colab import drive
drive.mount('/content/drive')

In [None]:
# Required modules
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

import sys, os
from pathlib import Path
import glob

from sklearn.model_selection import train_test_split

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing import image_dataset_from_directory
from tensorflow.keras.preprocessing.image import ImageDataGenerator
from tensorflow.keras.callbacks import ModelCheckpoint, ReduceLROnPlateau

In [None]:
## Copie locale des fichiers (tous dans le même dossier, changer le chemin d'accès)
#!cp -r "/content/drive/MyDrive/Leukopy/Data_PBC/Data_PBC.zip" .
#!unzip PBC_dataset_normal_DIB.zip

## Copie locale des fichiers (sous-répertoires training et testing => c'était utilisé avec flow_from_directory)
!cp -r "/content/drive/MyDrive/Leukopy/Data/Data.zip" .
!unzip Data.zip

In [None]:
## All data in one single folder (alternative loading path, currently disabled):
# liste = glob.glob('./Data_PBC/*/*.jpg')
# liste = list(map(lambda x : [x, x.split('/')[2]], liste))
# df = pd.DataFrame(liste, columns = ['path', 'label'])

#df, df_test = train_test_split(df, test_size = 0.2)
#df_train, df_valid = train_test_split(df, test_size = 0.12)

In [None]:
## Data already split into two folders, training and testing.
# Layout matched by the glob patterns below: ./Data/<training|testing>/<label>/<image>.jpg

# Fixed seed so that the train/validation split is the same on every run
SPLIT_SEED = 42


def _list_labelled_images(pattern):
    """Return ``[path, label]`` pairs for every file matching ``pattern``.

    The label is the name of the directory that directly contains the file.
    Paths are sorted because ``glob`` returns them in arbitrary order.

    Raises:
        FileNotFoundError: if no file matches ``pattern`` (for example when
            the archive was not extracted into the working directory).
    """
    paths = sorted(glob.glob(pattern))
    if not paths:
        raise FileNotFoundError(f"No image found for pattern {pattern!r}")
    # Path(...).parent.name does not depend on how many components precede the label
    return [[p, Path(p).parent.name] for p in paths]


# Image paths with the label of each image
liste_train = _list_labelled_images('./Data/training/*/*.jpg')
liste_test = _list_labelled_images('./Data/testing/*/*.jpg')

# DataFrames
df_train = pd.DataFrame(liste_train, columns = ['path', 'label'])
df_test = pd.DataFrame(liste_test, columns = ['path', 'label'])

# Validation set: 12% of the training images.
# Stratified on the label so every class keeps the same proportion in both parts.
df_train, df_valid = train_test_split(df_train,
                                      test_size = 0.12,
                                      random_state = SPLIT_SEED,
                                      stratify = df_train['label'])

In [None]:
# Preprocessing

batch_size = 32
img_height  = 256
img_width = 256

# Augmentation on the training set only. Every image is rescaled by 1/255.
train_generator = ImageDataGenerator(rotation_range = 90,
                                     horizontal_flip = True,
                                     vertical_flip = True,
                                     rescale = 1./255)
valid_generator = ImageDataGenerator(rescale = 1./255)
test_generator = ImageDataGenerator(rescale = 1./255)

# One shared, sorted class list for the three subsets. With classes = None each
# iterator infers the classes from its own dataframe, so a subset missing one
# label would end up with a different label -> index mapping.
class_names = sorted(set(df_train['label']) | set(df_valid['label']) | set(df_test['label']))


def _flow_from_frame(generator, frame, shuffle):
    """Build a batched image iterator over ``frame`` with the shared settings.

    Args:
        generator: the ImageDataGenerator applying rescaling/augmentation.
        frame: DataFrame with a 'path' column (image file) and a 'label' column.
        shuffle: whether the iterator shuffles the samples.

    Returns:
        The iterator returned by ``generator.flow_from_dataframe``.
    """
    return generator.flow_from_dataframe(frame,
                                         directory = None, # paths are taken from x_col
                                         x_col = 'path',
                                         y_col = 'label',
                                         target_size = (img_height, img_width),
                                         color_mode = 'rgb',
                                         classes = class_names,
                                         class_mode = 'categorical',
                                         batch_size = batch_size,
                                         shuffle = shuffle)


# Batching, resizing and loading of the images.
# Only the training iterator is shuffled: keeping validation/test in dataframe
# order lets predictions be matched with the iterator's .classes / .filenames.
training_set = _flow_from_frame(train_generator, df_train, shuffle = True)
validation_set = _flow_from_frame(valid_generator, df_valid, shuffle = False)
testing_set = _flow_from_frame(test_generator, df_test, shuffle = False)

In [None]:
# Index/label correspondence of each subset (alphanumeric order):
for prefix, subset in (('Train :', training_set),
                       ('Valid :', validation_set),
                       ('Test  :', testing_set)):
    print(prefix, subset.class_indices)

# The mapping of the training set is kept as the reference label map
label_map = training_set.class_indices

In [None]:
# LeNet model

# Size the output layer from the label map instead of hard-coding the class
# count, so the model follows the dataset if classes are added or removed.
num_classes = len(label_map)

model = Sequential(
    [# First convolution block, takes the resized RGB images as input
     layers.Conv2D(filters = 30,
                   kernel_size = (5,5),
                   activation = 'relu',
                   padding = 'valid',
                   input_shape = (img_height, img_width, 3)),
     layers.MaxPooling2D(pool_size = (2,2)),
     # Second convolution block
     layers.Conv2D(filters = 16,
                   kernel_size = (3,3),
                   activation = 'relu',
                   padding = 'valid'),
     layers.MaxPooling2D(pool_size = (2,2)),
     layers.Dropout(rate = 0.2),
     # Classification head: one softmax output per class
     layers.Flatten(),
     layers.Dense(units = 128, activation = 'relu'),
     layers.Dense(units = num_classes, activation  = 'softmax')
     ])

# categorical_crossentropy matches the one-hot labels produced by class_mode = 'categorical'
model.compile(optimizer = 'adam',
              loss = "categorical_crossentropy",
              metrics = ["accuracy"])

In [None]:
# Train the model on the training set, monitoring the validation set
n_epochs = 20
history = model.fit(training_set,
                    validation_data = validation_set,
                    epochs = n_epochs)

In [None]:
# Training curve

In [None]:
# Accuracy recorded by model.fit, one value per completed epoch
training_accuracy = history.history['accuracy']
validation_accuracy = history.history['val_accuracy']

# Derive the x axis from the recorded history rather than hard-coding the
# epoch count: the plot stays valid if the number of epochs changes.
n_recorded = len(training_accuracy)
epoch_axis = np.arange(n_recorded)

plt.figure()
plt.plot(epoch_axis, training_accuracy, label = 'Training Set')
plt.plot(epoch_axis, validation_accuracy, label = 'Validation Set')
plt.xlabel('Epochs')
plt.ylabel('Accuracy')
# max(..., 1) avoids identical lower/upper limits when only one epoch was recorded
plt.xlim([0, max(n_recorded - 1, 1)])
# Values below 0.5 fall outside the displayed range
plt.ylim([0.5,1])
plt.legend()
plt.show()

In [None]:
# Evaluate the trained model on the test set; as the last expression of the
# cell, the returned metrics are displayed as the cell output.
model.evaluate(testing_set)

In [None]:
# Save the model to Drive
# NOTE(review): the target path has no file extension; recent Keras releases
# appear to require a `.keras` or `.h5` suffix in model.save — confirm against
# the installed version before relying on this cell.
model.save("/content/drive/MyDrive/Leukopy/LeNetModel/model_lenet_augment")