# Projet détection Covid par radio : Etape de modélisation

Implémentation du modèle LeNet à faire tourner avec Colab

### Import des packages

In [3]:
import tensorflow as tf
import numpy as np
import pandas as pd
import PIL
import matplotlib.pyplot as plt
import time
import os
import pathlib
import shutil
import tqdm
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Model, Sequential
import pickle
from tensorflow.keras import datasets, layers, models, losses
from tensorflow.keras.models import Sequential, Model
from tensorflow.keras.layers import Dense, Activation, Input
from tensorflow.keras.layers import Dropout
from tensorflow.keras.layers import Flatten
from tensorflow.keras.layers import Conv2D
from tensorflow.keras.layers import MaxPooling2D
from matplotlib import cm

from sklearn import metrics
import itertools

### Import du dataset

In [None]:
#Unzip file


In [None]:
#Load dataset in Keras

In [None]:
#1 : Indiquer le dossier et compter le nb d'images dans le dataset
dossier_train = ""
data_dir = pathlib.Path(dossier_train).with_suffix('')
image_count = len(list(data_dir.glob('*/*.png')))
print(f"{image_count} images dans le dataset")

2000 images dans le dataset


In [None]:
#2 : Chargement du dataset dans Keras
#define parameters for loader
batch_size = 32
img_height = 256
img_width = 256


#Create train set 80%
train_ds = tf.keras.utils.image_dataset_from_directory(
  data_dir,
  validation_split=0.2,
  subset="training",
  seed=123,
  image_size=(img_height, img_width),
  batch_size=batch_size,
  color_mode = "grayscale")

#Create validation set = 20%
val_ds = tf.keras.utils.image_dataset_from_directory(
  data_dir,
  validation_split=0.2,
  subset="validation",
  seed=123,
  image_size=(img_height, img_width),
  batch_size=batch_size,
  color_mode = "grayscale")

#Class names = from subfolders in Train folder
class_names = train_ds.class_names
num_classes = len(class_names)
print(class_names)


Found 2000 files belonging to 4 classes.
Using 1600 files for training.
Found 2000 files belonging to 4 classes.
Using 400 files for validation.


In [None]:
#Configure dataset for performance with CACHE (not loading images at each epoch) and PREFETCH (overlapping data processing and model execution)
AUTOTUNE = tf.data.AUTOTUNE
train_ds = train_ds.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
val_ds = val_ds.cache().prefetch(buffer_size=AUTOTUNE)

### Construction de l'algorithme LeNet

In [None]:
#Implémentation du CNN Le Net5
# Architecture du modèle
inputs=Input(shape = (256, 256 ,1), name = "Input")

lenet = Sequential([
    layers.Rescaling(1./255, input_shape=(img_height, img_width, 1)),
    layers.Conv2D(filters = 30,                     # Nombre de filtres
                kernel_size = (5, 5),            # Dimensions du noyau
                padding = 'valid',               # Mode de Dépassement
                input_shape = (28, 28, 1),       # Dimensions de l'image en entrée
                activation = 'relu', 
                name = "Conv1"),             # Fonction d'activation
    layers.MaxPooling2D(pool_size = (2, 2)),
    layers.Conv2D(filters = 16,                    
                kernel_size = (3, 3),          
                padding = 'valid',             
                activation = 'relu',
                name = "Conv2"),
    layers.MaxPooling2D(pool_size = (2, 2)),
    layers.Flatten(),
    layers.Dropout(rate = 0.2),
    layers.Dense(units = 128,
                activation = 'relu',
                name = "Dense1"),
    layers.Dense(units = 10,
                activation = 'softmax',
                name = "Dense_final")],
        name = "LeNet")

# Compilation
lenet.compile(loss='categorical_crossentropy',  # fonction de perte
              optimizer='adam',                 # algorithme de descente de gradient
              metrics=['accuracy'])             # métrique d'évaluation


#Résumé
lenet.summary()

In [None]:
#Définir les callbacks : Earlystopping, Tensorboard

#Early stopping
Early_Stopping = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss', 
    patience = 10,
    verbose = 1,
    restore_best_weights = True)

#Tensorboard
log_dir = "logs/fit/" + datetime.datetime.now().strftime("%Y%m%d-%H%M%S")
tensorboard_callback = tf.keras.callbacks.TensorBoard(log_dir=log_dir, histogram_freq=1)

In [None]:
# Entraînement_1
#Entraînement du modèle

model = lenet #update when changing models

#Train model
epochs=100
history = model.fit(
  train_ds,
  validation_data=val_ds,
  epochs=epochs,
  callbacks = [Early_Stopping, tensorboard_callback]
)



In [None]:
%load_ext tensorboard
#%reload_ext tensorboard

In [None]:
#Visualize training results
train_acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

train_loss = history.history['loss']
val_loss = history.history['val_loss']

epochs_range = range(epochs)

plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.plot(train_acc, label='Training Accuracy')
plt.plot(val_acc, label='Validation Accuracy')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

plt.subplot(1, 2, 2)
plt.plot(train_loss, label='Training Loss')
plt.plot(val_loss, label='Validation Loss')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')
plt.show()


In [4]:
#Sauvegarde du modèle
model = lenet
#Sauvegarde du modèle
dossier_sauv = "../../models/Thibaut/"
nom_modèle = f"{model._name}.-valacc{val_acc[-1]:.2f}.keras"
model.save(os.path.join(dossier_sauv, nom_modèle))  # The file needs to end with the .keras extension

NameError: name 'model' is not defined

In [None]:
#Export results to dict & csv
#Nom modèle : model._name, date: , dataset folder, dataset size, ephocs : len(val_acc), best_model : train_acc, val_acc, train_loss, val_loss
#datetime.datetime.now().strftime("%Y%m%d-%H%M%S")

#Dump to a pickle file
# Pickle the history to file
filepath = f"{model._name}_history.pkl"
with open(filepath, 'wb') as f:
    pickle.dump(history, f)


# convert the history.history dict to a pandas DataFrame:     
hist_df = pd.DataFrame(history.history) 

# or save to csv: 
hist_csv_file = f"{model._name}_history.csv"
with open(hist_csv_file, mode='w') as f:
    hist_df.to_csv(f)