## 1.2 Dataset

In [None]:
# !pip install glob2

In [None]:
%load_ext autoreload
%autoreload 2

import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.utils import image_dataset_from_directory
import numpy as np
import sys


# Main configuration shared by the models of this notebook
IMG_SIZE          = 400             # side length, in pixels, of the square images loaded and passed to the model builders (400x400)
NB_EPOCHS_DENOISE = 40               # number of training epochs for the denoising model
BATCH_SIZE        = 4            # number of images per batch
SAVE_MODEL_DENOISE = "denoiser.h5"     # file name used to save the denoising model

def process(image):
    """Divide the pixel values of ``image`` by 255 and return them as float32."""
    scaled = image / 255.
    return tf.cast(scaled, tf.float32)


# Import du .env
import dotenv
import os

# Load the project's .env file. The location below is machine-specific:
# CHANGE THE PATH to match your own setup.
# load_dotenv() reports whether variables were actually loaded from the file.
if not dotenv.load_dotenv('/home/cesi/datascience/.env.local'):
    print(
        "Warning: '/home/cesi/datascience/.env.local' was not loaded; "
        "relying on variables already present in the environment.",
        file=sys.stderr,
    )

# Directory that contains the model-builder modules imported right after this.
models_path = os.environ.get('MODELS_PATH_LIVRABLE2')
if not models_path:
    # Fail here with an actionable message instead of letting the imports
    # below fail because an unusable entry was pushed onto sys.path.
    raise RuntimeError(
        "MODELS_PATH_LIVRABLE2 is not set: check the .env path above or export the variable."
    )
sys.path.insert(0, models_path)

import builder_vae
import homemade
import test2
import resnet
import test3


# Locations read from the environment (each is None when its variable is unset):
# prefix under which weights/figures are written, and root folder of the image dataset.
SAVE_WEIGHTS_PATH = os.getenv('WEIGHT_PATH_LIVRABLE2')
SOURCE_LIVRABLE2_PATH = os.getenv('SOURCE_LIVRABLE2_PATH')

In [None]:

# Build the two datasets from the image folder in a single call
# (subset="both" returns the training and validation subsets as a pair).
# Labels are produced by the loader; the mapping lambdas of later cells drop them.
# NOTE(review): validation_split=0.9 reserves 90% of the files for x_test and
# leaves only 10% for x_train — confirm this ratio is intended and not inverted.
x_train, x_test = image_dataset_from_directory(
    SOURCE_LIVRABLE2_PATH,
    image_size=(IMG_SIZE, IMG_SIZE),
    batch_size=BATCH_SIZE,
    label_mode="categorical",
    # label_mode=None,
    shuffle=False,
    validation_split=0.9,
    subset="both",
    seed=123,
    color_mode="rgb"
)

AUTOTUNE = tf.data.experimental.AUTOTUNE


def _scale_pixels(images, labels):
    """Divide pixel values by 255; labels are passed through unchanged."""
    return images / 255, labels


# AUTOTUNE lets tf.data pick the parallelism of the rescaling step
# (without it the map runs sequentially); element order is preserved.
x_train = x_train.map(_scale_pixels, num_parallel_calls=AUTOTUNE)
x_test = x_test.map(_scale_pixels, num_parallel_calls=AUTOTUNE)

Commençons par écrire une fonction qui permet de visualiser les $n$ premiers enregistrements en noir et blanc.

In [None]:
import matplotlib.pyplot as plt
from random import uniform
import tensorflow as tf
# os.chdir(r'/tf')
from keras_cv.layers import RandomGaussianBlur

def add_noise(img, perturbation_conf):
    """Return a ``(degraded, reference)`` pair built from ``img``.

    Args:
        img: Image tensor (or batch of images). Values are expected on the
            [0, 1] scale: the noise standard deviation is ``noise_factor / 255``
            and noisy results are clipped back to [0, 1].
        perturbation_conf: Mapping holding a ``'perturbation'`` code plus the
            parameters that code needs:
            1 -> Gaussian blur (``'blur_kernel_size'``);
            2 -> additive Gaussian noise (``'noise_factor'``);
            3 -> noise followed by blur (both keys);
            any other value -> the image is returned untouched.

    Returns:
        Tuple ``(noised_img, img)``: the degraded image first, the original
        ``img`` second.

    Raises:
        KeyError: if ``'perturbation'`` or a key required by the selected
            perturbation is missing.
    """
    perturbation = perturbation_conf['perturbation']

    def _with_gaussian_noise(image, noise_value):
        # Zero-mean Gaussian noise; clipping keeps the result a valid image
        # instead of leaving values below 0 or above 1.
        noise = tf.random.normal(shape=tf.shape(image), mean=0.0, stddev=noise_value/255, dtype=tf.float32)
        return tf.clip_by_value(tf.cast(image, tf.float32) + noise, 0.0, 1.0)

    if perturbation == 1:
        blur_kernel_value = perturbation_conf['blur_kernel_size']
        noised_img = RandomGaussianBlur(kernel_size=blur_kernel_value, factor=(0.5, 3))(img)
    elif perturbation == 2:
        noised_img = _with_gaussian_noise(img, perturbation_conf['noise_factor'])
    elif perturbation == 3:
        noise_value = perturbation_conf['noise_factor']
        blur_kernel_value = perturbation_conf['blur_kernel_size']
        noised_img = _with_gaussian_noise(img, noise_value)
        # Blur is applied on top of the noisy image, with a narrower factor range than blur-only.
        noised_img = RandomGaussianBlur(kernel_size=blur_kernel_value, factor=(0.5, 1))(noised_img)
    else:
        noised_img = img

    return noised_img, img


# Perturbation presets passed to add_noise, keyed by an integer id.
noise_configuration = {
    0:{
    'perturbation': 1,
    'blur_kernel_size': 6,
    },
    1:{
    'perturbation': 2,
    'noise_factor': 20
    },
    2:{
    'perturbation': 3,
    'noise_factor': 12,
    'blur_kernel_size': 2
    },
    3:{
    'perturbation': 0
    }
}


def _perturbed(dataset, preset_id):
    """Map ``dataset`` to the pairs returned by add_noise for one preset; labels are dropped."""
    preset = noise_configuration[preset_id]
    return dataset.map(lambda images, labels: add_noise(images, preset))


x_train_noisy = _perturbed(x_train, 1)
x_test_noisy = _perturbed(x_test, 1)

x_train_blur = _perturbed(x_train, 0)
x_test_blur = _perturbed(x_test, 0)

x_train_noise_blur = _perturbed(x_train, 2)
x_test_noise_blur = _perturbed(x_test, 2)

x_train_clean = _perturbed(x_train, 3)
x_test_clean = _perturbed(x_test, 3)

# Training set: draw batches at random from the four variants. A plain
# concatenation would feed every clean batch, then every noisy batch, and so
# on, because Keras ignores fit(shuffle=...) for tf.data.Dataset inputs.
# Sampling interleaves the variants without holding a shuffle buffer of
# 400x400 batches in memory.
random_train_set = tf.data.Dataset.sample_from_datasets(
    [x_train_clean, x_train_noisy, x_train_blur, x_train_noise_blur]
)
# Evaluation does not depend on element order, so the test set stays a concatenation.
random_test_set = x_test_clean.concatenate(x_test_noisy).concatenate(x_test_blur).concatenate(x_test_noise_blur)


# Show any number of images side by side on one matplotlib row
def display_images(*images):
    """Render every image passed in, left to right, on a single row without axes."""
    figure = plt.figure(figsize=(50, 50))
    column_count = len(images)
    for position, picture in enumerate(images, start=1):
        axes = figure.add_subplot(1, column_count, position)
        axes.imshow(picture)
        axes.axis('off')
    plt.show()


Voyons ce que ça donne :

# AutoEncoder

In [None]:
# Key of the architecture to build; load_model handles 'homemade', 'vae', 'test2', 'test3' and 'resnet'.
MODEL_CHOSEN = 'test2'

def load_model(model_choosen):
    match(model_choosen):
        case 'homemade':
            model = homemade.build(IMG_SIZE)
        case 'vae':
            model = builder_vae.build(IMG_SIZE, 64)
        case 'test2':
            model = test2.build(IMG_SIZE)
        case 'test3':
            model = test3.build(IMG_SIZE)
        case 'resnet':
            model = resnet.build(IMG_SIZE)
    return model
# Build the architecture selected by MODEL_CHOSEN.
autoencoder = load_model(MODEL_CHOSEN)

### Entraînement de l'auto-encodeur

On va ensuite entraîner l'auto-encodeur en utilisant les constantes définies au début (`NB_EPOCHS_DENOISE`, `BATCH_SIZE`).

In [None]:
# path = createTrainingData.create_training_data(weight_path, model, model_choosen, num_classes, image_h, image_w, batch_size)

# checkpoint_path = path+"/cp-{epoch:04d}.ckpt"
# checkpoint_dir = os.path.dirname(checkpoint_path)

# weights_callback = tf.keras.callbacks.ModelCheckpoint(
#     filepath=checkpoint_path,
#     verbose=1,
#     save_weights_only=True,
#     save_freq='epoch')

# history = model.fit(train_set, epochs=epochs, validation_data=test_set, callbacks=[weights_callback])

# Train for the number of epochs set in the configuration cell.
# `shuffle` is not passed: Keras ignores it when the input is a
# tf.data.Dataset, so batches arrive in whatever order random_train_set yields.
history = autoencoder.fit(
    random_train_set,
    epochs=NB_EPOCHS_DENOISE,
    validation_data=random_test_set
)

Affichez maintenant la courbe d'apprentissage. Que pensez-vous des performances du modèle ?

In [None]:
# Per-epoch metrics recorded by fit().
acc = history.history['accuracy']
val_acc = history.history['val_accuracy']

loss = history.history['loss']
val_loss = history.history['val_loss']

# One x position per recorded epoch: a hard-coded range makes plt.plot raise
# as soon as the number of trained epochs differs from it.
epochs_range = range(len(loss))

plt.figure(figsize=(16, 8))
plt.subplot(1, 2, 1)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_acc, label='Validation Accuracy')
plt.xlabel('Epoch')
plt.legend(loc='lower right')
plt.title('Training and Validation Accuracy')

plt.subplot(1, 2, 2)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.xlabel('Epoch')
plt.legend(loc='upper right')
plt.title('Training and Validation Loss')

# Saved before show() so the written figure is the one drawn above.
plt.savefig(SAVE_WEIGHTS_PATH + MODEL_CHOSEN + '/training_400.png')

plt.show()

In [None]:
# Weights file of the selected model. SAVE_WEIGHTS_PATH is concatenated as-is, so it must end with a path separator.
model_weight_path = SAVE_WEIGHTS_PATH + MODEL_CHOSEN + '/weights_400.h5' 

In [None]:
# Write the autoencoder's current weights to model_weight_path.
autoencoder.save_weights(model_weight_path)

In [None]:
# Read the weights stored at model_weight_path back into the autoencoder.
autoencoder.load_weights(model_weight_path)

In [None]:
# Ten batches of each test variant, evaluated separately.
test_sets = dict(
    noisy=x_test_noisy.take(10),
    blur=x_test_blur.take(10),
    noise_blur=x_test_noise_blur.take(10),
    clean=x_test_clean.take(10),
    # 'overall': random_test_set
)
results = {}
accuracy, loss = {}, {}
for test_set_name, subset in test_sets.items():
    # Unpacked as (loss, accuracy) — assumes the model reports exactly one metric; TODO confirm.
    subset_scores = autoencoder.evaluate(subset)
    loss[test_set_name], accuracy[test_set_name] = subset_scores
# 'overall' is the unweighted mean of the per-subset values computed above.
accuracy['overall'] = np.mean([*accuracy.values()])
loss['overall'] = np.mean([*loss.values()])

In [None]:
# Bar charts of the per-subset accuracy and loss.
colors = ['b', 'g', 'r', 'c', 'm', 'y', 'k']
bars = plt.bar(range(len(accuracy)), list(accuracy.values()), align='center', color=colors, alpha=0.8, width=0.5)
plt.xticks(range(len(accuracy)), list(accuracy.keys()))

plt.title('Accuracy for each test set')
# Save next to the other artefacts of the selected model rather than under a
# hard-coded "resnet/" folder, which mislabels results for any other model.
plt.savefig(SAVE_WEIGHTS_PATH + MODEL_CHOSEN + "/accuracy_400.png")
plt.show()

plt.bar(range(len(loss)), list(loss.values()), align='center', color=colors, alpha=0.8, width=0.5)
plt.xticks(range(len(loss)), list(loss.keys()))
plt.title('Loss for each test set')
plt.savefig(SAVE_WEIGHTS_PATH + MODEL_CHOSEN + "/loss_400.png")
plt.show()

In [None]:
# Run the autoencoder over the whole combined test set.
# NOTE(review): no later visible cell reads `predicitions` (its only use is commented out) — confirm it is still needed.
predicitions = autoencoder.predict(random_test_set)

In [None]:
# Pull ONE batch from the noisy test set. Each element is the pair returned
# by add_noise, so the noisy and clean images below are aligned and come from
# the same random draw.
noisy_batch, clean_batch = next(iter(x_test_noisy.take(1).as_numpy_iterator()))
# Reconstruct from the noisy inputs (not the clean ones) so the third panel
# shows what the model actually recovers; predict once, outside the loop.
denoised_batch = autoencoder.predict(noisy_batch)

# Show up to three examples without indexing past the end of the batch
# (a batch holds BATCH_SIZE images, so indices 3..5 overflow a batch of 4).
for img_index in range(min(3, len(clean_batch))):
    original = clean_batch[img_index]
    noisy = noisy_batch[img_index]
    pred = denoised_batch[img_index]

    display_images(original, noisy, pred)

In [None]:
# Plot the training (train) and validation (test) losses recorded by fit()
plt.plot(history.history['loss'],
         label='train')
plt.plot(history.history['val_loss'],
         label='test')
plt.legend()

Que pensez-vous des performances du modèle ?
<em>À COMPLÉTER</em>


# 1.4 Sauvegarde de l'auto-encodeur

L'entraînement de l'auto-encodeur sans utilisation de puissance de calcul (GPU) peut prendre beaucoup de temps. Usuellement, nous sauvegardons le modèle entraîné en local ou sur un serveur distant pour l'utiliser ultérieurement afin de traiter de nouvelles données (d'ailleurs, vous verrez une utilisation avancée de cette technique, le transfer learning, dès la semaine prochaine).
Pour sauvegarder le modèle `autoencoder`, utiliser la méthode `save`.

In [None]:
# Save the trained model under the file name set in the configuration cell
autoencoder.save(SAVE_MODEL_DENOISE)


In [None]:
# Reconstruct one batch of noisy test images; predict() uses the first
# component of each dataset element (the noisy image) as the model input.
decoded_imgs = autoencoder.predict(x_test_noisy.take(1))
