# Livrable 2

## Contenu du livrable

Le but est de traiter un ensemble de photographies afin de les rendre plus facilement exploitables par les algorithmes de Machine Learning. Le traitement à réaliser est une opération de débruitage. Celle-ci s'appuiera sur des auto-encodeurs à convolution, appliqués aux images pour en améliorer la qualité.

1. Chargement des données provenant de l'EDA (livrable 1)
2. Création du dataset
3. Définition de l'autoencodeur (CAE)
4. Entrainement
5. Métriques


## Chargement des bibliothèques

In [None]:
import os
import PIL
import imghdr
import pathlib
import numpy as np
import tensorflow as tf
from tensorflow import keras
from collections import Counter
import matplotlib.pyplot as plt
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential

## Création du dataset

In [None]:
# Location of the image dataset, relative to the notebook.
dataset_url = "../dataset"
data_dir = pathlib.Path(dataset_url)

# Category names (presumably the dataset's sub-folders; verify against the
# directory layout).
categories = [
    "Painting",
    "Photo",
    "Schematics",
    "Sketch",
    "Text",
]

# Train/validation split: fraction held out for validation and the seed that
# is passed to both dataset loaders.
seed = 42
validation_split = 0.2

# Images are resized to img_height x img_width and served in batches.
img_height = img_width = 64
batch_size = 32

In [None]:
# Build the training and validation datasets with image_dataset_from_directory.
# Both calls share the same split fraction, seed, image size and batch size;
# only the requested `subset` differs.
_loader_options = dict(
    validation_split=validation_split,
    seed=seed,
    image_size=(img_height, img_width),
    batch_size=batch_size,
    labels=None,  # no labels are requested: elements are image batches only
)

train_set = tf.keras.utils.image_dataset_from_directory(
    data_dir, subset="training", **_loader_options
)
val_set = tf.keras.utils.image_dataset_from_directory(
    data_dir, subset="validation", **_loader_options
)

# Class names are not read here (the datasets are loaded with labels=None).
# class_names = train_set.class_names
# print(f"Classes found: {class_names}")

### Image dimensions

In [None]:
# Report the shape of the first validation batch, if the dataset yields one.
first_val_batch = next(iter(val_set.take(1)), None)
if first_val_batch is not None:
    print(f"Image dimensions: {first_val_batch.shape}")

## Visualisation

In [None]:
# Preview up to nine images of the first training batch in a 3x3 grid.
plt.figure(figsize=(8, 8))
for images in train_set.take(1):
    # Never index past the end of the batch: a first batch with fewer than
    # nine images would otherwise raise an IndexError.
    for i in range(min(9, images.shape[0])):
        plt.subplot(3, 3, i + 1)
        # Cast to uint8 so imshow interprets the values on the 0-255 scale.
        plt.imshow(images[i].numpy().astype("uint8"))
        plt.axis("off")
# Render the figure explicitly, as the noisy-image preview cell does.
plt.show()

## Préparation

In [None]:
def add_gaussian_noise(image, mean=0.0, stddev=70):
    """Return a copy of `image` corrupted with additive Gaussian noise.

    The input is cast to float32, noise drawn from a normal distribution with
    the given `mean` and `stddev` is added element-wise, and the result is
    clipped to the range [0, 255]. No rescaling to [0, 1] is applied here.

    Args:
        image: Tensor of pixel values (assumed to be on a 0-255 scale, given
            the clipping range; confirm against the caller).
        mean: Mean of the noise distribution.
        stddev: Standard deviation of the noise distribution.

    Returns:
        A float32 tensor with the same shape as `image`.
    """
    pixels = tf.cast(image, tf.float32)
    perturbation = tf.random.normal(
        shape=tf.shape(pixels),
        mean=mean,
        stddev=stddev,
        dtype=tf.float32,
    )
    return tf.clip_by_value(pixels + perturbation, 0.0, 255.0)

# Noisy counterparts of the training and validation datasets, obtained by
# mapping add_gaussian_noise (with its default parameters) over each of them.
noisy_train_set, noisy_val_set = (
    clean_set.map(add_gaussian_noise) for clean_set in (train_set, val_set)
)

In [None]:
# Preview up to nine images of the first noisy training batch in a 3x3 grid.
plt.figure(figsize=(8, 8))
for images in noisy_train_set.take(1):
    # Never index past the end of the batch: a first batch with fewer than
    # nine images would otherwise raise an IndexError.
    for i in range(min(9, images.shape[0])):
        plt.subplot(3, 3, i + 1)
        # Cast to uint8 so imshow interprets the values on the 0-255 scale.
        plt.imshow(images[i].numpy().astype("uint8"))
        plt.axis("off")
# Render once, after the whole grid has been drawn.
plt.show()

## Performance & pre processing

In [None]:
# Input-pipeline tuning (cache / shuffle / prefetch), currently disabled:
# AUTOTUNE = tf.data.experimental.AUTOTUNE
# a_train_set = noisy_train_set.cache().shuffle(1000).prefetch(buffer_size=AUTOTUNE)
# a_val_set = val_set.cache().prefetch(buffer_size=AUTOTUNE)

# Print the shape of the first batch of each noisy dataset (training first,
# then validation).
for noisy_set in (noisy_train_set, noisy_val_set):
    for image in noisy_set.take(1):
        print(f"Image shape: {image.shape}")

## Modélisation

In [None]:
# Main settings shared by the models
IMG_SIZE = img_width                 # image side length, taken from img_width
LATENT_DIM = 32                      # value handed to the Autoencoder constructor
BATCH_SIZE = 128                     # processing batch size setting
NB_EPOCHS_DENOISE = 100              # number of epochs for the denoising model
SAV_MODEL_DENOISE = "denoiser.h5"    # file name for saving the denoising model

## Encodeur

In [None]:
from keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D
from keras.models import Sequential

# Convolutional encoder.
# The input shape is declared once, from the same img_height / img_width that
# the dataset loader uses for `image_size`, so the model always matches the
# images it receives (the previous square IMG_SIZE shape plus a second
# `input_shape=` on the Rescaling layer could drift apart).
# Pixels are rescaled by 1/255 inside the model, so it is fed raw pixel values.
# Three Conv2D + MaxPooling2D stages follow; each 2x2 pooling halves the
# spatial dimensions while the number of filters grows 32 -> 64 -> 128.
encoder = Sequential([
    Input(shape=(img_height, img_width, 3)),
    layers.Rescaling(1./255),
    Conv2D(32, (3, 3), activation='relu', padding='same'),
    MaxPooling2D((2, 2)),
    Conv2D(64, (3, 3), activation='relu', padding='same'),
    MaxPooling2D((2, 2), padding='same'),
    Conv2D(128, (3, 3), activation='relu', padding='same'),
    MaxPooling2D((2, 2))
])

## Décodeur

In [None]:
from keras.models import Sequential

# Convolutional decoder, assembled layer by layer.
# TODO: add dropout

decoder = Sequential()

# The decoder consumes whatever the encoder emits (batch dimension dropped).
decoder.add(Input(shape=encoder.output_shape[1:]))

# Three Conv2D + UpSampling2D stages; each 2x2 upsampling doubles the spatial
# dimensions while the number of filters shrinks 128 -> 64 -> 32.
for n_filters in (128, 64, 32):
    decoder.add(Conv2D(n_filters, (3, 3), activation='relu', padding='same'))
    decoder.add(UpSampling2D((2, 2)))

# Final 1x1 convolution: three output channels squashed by a sigmoid.
decoder.add(Conv2D(3, (1, 1), activation='sigmoid', padding='same'))


In [None]:
from tensorflow.keras.models import Model

class Autoencoder(Model):
    """Keras model that chains the module-level `encoder` and `decoder`."""

    def __init__(self, latent_dim):
        """Store `latent_dim` and attach the existing encoder and decoder.

        Args:
            latent_dim: Value kept on the instance as `self.latent_dim`; it
                is not used to build any layer in this class.
        """
        super().__init__()
        self.latent_dim = latent_dim
        self.encoder = encoder
        self.decoder = decoder

    def call(self, x):
        """Encode `x`, then decode the result and return it."""
        return self.decoder(self.encoder(x))


autoencoder = Autoencoder(LATENT_DIM)

In [None]:
# Configure training: Adam optimizer with a binary cross-entropy loss.
autoencoder.compile(loss='binary_crossentropy', optimizer='adam')
# autoencoder.summary()

# Print the layer-by-layer summary of each half of the model, encoder first.
for sub_model in (encoder, decoder):
    sub_model.summary()

## Entrainement

In [None]:
# Train the model
import tensorflow as tf


def _make_denoising_pair(clean_batch):
    """Build one (noisy input, clean target) training pair from a batch.

    Both halves are derived from the SAME batch, so input and target always
    show the same images. Zipping two separately iterated copies of a shuffled
    dataset does not guarantee that.

    Args:
        clean_batch: Batch of images as yielded by `train_set` / `val_set`
            (pixel values on a 0-255 scale).

    Returns:
        A tuple `(noisy, target)`:
        - `noisy` keeps the 0-255 scale, because the encoder rescales its
          input by 1/255 itself;
        - `target` is scaled to [0, 1] to match the decoder's sigmoid output
          and the binary cross-entropy loss.
    """
    clean_batch = tf.cast(clean_batch, tf.float32)
    return add_gaussian_noise(clean_batch), clean_batch / 255.0


paired_train_set = train_set.map(_make_denoising_pair)
paired_val_set = val_set.map(_make_denoising_pair)

# Train the autoencoder.
# `batch_size` and `shuffle` are intentionally not passed: the datasets are
# already batched (and shuffled) by the loader, and Keras documents that these
# arguments do not apply to tf.data.Dataset inputs.
history = autoencoder.fit(
    paired_train_set,
    epochs=NB_EPOCHS_DENOISE,
    validation_data=paired_val_set,
    callbacks=[tf.keras.callbacks.TensorBoard(log_dir='/tmp/tb', histogram_freq=0, write_graph=False)]
)

## Métriques

- Courbe d'apprentissage
- Métrique
- Matrice de confusion

In [None]:
# Plot the training loss and the validation loss recorded during fit().
for history_key, curve_label in (('loss', 'train'), ('val_loss', 'test')):
    plt.plot(history.history[history_key], label=curve_label)
plt.legend()