# Diffusion
Dataset : https://www.kaggle.com/datasets/mohannadaymansalah/stable-diffusion-dataaaaaaaaa

## Imports

In [None]:
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.preprocessing.image import load_img, img_to_array
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import os
import random as r

# Enable on-demand GPU memory allocation on every visible GPU and report
# what was found.
gpus = tf.config.experimental.list_physical_devices('GPU')
if not gpus:
    print("No GPUs detected")
else:
    try:
        for gpu in gpus:
            tf.config.experimental.set_memory_growth(gpu, True)
    except RuntimeError as e:
        # Report the failure instead of aborting the notebook.
        print(e)
    else:
        print(f"GPUs detected: {len(gpus)}")
        print(f"GPUs: {gpus}")

### Constants

In [2]:
# Image shape (height, width, channels); the first two entries are used as the
# load-time target size and the full tuple as the U-Net input shape.
INPUT_SHAPE = (512, 512, 3)
# Batch size (presumably for training; not used by the cells shown here).
BATCH_SIZE = 16
STEPS = 1000 # Number of diffusion steps
# First and last beta of the linear noise schedule.
BETA_1 = 1e-4
BETA_T = 2e-2
# Directories whose images are loaded, in this order.
PATH_DATASETS = ['stable-diffusion-face-dataset/512/man','stable-diffusion-face-dataset/512/woman']
# Fractions of the dataset reserved for validation and test.
VALIDATION_SPLIT = 0.2
TEST_SPLIT = 0.1
# Number of epochs (presumably for training; not used by the cells shown here).
EPOCHS = 100


## Data preprocessing

### Loading of dataset

In [None]:
def load_and_preprocess_images(paths, target_size):
    """Load every image file found directly inside the given directories.

    Args:
        paths: Iterable of directory paths to read images from.
        target_size: ``(height, width)`` each image is resized to on load.

    Returns:
        A ``uint8`` NumPy array stacking all loaded images, in the order of
        ``paths`` and, within a directory, in sorted file-name order.
    """
    data = []
    for path in paths:
        # os.listdir returns entries in arbitrary order; sorting makes the
        # dataset order (and anything derived from it) reproducible.
        for img_name in sorted(os.listdir(path)):
            img_path = os.path.join(path, img_name)
            # Skip sub-directories (e.g. .ipynb_checkpoints), which would
            # otherwise make load_img fail.
            if not os.path.isfile(img_path):
                continue
            img = load_img(img_path, target_size=target_size)
            data.append(img_to_array(img, dtype='uint8'))
    return np.array(data, dtype='uint8')

# Load all images from the configured directories, resized to the spatial
# part (height, width) of INPUT_SHAPE, and report what was loaded.
data = load_and_preprocess_images(PATH_DATASETS, INPUT_SHAPE[:2])
print(f"Loaded {len(data)} images with shape {data.shape} and dtype {data.dtype}")

### Example

In [None]:
# Display one randomly chosen image. randrange excludes its upper bound, so
# the index is always valid; randint(0, len(data)) could return len(data)
# and raise IndexError.
idx = r.randrange(len(data))
plt.imshow(data[idx])

### Data preprocessing

In [None]:
# `data` is grouped by source directory (the loader walks PATH_DATASETS in
# order), so slicing it as-is would draw the validation and test sets from the
# last directory only. Shuffle once before splitting. The shuffle is done in
# place (no second copy of the dataset) with a fixed seed; note that
# re-running this cell reshuffles the already shuffled array.
np.random.default_rng(42).shuffle(data)

n_samples = len(data)
train_end = int(n_samples * (1 - VALIDATION_SPLIT - TEST_SPLIT))
val_end = int(n_samples * (1 - TEST_SPLIT))
# np.split returns views into `data`: [0, train_end), [train_end, val_end), [val_end, n).
train_data, val_data, test_data = np.split(data, [train_end, val_end])
print(f"Train data shape: {train_data.shape}")
print(f"Validation data shape: {val_data.shape}")
print(f"Test data shape: {test_data.shape}")

## Model

### Gaussian noise tests

#### Curves

In [None]:
# Number of dataset images sampled for the noise experiments in this cell.
nb_images = 10

class LinearNoiceScheduler:
    """Linear beta schedule with forward noising and one reverse sampling step.

    Attributes:
        betas: Per-step noise variances, linearly spaced.
        alphas: ``1 - betas``.
        sqrt_alphas: Square root of ``alphas``.
        c_alphas: Cumulative product of ``alphas``.
        sqrt_c_alphas: Square root of ``c_alphas``.
        sqrt_one_minus_c_alphas: Square root of ``1 - c_alphas``.
    """

    def __init__(self, beta_start=None, beta_end=None, steps=None):
        """Precompute the schedule tables.

        Args:
            beta_start: First beta; defaults to the notebook constant BETA_1.
            beta_end: Last beta; defaults to the notebook constant BETA_T.
            steps: Number of diffusion steps; defaults to the notebook
                constant STEPS.
        """
        # Resolve defaults lazily so the class can also be used without the
        # notebook-level constants being defined.
        if beta_start is None:
            beta_start = BETA_1
        if beta_end is None:
            beta_end = BETA_T
        if steps is None:
            steps = STEPS

        self.betas = np.linspace(beta_start, beta_end, steps)
        self.alphas = 1 - self.betas
        self.sqrt_alphas = np.sqrt(self.alphas)
        self.c_alphas = np.cumprod(self.alphas)
        self.sqrt_c_alphas = np.sqrt(self.c_alphas)
        self.sqrt_one_minus_c_alphas = np.sqrt(1 - self.c_alphas)

    def add_noise(self, original_images, noise, step):
        """Return the noised version of ``original_images`` at ``step``.

        Computes ``sqrt(c_alpha) * x0 + sqrt(1 - c_alpha) * noise`` using the
        schedule values at index ``step``; ``noise`` must broadcast against
        ``original_images``.
        """
        signal_scale = self.sqrt_c_alphas[step]
        noise_scale = self.sqrt_one_minus_c_alphas[step]
        return signal_scale * original_images + noise_scale * noise

    def sampler(self, noisy_image, noise_pred, step):
        """Perform one reverse step from ``step`` given a noise prediction.

        Args:
            noisy_image: Image at diffusion step ``step``.
            noise_pred: Predicted noise contained in ``noisy_image``.
            step: Integer index of the current step.

        Returns:
            A tuple ``(sample, x0)`` where ``x0`` is the estimate of the clean
            image and ``sample`` is the mean of the previous-step distribution
            (for ``step == 0``) or that mean plus Gaussian noise drawn with
            ``np.random.normal`` (for ``step > 0``).
        """
        # Invert add_noise to estimate the clean image.
        x0 = (noisy_image - self.sqrt_one_minus_c_alphas[step] * noise_pred) / self.sqrt_c_alphas[step]

        # Reverse-step mean: (x_t - beta_t / sqrt(1 - c_alpha_t) * eps) / sqrt(alpha_t).
        # Only the noise term is scaled by 1 / sqrt(1 - c_alpha_t), not x_t.
        noise_coef = self.betas[step] / self.sqrt_one_minus_c_alphas[step]
        mean = (noisy_image - noise_coef * noise_pred) / self.sqrt_alphas[step]

        if step == 0:
            # Last step: no noise is added.
            return mean, x0

        variance = (1 - self.c_alphas[step - 1]) / (1 - self.c_alphas[step])
        variance = variance * self.betas[step]
        sigma = variance ** 0.5
        z = np.random.normal(size=noisy_image.shape)
        return mean + sigma * z, x0


# Scheduler instance shared by the cells below.
NoiceScheduler = LinearNoiceScheduler()

# Pick nb_images distinct random images from the dataset, scaled by 1/255
selected_images = data[np.random.choice(len(data), nb_images, replace=False)] / 255.0
# A single noise tensor with the shape of one image; it is broadcast over all
# selected images when added.
noise = np.random.normal(size=selected_images.shape[1:])

# Lists collecting the mean and standard deviation at each sampled step
means = []
stds = []

# Only every step_jump-th diffusion step is evaluated.
step_jump = 10

# Compute the mean and standard deviation of the noised images at each step
for step in range(0, STEPS, step_jump):
    noisy_images = NoiceScheduler.add_noise(selected_images, noise, step)
    means.append(np.mean(noisy_images))
    stds.append(np.std(noisy_images))

# Plot the results
plt.figure(figsize=(12, 6))
plt.subplot(1, 2, 1)
plt.plot(range(0, STEPS, step_jump), means, label='Mean')
plt.xlabel('Step')
plt.ylabel('Mean')
plt.title('Mean of Noisy Images')
plt.legend()

plt.subplot(1, 2, 2)
plt.plot(range(0, STEPS, step_jump), stds, label='Standard Deviation')
plt.xlabel('Step')
plt.ylabel('Standard Deviation')
plt.title('Standard Deviation of Noisy Images')
plt.legend()
plt.tight_layout()

# Keep a random-length prefix of the selection (between 1 and
# len(selected_images) - 1 images) as test images
idx = r.randint(1, len(selected_images)-1)
test_images = selected_images[:idx]

# Show noise
plt.figure(figsize=(6, 6))
# Clip to [0, 1] for display only; `noise` itself is left unchanged.
noise_img = np.clip(noise, 0, 1)
plt.imshow(noise_img)
plt.title('Noise')
plt.axis('off')
# Show noise distribution
plt.figure(figsize=(6, 6))
plt.hist(noise.flatten(), bins=150)
plt.title('Noise Distribution')
plt.xlabel('Intensity')
plt.ylabel('Frequency')

plt.show()



In [None]:
# Plot the cumulative product of the alphas against the diffusion step index.
plt.figure(figsize=(6, 6))
plt.plot(NoiceScheduler.c_alphas)
plt.xlabel('Step')
plt.ylabel('Cumprod alpha')

#### Test on one sample

In [None]:
# Number of noising levels to display (the grid has bruitage + 1 columns)
bruitage = 14

test_image = test_images[0]
# Step increment between two consecutive columns.
jump = STEPS // bruitage
# Show noising (top row) and denoising (bottom row) of one of the test images
fig, axes = plt.subplots(2, bruitage+1, figsize=(20, 4))
for slot in range(bruitage+1):
    # Cap at the last valid schedule index.
    step = min((slot+1) * jump, STEPS-1)
    c_alpha = NoiceScheduler.c_alphas[step]
    noisy_images = NoiceScheduler.add_noise(test_image, noise, step)
    # The sampler receives the exact noise that was added; only its x0
    # estimate (second return value) is displayed.
    info, denoiced_image = NoiceScheduler.sampler(noisy_images, noise, step)
    # Clip to [0, 1] for display.
    noisy_image_plt = np.clip(noisy_images, 0, 1)
    denoiced_image_plt = np.clip(denoiced_image, 0, 1)
    axes[0,slot].imshow(noisy_image_plt)
    axes[1,slot].imshow(denoiced_image_plt)
    axes[0,slot].axis('off')
    axes[1,slot].axis('off')
    # Title: step index and the cumulative alpha at that step.
    axes[0,slot].set_title(f'{step} - {c_alpha:.2f}')
    axes[1,slot].set_title(f'{step} - {c_alpha:.2f}')

plt.show()

### Definition

#### Test of positional encoding

In [None]:
def positional_encoding(position, d_model):
    """Return sinusoidal positional encodings.

    Args:
        position: 1-D array-like of positions.
        d_model: Dimensionality of the embeddings.

    Returns:
        Array of shape ``(len(position), d_model)`` with the sine of the
        angles in even columns and the cosine in odd columns; columns
        ``2k`` and ``2k + 1`` share the rate ``1 / 10000 ** (2k / d_model)``.
    """
    position = np.asarray(position)
    # Divide by d_model itself (NumPy has no `np.d_model` attribute).
    angle_rates = 1 / np.power(10000, (2 * (np.arange(d_model) // 2)) / d_model)
    angle_rads = position[:, np.newaxis] * angle_rates[np.newaxis, :]

    # Sine on even indices, cosine on odd indices.
    angle_rads[:, 0::2] = np.sin(angle_rads[:, 0::2])
    angle_rads[:, 1::2] = np.cos(angle_rads[:, 1::2])

    return angle_rads

def conv_block(input_tensor, num_filters):
    """Apply two successive Conv2D(3x3, same) -> BatchNormalization -> ReLU stages.

    Args:
        input_tensor: Tensor the block is applied to.
        num_filters: Number of filters of both convolutions.

    Returns:
        The output tensor of the second stage.
    """
    x = input_tensor
    for _ in range(2):
        x = layers.Conv2D(num_filters, (3, 3), padding="same")(x)
        x = layers.BatchNormalization()(x)
        x = layers.Activation("relu")(x)
    return x

def encoder_block(input_tensor, num_filters):
    """Run a conv block, then a 2x2 max-pooling.

    Returns:
        A tuple ``(features, pooled)``: the conv block output (used as the
        skip connection) and its max-pooled version.
    """
    features = conv_block(input_tensor, num_filters)
    pooled = layers.MaxPooling2D((2, 2))(features)
    return features, pooled

def decoder_block(input_tensor, skip_features, num_filters):
    """Upsample with a stride-2 transposed convolution, merge the skip, convolve.

    Args:
        input_tensor: Tensor coming from the previous (deeper) stage.
        skip_features: Encoder tensor concatenated with the upsampled input.
        num_filters: Number of filters of the transposed conv and conv block.

    Returns:
        The output tensor of the conv block.
    """
    upsampled = layers.Conv2DTranspose(
        num_filters, (2, 2), strides=2, padding="same"
    )(input_tensor)
    merged = layers.Concatenate()([upsampled, skip_features])
    return conv_block(merged, num_filters)

def build_unet(input_shape):
    """Build a U-Net with four encoder stages, a bottleneck and four decoder stages.

    Args:
        input_shape: Shape of the model input (without the batch dimension).

    Returns:
        A ``tf.keras.Model`` mapping the input to a 3-channel output.
    """
    inputs = layers.Input(input_shape)

    # Encoder: keep each stage's pre-pooling features for the skip connections.
    skips = []
    x = inputs
    for width in (64, 128, 256, 512):
        skip, x = encoder_block(x, width)
        skips.append(skip)

    # Bottleneck
    x = conv_block(x, 1024)

    # Decoder: consume the skips from deepest to shallowest.
    for width, skip in zip((512, 256, 128, 64), reversed(skips)):
        x = decoder_block(x, skip, width)

    # 3-channel (RGB) output through a 1x1 convolution.
    # NOTE(review): sigmoid bounds the output to (0, 1); if this network is
    # meant to predict Gaussian noise for the sampler, confirm this is intended.
    outputs = layers.Conv2D(3, (1, 1), activation="sigmoid")(x)

    return tf.keras.Model(inputs, outputs)

# Build the model for the configured input shape and print its layer summary
unet_model = build_unet(INPUT_SHAPE)
unet_model.summary()

### Training

In [None]:
def batch_generator(data, batch_size):
    """Yield random batches from ``data`` forever.

    Each batch is ``data`` indexed by ``batch_size`` distinct indices drawn
    with ``random.sample``; successive batches are drawn independently.

    Args:
        data: Indexable dataset supporting list-of-indices indexing
            (e.g. a NumPy array).
        batch_size: Number of samples per batch.

    Yields:
        ``data[indices]`` for a fresh random index list.
    """
    while True:
        population = range(len(data))
        chosen = r.sample(population, batch_size)
        yield data[chosen]

## Evaluation

### Loss curves

### Tests