In [1]:
import warnings, os, datetime, zipfile, tqdm, gdown, glob, random, shutil, pytz
import matplotlib.pyplot as plt
import tensorflow as tf
import wandb

warnings.filterwarnings("ignore", category=DeprecationWarning)
warnings.filterwarnings("ignore", category=UserWarning)

In [3]:
timezone = pytz.timezone('Europe/Paris')
now = datetime.datetime.now(timezone).strftime('%Y.%m.%d-%H.%M.%S')

project_path = os.getcwd()

paths = {
    "data_path": f"{project_path}/data",
    "train_data_path": f"{project_path}/data/train",
    "model_path": f"{project_path}/models",
    "checkpoint_path": f"{project_path}/weights/model_early",
    "log_path": f"{project_path}/logs/fit/{now}_model"
}

for key, path in paths.items():
    os.makedirs(path, exist_ok=True)

image_h = 224
image_w = 224
batch_s = 32

encoding_dim = 256

In [6]:
def download_dataset(force=False):
    # Construct the dataset path
    dataset_path = os.path.join(paths['train_data_path'], 'dataset_livrable_2.zip')

    # Check if the dataset is already downloaded
    if os.path.exists(dataset_path) and not force:
        print("Dataset is already downloaded.")
    else:
        print("Downloading dataset...")
        url = 'https://drive.google.com/uc?export=download&id=190NL04KXMiUsnC-rdYDB9PLxCgk-MKuy'
        gdown.download(url, dataset_path, quiet=False)
    
    # Check if the dataset is already extracted
    if len(os.listdir(paths['train_data_path'])) == 2 and not force:
        print("Dataset is already extracted.")
    else:
        print("Extracting dataset...")
        with zipfile.ZipFile(dataset_path, 'r') as zip_ref:
            files = zip_ref.infolist()
            with tqdm.tqdm(total=len(files), desc="Extracting", unit="file") as pbar:
                for file in files:
                    zip_ref.extract(file, paths['train_data_path'])
                    pbar.update(1)
            print(f"Dataset extracted to {paths['train_data_path']}")
        
# Call the function
download_dataset()

Downloading dataset...


Downloading...
From (original): https://drive.google.com/uc?export=download&id=190NL04KXMiUsnC-rdYDB9PLxCgk-MKuy
From (redirected): https://drive.google.com/uc?export=download&id=190NL04KXMiUsnC-rdYDB9PLxCgk-MKuy&confirm=t&uuid=74c8a450-a84c-4f0e-8608-ca8244f40da5
To: C:\Users\lefra\Documents\Projets\ProjetDeepLearning\livrable_2\data\train\dataset_livrable_2.zip
100%|██████████| 808M/808M [01:23<00:00, 9.71MB/s] 


Extracting dataset...


Extracting: 100%|██████████| 5001/5001 [00:09<00:00, 524.99file/s]

Dataset extracted to C:\Users\lefra\Documents\Projets\ProjetDeepLearning\livrable_2/data/train





In [7]:
# Load and split images into train and test sets (80%-20%)
train_set, test_set = tf.keras.preprocessing.image_dataset_from_directory(
    paths['train_data_path'],
    validation_split=0.2,
    subset="both",
    seed=42,
    image_size=(image_h, image_w),
    batch_size=batch_s,
    labels=None,
    label_mode=None
)


Found 5000 files.
Using 4000 files for training.
Using 1000 files for validation.


In [9]:
def get_callbacks():
    # Create a callback that saves the model's weights at each epoch where validation loss improves
    checkpoint_callback = tf.keras.callbacks.ModelCheckpoint(
        filepath=paths['checkpoint_path'] + "/weights-epoch-{epoch:02d}-{val_loss:.2f}.weights.h5",
        save_weights_only=True,
        save_best_only=True,
        verbose=1
    )

    # Create a TensorBoard callback to log training metrics, model graphs, and images for visualization
    tensorboard_callback = tf.keras.callbacks.TensorBoard(
        log_dir=paths['log_path'],
        histogram_freq=1,
        write_graph=True,
        write_images=True,
        update_freq='epoch',
        profile_batch=0,
        embeddings_freq=0
    )

    # Set up early stopping to halt training if validation loss stops improving for a set number of epochs
    early_callback = tf.keras.callbacks.EarlyStopping(
        monitor='val_loss',
        min_delta=0,
        patience=5,
        verbose=0,
        mode='auto',
        baseline=None,
        restore_best_weights=True,
        start_from_epoch=0
    )

    # W&B callback
    wandb_callback = wandb.keras.WandbMetricsLogger()

    return [
        checkpoint_callback,
        tensorboard_callback,
        early_callback,
        wandb_callback
    ]

In [20]:
rescale_layer = tf.keras.layers.Rescaling(1./255)

print(rescale_layer)

def add_gaussian_noise(images, min_noise=0.3, max_noise=0.6):
    batch_size = tf.shape(images)[0]
    noise_factor = tf.random.uniform(shape=(batch_size, 1, 1, 1), minval=min_noise, maxval=max_noise)
    noise = tf.random.normal(shape=tf.shape(images))
    noisy_images = images + noise_factor * noise
    noisy_images = tf.clip_by_value(noisy_images, 0.0, 1.0)
    return noisy_images

<Rescaling name=rescaling_2, built=False>


In [12]:
def build_autoencoder(input_shape):
    input_img = tf.keras.layers.Input(shape=input_shape)

    # **Encoder**
    x = tf.keras.layers.Conv2D(32, (3, 3), activation='relu', padding='same')(input_img)
    x = tf.keras.layers.MaxPooling2D((2, 2), padding='same')(x)

    x = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = tf.keras.layers.MaxPooling2D((2, 2), padding='same')(x)

    x = tf.keras.layers.Conv2D(128, (3, 3), activation='relu', padding='same')(x)


    encoded = tf.keras.layers.MaxPooling2D((2, 2), padding='same')(x)

    # **Decoder**
    x = tf.keras.layers.Conv2D(128, (3, 3), activation='relu', padding='same')(encoded)
    x = tf.keras.layers.UpSampling2D((2, 2))(x)

    x = tf.keras.layers.Conv2D(64, (3, 3), activation='relu', padding='same')(x)
    x = tf.keras.layers.UpSampling2D((2, 2))(x)

    x = tf.keras.layers.Conv2D(32, (3, 3), activation='relu', padding='same')(x)
    x = tf.keras.layers.UpSampling2D((2, 2))(x)

    decoded = tf.keras.layers.Conv2D(3, (3, 3), activation='sigmoid', padding='same')(x)

    # **Self-encoding model**
    autoencoder = tf.keras.Model(input_img, decoded)
    return autoencoder

In [8]:
sweep_config = {
    'method': 'bayes',  # You can also use 'random' or 'grid' search.
    'metric': {
        'name': 'val_loss',  # The metric to optimize.
        'goal': 'minimize'
    },
    'parameters': {
        'learning_rate': {
            'values': [0.001, 0.0001, 0.00001]  # Range of values for learning rate
        },
        'batch_size': {
            'values': [32, 64, 128]  # Test different batch sizes
        },
        'optimizer': {
            'values': ['adam', 'sgd', 'rmsprop']  # Try different optimizers
        },
        'min_noise': {
            'values': [0.2, 0.3, 0.4]  # Range for Gaussian noise
        },
        'max_noise': {
            'values': [0.5, 0.6, 0.7]  # Range for Gaussian noise
        }
    }
}

In [21]:
def train_model(config=None):
    # Initialize W&B run
    with wandb.init(config=config):
        config = wandb.config

        # Use the config to set hyperparameters
        optimizer = config.optimizer
        learning_rate = config.learning_rate
        batch_size = config.batch_size

        noisy_train_set = train_set.map(
            lambda x: (add_gaussian_noise(rescale_layer(x)), rescale_layer(x)),
            num_parallel_calls=tf.data.AUTOTUNE
        ).cache().prefetch(buffer_size=tf.data.AUTOTUNE)

        # Adds Gaussian noise and rescales the test set images, optimizing with parallel processing.
        noisy_test_set = test_set.map(
            lambda x: (add_gaussian_noise(rescale_layer(x)), rescale_layer(x)),
            num_parallel_calls=tf.data.AUTOTUNE
        ).cache().prefetch(buffer_size=tf.data.AUTOTUNE)


        # Build and compile the autoencoder model
        autoencoder = build_autoencoder((image_h, image_w, 3))
        if optimizer == 'adam':
            opt = tf.keras.optimizers.Adam(learning_rate=learning_rate)
        elif optimizer == 'sgd':
            opt = tf.keras.optimizers.SGD(learning_rate=learning_rate)
        elif optimizer == 'rmsprop':
            opt = tf.keras.optimizers.RMSprop(learning_rate=learning_rate)

        autoencoder.compile(optimizer=opt, loss='mse')

        # Train the model
        history = autoencoder.fit(
            noisy_train_set,
            validation_data=noisy_test_set,
            epochs=10,
            callbacks=get_callbacks()  # Includes W&B logging
        )


In [None]:
# Initialize the sweep
sweep_id = wandb.sweep(sweep_config, project='livrable_2')

# Start the sweep
wandb.agent(sweep_id, train_model, count=100)

wandb.finish()

Create sweep with ID: 4dpgk0fj
Sweep URL: https://wandb.ai/sourcierdeverite-cesi/livrable_2/sweeps/4dpgk0fj


wandb: Agent Starting Run: w122n8m1 with config:
wandb: 	batch_size: 128
wandb: 	learning_rate: 0.001
wandb: 	max_noise: 0.5
wandb: 	min_noise: 0.2
wandb: 	optimizer: rmsprop
Failed to detect the name of this notebook, you can set it manually with the WANDB_NOTEBOOK_NAME environment variable to enable code saving.


Epoch 1/50
 49/125 ━━━━━━━━━━━━━━━━━━━━ 2:09 2s/step - loss: 0.0781