In [None]:
# This Python 3 environment comes with many helpful analytics libraries installed
# It is defined by the kaggle/python Docker image: https://github.com/kaggle/docker-python
# For example, here's several helpful packages to load

import numpy as np # linear algebra
import pandas as pd # data processing, CSV file I/O (e.g. pd.read_csv)

# Input data files are available in the read-only "../input/" directory
# For example, running this (by clicking run or pressing Shift+Enter) will list all files under the input directory

# import os
# for dirname, _, filenames in os.walk('/kaggle/input'):
#     for filename in filenames:
#         print(os.path.join(dirname, filename))

# You can write up to 20GB to the current directory (/kaggle/working/) that gets preserved as output when you create a version using "Save & Run All" 
# You can also write temporary files to /kaggle/temp/, but they won't be saved outside of the current session

import tensorflow as tf
from tensorflow.keras import layers, models, optimizers
import matplotlib.pyplot as plt
import numpy as np
import os
from glob import glob
from tqdm import tqdm

import zipfile

### Objective

In this project I aim to use a GAN to learn from the pictures in the data set. I create a generator and a discriminator and then train the model on the Monet paintings data set. I then run the trained model against the real photos in the data in order to attempt to recreate them as paintings.

### Loading and Preparing the Data

I begin by loading the Monet paintings data set and the photo data set. I prepare datasets for my CycleGAN by loading and preprocessing images of Monet paintings and photos. I define the directories containing the images and use glob to gather their file paths. Each image is resized to 256×256, normalized to [-1,1], and batched for more efficient processing. Using TensorFlow's AUTOTUNE, I optimize the data pipeline for faster loading and create two datasets, monet_dataset and photo_dataset, ready for use in training.

In [None]:
# Root of the Kaggle input data and the two image folders inside it.
DATASET_DIR = '/kaggle/input/gan-getting-started/'
MONET_DIR = os.path.join(DATASET_DIR, 'monet_jpg')
PHOTO_DIR = os.path.join(DATASET_DIR, 'photo_jpg')

# Image paths
# glob() returns matches in arbitrary, filesystem-dependent order; sorting
# keeps the order (and therefore which photos come "first") reproducible
# from one run to the next.
monet_images = sorted(glob(os.path.join(MONET_DIR, '*.jpg')))
photo_images = sorted(glob(os.path.join(PHOTO_DIR, '*.jpg')))

# Preprocessing
def load_and_preprocess(image_path):
    """Load one JPEG file and return it as a 256x256 RGB tensor scaled to [-1, 1].

    Args:
        image_path: Path of the JPEG file to read.

    Returns:
        The decoded image, resized to 256x256 with 3 channels and rescaled
        so that pixel value 0 maps to -1 and 255 maps to 1.
    """
    raw_bytes = tf.io.read_file(image_path)
    decoded = tf.image.decode_jpeg(raw_bytes, channels=3)
    resized = tf.image.resize(decoded, [256, 256])
    # 0..255 -> -1..1, matching the tanh output range of the generator.
    return (resized / 127.5) - 1

def create_tf_dataset(image_list, batch_size=4):
    """Build a batched, prefetched tf.data pipeline of preprocessed images.

    Args:
        image_list: Non-empty sequence of image file paths.
        batch_size: Number of images per batch (default 4).

    Returns:
        A tf.data.Dataset that yields batches of images produced by
        load_and_preprocess.

    Raises:
        ValueError: If image_list is empty. Without this check an empty list
            (typically a wrong directory passed to glob) only fails later
            with an obscure dtype error while the map function is traced.
    """
    # len() rather than truthiness so array-like inputs are accepted as well.
    if len(image_list) == 0:
        raise ValueError(
            "image_list is empty; check that the image directory exists and contains files"
        )

    paths = tf.data.Dataset.from_tensor_slices(image_list)
    images = paths.map(load_and_preprocess, num_parallel_calls=tf.data.AUTOTUNE)
    return images.batch(batch_size).prefetch(tf.data.AUTOTUNE)

# Input pipelines for the two image domains, 8 images per batch
# (overrides create_tf_dataset's default of 4).
monet_dataset = create_tf_dataset(monet_images, batch_size=8)
photo_dataset = create_tf_dataset(photo_images, batch_size=8)

### Building the Generator


I build a generator model for my CycleGAN to transform images. The model starts by taking an input image of size 256×256×3 and processes it in three stages. First, during the downsampling stage, the image is reduced in size using strided convolutional layers with an increasing number of filters (64, 128, 256) to capture important features while shrinking the spatial dimensions.

Next, in the bottleneck stage, the features are further compressed using a layer with 512 filters to focus on the most essential details. Finally, during the upsampling stage, the image is resized back towards its original dimensions using transposed convolution layers with a decreasing number of filters (256, 128, 64), progressively restoring the details. The last layer performs the final upsampling step so that the output image has the same size as the input, with pixel values in the range [−1, 1] produced by a tanh activation.

In [None]:
def build_generator():
    """Create the encoder-decoder generator network.

    Four stride-2 convolutions (64, 128, 256, then 512 filters) halve the
    256x256 input four times; four stride-2 transposed convolutions
    (256, 128, 64, then 3 filters) double it back to 256x256. The final
    layer uses tanh, so outputs lie in [-1, 1].

    Returns:
        A Keras Sequential model named "Generator" mapping (256, 256, 3)
        inputs to (256, 256, 3) outputs.
    """
    net = models.Sequential(name="Generator")
    net.add(layers.Input(shape=(256, 256, 3)))

    def add_stage(conv_layer, n_filters):
        # One resampling stage: stride-2 (transposed) conv -> batch norm -> ReLU.
        net.add(conv_layer(n_filters, 4, strides=2, padding='same'))
        net.add(layers.BatchNormalization())
        net.add(layers.ReLU())

    # Encoder: 256 -> 128 -> 64 -> 32 spatial resolution.
    for n_filters in (64, 128, 256):
        add_stage(layers.Conv2D, n_filters)

    # Bottleneck: 32 -> 16.
    add_stage(layers.Conv2D, 512)

    # Decoder: 16 -> 32 -> 64 -> 128.
    for n_filters in (256, 128, 64):
        add_stage(layers.Conv2DTranspose, n_filters)

    # Last upsampling step back to 256x256 with 3 output channels.
    net.add(layers.Conv2DTranspose(3, 4, strides=2, padding='same', activation='tanh'))

    return net

# Generator instance that train_step optimises and that is later applied to
# photos when the submission images are produced.
generator_monet = build_generator()

### Building the Discriminator


In this code, I create a discriminator model for my GAN to decide if an image is real or fake. It processes the image through layers that extract features using filters (64, 128) and gradually reduce the size of the image with strides. After each feature extraction layer, batch normalization is used to make training more stable, and Leaky ReLU activation is applied to help the model learn better by allowing small gradients even for negative values.

At the end, the model outputs a single-channel result where each value represents whether a part of the image is likely real or fake. This discriminator is essential for helping the GAN distinguish between actual Monet paintings and generated ones.

In [None]:
def build_discriminator():
    """Create the patch-style discriminator network.

    Two stride-2 convolution stages (64 and 128 filters) reduce the 256x256
    input to 64x64, and a final stride-1 convolution with no activation
    emits one raw logit per spatial position.

    Returns:
        A Keras Sequential model named "Discriminator" mapping
        (256, 256, 3) images to a (64, 64, 1) map of logits.
    """
    critic = models.Sequential(name="Discriminator")
    critic.add(layers.Input(shape=(256, 256, 3)))

    for n_filters in (64, 128):
        stage = (
            layers.Conv2D(n_filters, 4, strides=2, padding='same'),
            layers.BatchNormalization(),
            # `negative_slope` is the argument name that replaces the older `alpha`.
            layers.LeakyReLU(negative_slope=0.2),
        )
        for layer in stage:
            critic.add(layer)

    # No activation: the loss is configured with from_logits=True.
    critic.add(layers.Conv2D(1, 4, padding='same'))
    return critic

### Defining the Loss Function

In this code, I define the loss functions for training my CycleGAN. The generator_loss measures how well the generator fools the discriminator by comparing its predictions on fake images to the target "real." The discriminator_loss evaluates how well the discriminator distinguishes real images from fake ones, combining losses for both real and fake predictions. The cycle_loss ensures the generator preserves the original image's structure by penalizing differences between the input image and the reconstructed image, scaled by a factor of 5. These losses work together to help the CycleGAN create realistic Monet-style images while keeping the content of the original photo.

In [None]:
# Shared adversarial loss. from_logits=True because the discriminator's last
# Conv2D has no activation and therefore outputs raw logits.
loss_fn = tf.keras.losses.BinaryCrossentropy(from_logits=True)

def generator_loss(fake_output):
    """Adversarial loss for the generator.

    Args:
        fake_output: Discriminator logits for generated images.

    Returns:
        Binary cross-entropy between fake_output and an all-ones target,
        i.e. the loss is low when the discriminator labels the fakes as real.
    """
    real_labels = tf.ones_like(fake_output)
    return loss_fn(real_labels, fake_output)

def discriminator_loss(real_output, fake_output):
    """Adversarial loss for the discriminator.

    Args:
        real_output: Discriminator logits for real images.
        fake_output: Discriminator logits for generated images.

    Returns:
        Sum of the binary cross-entropy of real_output against all-ones
        and of fake_output against all-zeros.
    """
    real_labels = tf.ones_like(real_output)
    loss_on_real = loss_fn(real_labels, real_output)

    fake_labels = tf.zeros_like(fake_output)
    loss_on_fake = loss_fn(fake_labels, fake_output)

    return loss_on_real + loss_on_fake

def cycle_loss(real_image, cycled_image):
    """Reconstruction penalty between an image and its cycled version.

    Args:
        real_image: The original image tensor.
        cycled_image: The reconstructed image tensor, same shape as real_image.

    Returns:
        Mean absolute difference between the two tensors, multiplied by 5.
    """
    mean_abs_error = tf.reduce_mean(tf.abs(real_image - cycled_image))
    # Fixed weight of the reconstruction term relative to the adversarial term.
    return mean_abs_error * 5

### Training Step

This code trains the CycleGAN by updating the generator and discriminator models. The generator creates fake images and reconstructs original ones, while the discriminator evaluates real and fake images. Losses are calculated to improve the generator’s ability to fool the discriminator and maintain the input image’s structure. Gradients are computed and applied using optimizers to adjust the models during training efficiently. The @tf.function speeds up execution by optimizing the code as a computational graph.

In [None]:
# One Adam optimizer per network (learning rate 2e-4, beta_1 0.6) so the
# generator and the discriminator keep separate optimizer state.
gen_optimizer = tf.keras.optimizers.Adam(learning_rate=2e-4, beta_1=0.6)
disc_optimizer = tf.keras.optimizers.Adam(learning_rate=2e-4, beta_1=0.6)
# Discriminator instance that train_step optimises against generator_monet.
discriminator_monet = build_discriminator()

# Training step
@tf.function
def train_step(real_monet, real_photo):
    """Run one optimisation step of the generator and the discriminator.

    The generator translates the photo batch into Monet-style images; the
    discriminator is trained to separate those from the real Monet batch.
    Previously the generator was fed `real_monet` and `real_photo` was
    ignored, so it was never trained on the photo domain it is applied to
    at inference time.

    Args:
        real_monet: Batch of real Monet paintings scaled to [-1, 1].
        real_photo: Batch of real photos scaled to [-1, 1].

    Returns:
        Tuple (g_loss, d_loss) of scalar loss tensors for this step.
    """
    # Two single-use tapes instead of one persistent tape: each is released
    # as soon as its gradient has been taken.
    with tf.GradientTape() as gen_tape, tf.GradientTape() as disc_tape:
        fake_monet = generator_monet(real_photo, training=True)
        # NOTE(review): only one generator exists, so the "cycle" is the same
        # network applied twice. A full CycleGAN would map back to the photo
        # domain with a second generator; confirm whether that is wanted.
        cycled_photo = generator_monet(fake_monet, training=True)

        real_monet_preds = discriminator_monet(real_monet, training=True)
        fake_monet_preds = discriminator_monet(fake_monet, training=True)

        g_loss = generator_loss(fake_monet_preds) + cycle_loss(real_photo, cycled_photo)
        d_loss = discriminator_loss(real_monet_preds, fake_monet_preds)

    gen_grads = gen_tape.gradient(g_loss, generator_monet.trainable_variables)
    disc_grads = disc_tape.gradient(d_loss, discriminator_monet.trainable_variables)

    gen_optimizer.apply_gradients(zip(gen_grads, generator_monet.trainable_variables))
    disc_optimizer.apply_gradients(zip(disc_grads, discriminator_monet.trainable_variables))
    return g_loss, d_loss


### Training Loop

For training, I use 5 epochs. For each epoch, it calculates the generator and discriminator losses over all batches, averages them, and records the time taken to complete the epoch. The generator loss reflects how well the generator creates convincing fake images, while the discriminator loss measures how effectively the discriminator distinguishes real from fake images.

Based on the printed results, the model improves gradually over the epochs, as indicated by the decrease in both generator and discriminator losses. The generator loss decreases from 2.0399 to 1.6986, showing that the generator is getting better at creating realistic Monet-style images. Similarly, the discriminator loss stabilizes around 1.4, suggesting it maintains a balance in distinguishing real and fake images. The model demonstrates good progress and stability, showing that both the generator and discriminator are learning effectively.

In [None]:
import time

# Number of full passes over the paired (Monet, photo) batches.
EPOCHS = 5
for epoch in range(EPOCHS):
    print(f"\nEpoch {epoch + 1}/{EPOCHS}")
    # perf_counter is monotonic, so the elapsed time cannot be distorted by
    # system clock adjustments during a long epoch.
    start_time = time.perf_counter()

    total_gen_loss = 0
    total_disc_loss = 0
    step_count = 0

    # zip stops at the shorter dataset, so each epoch runs as many steps as
    # the smaller of the two datasets has batches.
    for real_monet, real_photo in zip(monet_dataset, photo_dataset):
        gen_loss, disc_loss = train_step(real_monet, real_photo)
        total_gen_loss += gen_loss.numpy()
        total_disc_loss += disc_loss.numpy()
        step_count += 1

    # Fail with a clear message instead of a ZeroDivisionError below.
    if step_count == 0:
        raise RuntimeError(
            "No training batches were produced; check that both datasets contain images."
        )

    # average losses
    avg_gen_loss = total_gen_loss / step_count
    avg_disc_loss = total_disc_loss / step_count

    epoch_time = time.perf_counter() - start_time
    print(f"Epoch {epoch + 1} completed in {epoch_time:.2f} seconds")
    print(f"  Avg Generator Loss: {avg_gen_loss:.4f}")
    print(f"  Avg Discriminator Loss: {avg_disc_loss:.4f}")

       

### Generate Submission

Here, the model is used to generate Monet-style images from real photos using the trained generator model and saves the results. 
It processes the first 1,000 photos from the dataset, converting each photo into a Monet-style image, denormalizing the pixel values, and saving the output to a directory. For the first five images, both the input photos and their generated Monet-style counterparts are stored for visualization. 

A few of these images are displayed side by side using Matplotlib to provide a quick qualitative evaluation of the generator's performance. It appears the model can mimic the images; however, the generated Monet-style images are not high quality. They capture the main features of the photos but not the detail. This could possibly be improved by decreasing the learning rate and adding more epochs so that the model learns more rigorously. The trade-off, however, is that the model would take much longer to run.



In [None]:
output_dir = './monet_generated'
os.makedirs(output_dir, exist_ok=True)

NUM_IMAGES = 1000      # number of photos to translate and save
NUM_PREVIEW = 5        # number of input/output pairs kept for the plot
INFERENCE_BATCH = 8    # photos per generator forward pass

input_images = []
generated_images = []


def _to_uint8(pixels):
    """Map an array scaled to [-1, 1] to uint8 pixel values in [0, 255]."""
    return np.clip(pixels * 127.5 + 127.5, 0, 255).astype(np.uint8)


# photo_dataset yields batches, so take(NUM_IMAGES) on it would count batches
# rather than photos. Unbatch first so exactly the first NUM_IMAGES photos are
# selected, then re-batch for efficient inference.
photos_to_convert = photo_dataset.unbatch().take(NUM_IMAGES).batch(INFERENCE_BATCH)

saved_count = 0
with tqdm(total=NUM_IMAGES) as progress:
    for photo_batch in photos_to_convert:
        monet_batch = _to_uint8(generator_monet(photo_batch, training=False).numpy())
        photo_batch_np = photo_batch.numpy()

        # Save every image of the batch, not only the first one.
        for photo, generated_monet in zip(photo_batch_np, monet_batch):
            # scale=False: pixels are already denormalised to 0..255; the
            # default would min-max rescale each image a second time.
            tf.keras.preprocessing.image.save_img(
                os.path.join(output_dir, f"monet_{saved_count}.jpg"),
                generated_monet,
                scale=False,
            )

            if saved_count < NUM_PREVIEW:
                input_images.append(_to_uint8(photo))
                generated_images.append(generated_monet)

            saved_count += 1
            progress.update(1)

# first 5 images
# Rows follow the number of previews actually collected, so a dataset with
# fewer than NUM_PREVIEW photos does not raise an IndexError.
num_shown = len(input_images)
if num_shown:
    plt.figure(figsize=(15, 10))
    for row, (source_photo, styled_photo) in enumerate(zip(input_images, generated_images)):
        # Input image
        plt.subplot(num_shown, 2, 2 * row + 1)
        plt.imshow(source_photo)
        plt.title(f"Input Photo {row + 1}")
        plt.axis('off')

        # Generated image
        plt.subplot(num_shown, 2, 2 * row + 2)
        plt.imshow(styled_photo)
        plt.title(f"Generated Monet {row + 1}")
        plt.axis('off')

    plt.tight_layout()
    plt.show()

#zip file
# Archive the generated images flat (file names only, no directory prefix).
with zipfile.ZipFile('images.zip', 'w') as zipf:
    for root, _, files in os.walk(output_dir):
        for file in sorted(files):
            zipf.write(os.path.join(root, file), file)