# **GANs: I'm Something of a Painter Myself**

[Kaggle – I'm Something of a Painter Myself](https://www.kaggle.com/competitions/gan-getting-started)  

## **Problem & Data Description**
We must train a *generative adversarial network* (GAN) that converts ordinary photographs into Monet‑style paintings (or generates Monet‑like art from scratch).  

**Dataset** (`/kaggle/input/gan-getting-started`):  
* `monet_jpg/` – 300 Monet paintings (256 × 256 RGB)  
* `photo_jpg/` – 7 028 ordinary photos (256 × 256 RGB)  

**Submission format:** A single **`images.zip`** containing **7 000‑10 000** Monet‑style `.jpg` images (256×256).  

**Evaluation metric:** **MiFID** (Memorization‑informed Fréchet Inception Distance) – lower is better; memorization of training examples is penalised.


## **Exploratory Data Analysis (EDA)**

In [None]:
import os, random, itertools, zipfile, shutil, gc
import tensorflow as tf
import numpy as np
import matplotlib.pyplot as plt
from glob import glob
from pathlib import Path
from PIL import Image
from tqdm.notebook import tqdm
from scipy import linalg

# Competition input folder and the notebook's working folder.
ROOT_DIR = Path('/kaggle/input/gan-getting-started')
WORK_DIR = Path('/kaggle/working')

# One sub-folder per image domain.
monet_dir = ROOT_DIR/'monet_jpg'
photo_dir = ROOT_DIR/'photo_jpg'

# Report how many JPEG files each domain folder contains.
print(f"Monet images : {len(list(monet_dir.glob('*.jpg')))}")
print(f"Photo images : {len(list(photo_dir.glob('*.jpg')))}")

def show_samples(path, n=5, title='Samples'):
    """Display up to ``n`` randomly chosen JPEG images from ``path`` in one row.

    Args:
        path: ``pathlib.Path`` of the folder that is searched for ``*.jpg``.
        n: Number of images requested. If the folder holds fewer images,
            all of them are shown instead of raising.
        title: Figure super-title.

    Returns:
        None. The figure is rendered with ``plt.show()``.
    """
    candidates = list(path.glob('*.jpg'))
    # random.sample raises ValueError when asked for more items than exist,
    # so clamp the request to what is actually available.
    count = min(n, len(candidates))
    if count <= 0:
        print(f"No images to show for '{title}' in {path}")
        return

    samples = random.sample(candidates, count)
    plt.figure(figsize=(15, 3))
    for i, img_path in enumerate(samples, 1):
        # The context manager closes the file handle once the pixels have
        # been handed over to matplotlib.
        with Image.open(img_path) as img:
            plt.subplot(1, count, i)
            plt.imshow(img)
            plt.axis('off')
    plt.suptitle(title)
    plt.show()

# Preview a random row of images from each domain.
show_samples(monet_dir, title='Monet paintings')
show_samples(photo_dir, title='Real photos')


## **Model Architecture & Training**
We will use CycleGAN (Zhu et al., 2017) to translate images from the photo domain -> Monet domain while enforcing cycle‑consistency.


In [None]:
# CycleGAN Architecture
import tensorflow as tf
from tensorflow.keras import layers

IMG_SIZE = 256      # side length (pixels) that load_img resizes every image to
BUFFER_SIZE = 7000  # shuffle-buffer size used for both datasets below
BATCH_SIZE  = 4     # images per batch in both datasets
EPOCHS = 40         # intended epoch count; not referenced elsewhere in the visible code

# Data pipeline
def load_img(path):
    """Read a JPEG file and return it as a float32 image scaled to [-1, 1].

    The generators end in a ``tanh`` activation and the export step converts
    generator output back with ``x * 127.5 + 127.5``, so real images must live
    on the same [-1, 1] scale; otherwise the discriminators and the
    cycle/identity losses compare tensors on two different value ranges.

    Args:
        path: Scalar string tensor (or Python string) with the file path.

    Returns:
        A float32 tensor of shape ``(IMG_SIZE, IMG_SIZE, 3)`` in [-1, 1].
    """
    img = tf.io.read_file(path)
    img = tf.image.decode_jpeg(img, channels=3)
    img = tf.image.convert_image_dtype(img, tf.float32)  # uint8 -> float32 in [0, 1]
    img = tf.image.resize(img, [IMG_SIZE, IMG_SIZE])
    return img * 2.0 - 1.0  # [0, 1] -> [-1, 1] to match the tanh output range

# File-name datasets for both domains; list_files shuffles the file order.
monet_paths = tf.data.Dataset.list_files(str(monet_dir/'*.jpg'), shuffle=True)
photo_paths = tf.data.Dataset.list_files(str(photo_dir/'*.jpg'), shuffle=True)

# Decode in parallel, cache the decoded images, shuffle, then batch.
# NOTE(review): train() iterates these two datasets with Dataset.zip, which
# stops at the shorter dataset, so an epoch is bounded by the Monet set and
# the photo dataset is not fully consumed in one pass - confirm this is the
# intended behaviour (and that the photo cache is actually being completed).
monet_ds = monet_paths.map(load_img, num_parallel_calls=tf.data.AUTOTUNE).cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE)
photo_ds = photo_paths.map(load_img, num_parallel_calls=tf.data.AUTOTUNE).cache().shuffle(BUFFER_SIZE).batch(BATCH_SIZE)

# Generator and discriminator (U‑Net + PatchGAN)
def downsample(filters, size, apply_instancenorm=True):
    """Build a stride-2 convolution block that halves the spatial resolution.

    The block is Conv2D -> (optional instance normalisation) -> LeakyReLU.

    Instance normalisation is expressed with Keras' built-in
    ``GroupNormalization(groups=-1)`` (one group per channel). The previous
    code referenced ``tfa`` (TensorFlow Addons), which this file never
    imports, so calling the function raised ``NameError``.

    Args:
        filters: Number of output channels of the convolution.
        size: Kernel size of the convolution.
        apply_instancenorm: Whether to insert the normalisation layer.

    Returns:
        A ``tf.keras.Sequential`` containing the block's layers.
    """
    initializer = tf.random_normal_initializer(0., 0.02)
    block = tf.keras.Sequential()
    block.add(layers.Conv2D(filters, size, strides=2, padding='same',
                            kernel_initializer=initializer, use_bias=False))
    if apply_instancenorm:
        # groups=-1 -> every channel is its own group == instance norm.
        block.add(layers.GroupNormalization(groups=-1))
    block.add(layers.LeakyReLU())
    return block

def upsample(filters, size, apply_dropout=False):
    """Build a stride-2 transposed-convolution block that doubles resolution.

    The block is Conv2DTranspose -> instance normalisation ->
    (optional Dropout(0.5)) -> ReLU.

    Instance normalisation is expressed with Keras' built-in
    ``GroupNormalization(groups=-1)`` (one group per channel). The previous
    code referenced ``tfa`` (TensorFlow Addons), which this file never
    imports, so calling the function raised ``NameError``.

    Args:
        filters: Number of output channels of the transposed convolution.
        size: Kernel size of the transposed convolution.
        apply_dropout: Whether to insert a 50 % dropout layer.

    Returns:
        A ``tf.keras.Sequential`` containing the block's layers.
    """
    initializer = tf.random_normal_initializer(0., 0.02)
    block = tf.keras.Sequential()
    block.add(layers.Conv2DTranspose(filters, size, strides=2,
                                     padding='same',
                                     kernel_initializer=initializer,
                                     use_bias=False))
    # groups=-1 -> every channel is its own group == instance norm.
    block.add(layers.GroupNormalization(groups=-1))
    if apply_dropout:
        block.add(layers.Dropout(0.5))
    block.add(layers.ReLU())
    return block


In [None]:
# CycleGAN Generator (ResNet‑based) and PatchGAN Discriminator
import tensorflow as tf
from tensorflow.keras import layers, models

# Helper layers
class ReflectionPadding2D(layers.Layer):
    """Pad the two spatial axes of a 4-D tensor by reflection.

    Args:
        padding: ``(w_pad, h_pad)`` - amount added on each side of axis 2
            and axis 1 respectively.
        **kwargs: Forwarded to ``layers.Layer``.
    """

    def __init__(self, padding=(1,1), **kwargs):
        super().__init__(**kwargs)
        # Stored as a tuple so the value is immutable and config-friendly.
        self.padding = tuple(padding)

    def call(self, x):
        """Return ``x`` padded with ``tf.pad(..., mode='REFLECT')``."""
        w_pad, h_pad = self.padding
        # Axis 0 and axis 3 are left untouched; only axes 1 and 2 are padded.
        return tf.pad(x, [[0,0],[h_pad,h_pad],[w_pad,w_pad],[0,0]], mode='REFLECT')

    def get_config(self):
        """Include ``padding`` so the layer can be rebuilt from its config."""
        config = super().get_config()
        config.update({'padding': self.padding})
        return config

def resnet_block(x, filters, size=3):
    """Apply one residual block and return ``x + F(x)``.

    ``F`` is two rounds of reflection-pad -> valid Conv2D -> BatchNorm, with
    a ReLU after the first round only. ``filters`` must equal the channel
    count of ``x`` for the final addition to be valid.
    """
    shortcut = x
    out = x
    for is_last_round in (False, True):
        out = ReflectionPadding2D()(out)
        out = layers.Conv2D(filters, size, padding='valid')(out)
        out = layers.BatchNormalization()(out)
        if not is_last_round:
            out = layers.ReLU()(out)

    return layers.add([shortcut, out])

def build_generator(img_size=256, n_res_blocks=9):
    """Assemble the ResNet-style generator as a Keras functional model.

    Layout: 7x7 stem -> two stride-2 convolutions -> ``n_res_blocks``
    residual blocks at 256 channels -> two stride-2 transposed convolutions
    -> 7x7 ``tanh`` output convolution with 3 channels.

    Args:
        img_size: Height and width of the square RGB input.
        n_res_blocks: Number of residual blocks in the bottleneck.

    Returns:
        A ``models.Model`` named ``'generator'``.
    """
    def norm_relu(tensor):
        # BatchNorm followed by ReLU, shared by every conv stage below.
        tensor = layers.BatchNormalization()(tensor)
        return layers.ReLU()(tensor)

    image_in = layers.Input(shape=(img_size, img_size, 3))

    # Stem: reflection padding keeps the 7x7 valid conv size-preserving.
    h = ReflectionPadding2D(padding=(3,3))(image_in)
    h = layers.Conv2D(64, 7, padding='valid')(h)
    h = norm_relu(h)

    # Encoder: halve the resolution twice.
    for width in (128, 256):
        h = layers.Conv2D(width, 3, strides=2, padding='same')(h)
        h = norm_relu(h)

    # Bottleneck of residual blocks.
    for _ in range(n_res_blocks):
        h = resnet_block(h, 256)

    # Decoder: double the resolution twice.
    for width in (128, 64):
        h = layers.Conv2DTranspose(width, 3, strides=2, padding='same')(h)
        h = norm_relu(h)

    h = ReflectionPadding2D(padding=(3,3))(h)
    rgb_out = layers.Conv2D(3, 7, padding='valid', activation='tanh')(h)

    return models.Model(image_in, rgb_out, name='generator')

def build_discriminator(img_size=256):
    """Assemble the PatchGAN discriminator as a Keras functional model.

    The first 4x4 stride-2 convolution has no normalisation; three further
    4x4 convolutions (strides 2, 2, 1) each use BatchNorm + LeakyReLU(0.2);
    a final 4x4 convolution emits one score per spatial position.

    Args:
        img_size: Height and width of the square RGB input.

    Returns:
        A ``models.Model`` named ``'discriminator'``.
    """
    image_in = layers.Input(shape=(img_size, img_size, 3))

    h = layers.Conv2D(64, 4, strides=2, padding='same')(image_in)
    h = layers.LeakyReLU(0.2)(h)

    stages = ((128, 2), (256, 2), (512, 1))  # (filters, stride) per stage
    for width, step in stages:
        h = layers.Conv2D(width, 4, strides=step, padding='same')(h)
        h = layers.BatchNormalization()(h)
        h = layers.LeakyReLU(0.2)(h)

    # One-channel map of per-patch scores (no activation).
    patch_scores = layers.Conv2D(1, 4, padding='same')(h)
    return models.Model(image_in, patch_scores, name='discriminator')

# Instantiate models 
# In train_step, generator_G is applied to real_x and generator_F to real_y;
# discriminator_X scores real_x / fake_x and discriminator_Y scores
# real_y / fake_y. train() feeds photo batches as real_x and Monet batches
# as real_y, so G maps photo -> Monet and F maps Monet -> photo.
generator_G = build_generator()
generator_F = build_generator()
discriminator_X = build_discriminator()
discriminator_Y = build_discriminator()

print("Generators & discriminators built.")


### **Loss Functions, Optimizers & Training Loop**

In [None]:
import tensorflow as tf
from tensorflow.keras import losses, optimizers

# Hyper‑params
LAMBDA_CYCLE = 10.0                 # weight applied inside cycle_consistency_loss
LAMBDA_ID    = 0.5 * LAMBDA_CYCLE   # weight applied inside identity_loss (evaluates to 5.0)
LR           = 2e-4                 # Adam learning rate shared by all four optimizers
BETA_1       = 0.5                  # Adam beta_1 shared by all four optimizers

# Optimizers: one independent Adam instance per network.
generator_G_optimizer = optimizers.Adam(LR, beta_1=BETA_1)
generator_F_optimizer = optimizers.Adam(LR, beta_1=BETA_1)
discriminator_X_optimizer = optimizers.Adam(LR, beta_1=BETA_1)
discriminator_Y_optimizer = optimizers.Adam(LR, beta_1=BETA_1)

# Loss Objects: mean-squared error, used by the adversarial losses below.
mse = losses.MeanSquaredError()

def generator_loss(fake_logits):
    """Least-squares adversarial loss for a generator.

    Returns the MSE between the discriminator's scores for generated images
    and an all-ones target of the same shape.
    """
    all_real_target = tf.ones_like(fake_logits)
    return mse(all_real_target, fake_logits)

def discriminator_loss(real_logits, fake_logits):
    """Least-squares adversarial loss for a discriminator.

    Scores for real images are regressed towards 1 and scores for generated
    images towards 0; the two MSE terms are averaged.
    """
    on_real = mse(tf.ones_like(real_logits), real_logits)
    on_fake = mse(tf.zeros_like(fake_logits), fake_logits)
    combined = on_real + on_fake
    return 0.5 * combined

def cycle_consistency_loss(real_img, cycled_img):
    """Return the mean absolute difference of the two images, scaled by LAMBDA_CYCLE."""
    mean_abs_error = tf.reduce_mean(tf.abs(real_img - cycled_img))
    return LAMBDA_CYCLE * mean_abs_error

def identity_loss(real_img, same_img):
    """Return the mean absolute difference of the two images, scaled by LAMBDA_ID."""
    mean_abs_error = tf.reduce_mean(tf.abs(real_img - same_img))
    return LAMBDA_ID * mean_abs_error

# @tf.function Train Step
@tf.function
def train_step(real_x, real_y):
    """Run one CycleGAN optimisation step on a pair of batches.

    All forward passes and losses are recorded on a single persistent
    gradient tape; afterwards each of the four networks is updated by its
    own optimizer using only its own trainable variables.

    Args:
        real_x: Batch of images from domain X (train() passes photo batches).
        real_y: Batch of images from domain Y (train() passes Monet batches).

    Returns:
        A dict with the scalar loss tensors ``"gen_G"``, ``"gen_F"``,
        ``"disc_X"`` and ``"disc_Y"`` computed during the forward pass.
    """
    # persistent=True because tape.gradient is called four times below.
    with tf.GradientTape(persistent=True) as tape:
        # Generators: X -> Y -> X cycle
        fake_y   = generator_G(real_x, training=True)
        cycled_x = generator_F(fake_y,   training=True)

        # Y -> X -> Y cycle
        fake_x   = generator_F(real_y, training=True)
        cycled_y = generator_G(fake_x, training=True)

        # Identity mapping (helps preserve colour / content):
        # each generator is fed an image that is already in its output domain.
        same_x = generator_F(real_x, training=True)
        same_y = generator_G(real_y, training=True)

        # Discriminators
        disc_real_x = discriminator_X(real_x, training=True)
        disc_real_y = discriminator_Y(real_y, training=True)
        disc_fake_x = discriminator_X(fake_x, training=True)
        disc_fake_y = discriminator_Y(fake_y, training=True)

        # Generator losses
        gen_G_adv   = generator_loss(disc_fake_y)
        gen_F_adv   = generator_loss(disc_fake_x)
        total_cycle = cycle_consistency_loss(real_x, cycled_x) + cycle_consistency_loss(real_y, cycled_y)
        total_id    = identity_loss(real_x, same_x) + identity_loss(real_y, same_y)

        # Both totals include both identity terms; the term computed from the
        # other generator's output does not depend on this generator's
        # variables, so it only affects the reported value.
        gen_G_total = gen_G_adv + total_cycle + total_id
        gen_F_total = gen_F_adv + total_cycle + total_id

        # Discriminator losses
        disc_X_loss = discriminator_loss(disc_real_x, disc_fake_x)
        disc_Y_loss = discriminator_loss(disc_real_y, disc_fake_y)

    # Apply Gradients
    # Generators: both gradients are computed before either update is applied.
    grads_G = tape.gradient(gen_G_total, generator_G.trainable_variables)
    grads_F = tape.gradient(gen_F_total, generator_F.trainable_variables)
    generator_G_optimizer.apply_gradients(zip(grads_G, generator_G.trainable_variables))
    generator_F_optimizer.apply_gradients(zip(grads_F, generator_F.trainable_variables))

    # Discriminators: gradients are taken w.r.t. each discriminator's own
    # variables only.
    grads_disc_X = tape.gradient(disc_X_loss, discriminator_X.trainable_variables)
    grads_disc_Y = tape.gradient(disc_Y_loss, discriminator_Y.trainable_variables)
    discriminator_X_optimizer.apply_gradients(zip(grads_disc_X, discriminator_X.trainable_variables))
    discriminator_Y_optimizer.apply_gradients(zip(grads_disc_Y, discriminator_Y.trainable_variables))

    return {
        "gen_G": gen_G_total,
        "gen_F": gen_F_total,
        "disc_X": disc_X_loss,
        "disc_Y": disc_Y_loss
    }

# High‑Level Training Driver
def train(epochs):
    """Train the CycleGAN for ``epochs`` passes over the zipped datasets.

    Each epoch iterates ``photo_ds`` and ``monet_ds`` in lock-step and calls
    ``train_step`` on every batch pair. The losses of the last batch are
    printed every fifth epoch.

    Args:
        epochs: Number of epochs to run.

    Raises:
        RuntimeError: If an epoch yields no batches. Previously this case
            surfaced as an ``UnboundLocalError`` at the first logging epoch,
            or silently re-printed stale losses from an earlier epoch.
    """
    for epoch in range(1, epochs + 1):
        print(f"Epoch {epoch}/{epochs}")
        # Reset per epoch so stale values from a previous epoch are never logged.
        losses_dict = None
        for real_x, real_y in tf.data.Dataset.zip((photo_ds, monet_ds)):
            losses_dict = train_step(real_x, real_y)
        if losses_dict is None:
            raise RuntimeError(
                "No training batches were produced; check that both image "
                "folders contain .jpg files."
            )
        # Simple logging
        if epoch % 5 == 0:
            print({k: f"{v.numpy():.4f}" for k, v in losses_dict.items()})


### **Generate & Export Images**

In [None]:
# === Paths ===
# Folder that receives the generated JPEGs (relative to the current
# working directory); created if it does not exist yet.
OUTPUT_DIR = "../images"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# === Generate and Save ===
# Run every listed photo (at most 8000) through generator_G, one image at a
# time, and write the result as a zero-padded, sequentially numbered JPEG.
for idx, photo_path in enumerate(tf.data.Dataset.list_files(str(photo_dir/'*.jpg')).take(8000)):
    img = load_img(photo_path)
    # Add a batch axis for the model call, then drop it again with [0].
    generated = generator_G(tf.expand_dims(img, 0), training=False)[0].numpy()
    generated = (generated * 127.5 + 127.5).astype(np.uint8)  # [-1,1] → [0,255]
    output_path = f"{OUTPUT_DIR}/{idx:05d}.jpg"
    Image.fromarray(generated).save(output_path)

# === Make ZIP ===
# Archive the contents of OUTPUT_DIR as /kaggle/working/images.zip.
submission_path = shutil.make_archive("/kaggle/working/images", 'zip', OUTPUT_DIR)
print(f"Submission file created: {submission_path}")


## **Evaluation**

In [None]:
# Load InceptionV3 once
# ImageNet-pretrained InceptionV3 without its classification head; global
# average pooling makes the model return one feature vector per image.
inception = tf.keras.applications.InceptionV3(include_top=False,
                                              weights='imagenet',
                                              pooling='avg',
                                              input_shape=(299,299,3))
# Used purely as a fixed feature extractor.
inception.trainable = False

def _preprocess(img):
    """Resize ``img`` to 299x299 and map values from [0, 1] to [-1, 1].

    Expects a float32 image whose values are already in [0, 1].
    """
    resized = tf.image.resize(img, (299,299))
    return (resized * 2.0) - 1.0

def _activations(paths, batch=32):
    """Return the Inception feature vectors for the JPEG files in ``paths``.

    Files are decoded and pre-processed in chunks of ``batch`` images, each
    chunk is passed through ``inception`` in inference mode, and the
    per-chunk outputs are concatenated along axis 0.
    """
    def _decode(file_path):
        # Read -> decode to RGB -> float32 in [0, 1] -> Inception pre-processing.
        raw = tf.io.read_file(file_path)
        rgb = tf.image.decode_jpeg(raw, channels=3)
        rgb = tf.image.convert_image_dtype(rgb, tf.float32)
        return _preprocess(rgb)

    chunks = []
    for start in range(0, len(paths), batch):
        decoded = [_decode(p) for p in paths[start:start+batch]]
        features = inception(tf.stack(decoded, axis=0), training=False)
        chunks.append(features.numpy())
    return np.concatenate(chunks, axis=0)

def _calculate_statistics(acts):
    """Return ``(mean, covariance)`` of ``acts``, treating rows as samples.

    The mean is taken over axis 0 and the covariance is computed with
    columns as variables (``rowvar=False``).
    """
    return np.mean(acts, axis=0), np.cov(acts, rowvar=False)

def _calculate_fid(mu1, sigma1, mu2, sigma2, eps=1e-6):
    """Return the Fréchet distance between two Gaussians.

    Computes ``||mu1 - mu2||^2 + Tr(sigma1 + sigma2 - 2 * sqrt(sigma1 @ sigma2))``.

    Args:
        mu1, mu2: Mean vectors of the two distributions.
        sigma1, sigma2: Covariance matrices of the two distributions.
        eps: Value added to the diagonal of both covariances when the matrix
            square root of their product is not finite (previously this
            parameter was accepted but never used).

    Returns:
        The distance as a Python ``float``.
    """
    mu1 = np.atleast_1d(mu1)
    mu2 = np.atleast_1d(mu2)
    # np.cov returns a 0-d array for a single feature; sqrtm needs 2-D input.
    sigma1 = np.atleast_2d(sigma1)
    sigma2 = np.atleast_2d(sigma2)

    # Called without the ``disp`` flag so only the matrix is returned; the
    # error estimate that ``disp=False`` adds was never used here.
    covmean = linalg.sqrtm(sigma1.dot(sigma2))
    if not np.isfinite(covmean).all():
        # Near-singular product: regularise both covariances and retry.
        offset = np.eye(sigma1.shape[0]) * eps
        covmean = linalg.sqrtm((sigma1 + offset).dot(sigma2 + offset))
    if np.iscomplexobj(covmean):
        # Numerical error can leave a small imaginary component; drop it.
        covmean = covmean.real

    diff = mu1 - mu2
    fid = np.sum(diff**2) + np.trace(sigma1 + sigma2 - 2*covmean)
    return float(fid)

def calculate_fid(sample_size=1000, gen_dir='/kaggle/working/generated',
                  real_dir='/kaggle/input/gan-getting-started/monet_jpg'):
    """Estimate the FID between generated images and real reference images.

    The first ``sample_size`` generated ``*.jpg`` files (sorted by name) are
    compared with the real ``*.jpg`` files; the real list is repeated and
    truncated so that both sides contain ``sample_size`` paths.

    Args:
        sample_size: Number of images used on each side.
        gen_dir: Folder containing the generated ``*.jpg`` files.
        real_dir: Folder containing the real ``*.jpg`` files.

    Returns:
        The FID estimate as a ``float``.

    Raises:
        ValueError: If ``gen_dir`` holds fewer than ``sample_size`` images,
            or if ``real_dir`` holds no images at all (previously the latter
            surfaced as a ``ZeroDivisionError``).
    """
    gen_paths  = sorted(glob(f'{gen_dir}/*.jpg'))[:sample_size]
    real_paths = sorted(glob(f'{real_dir}/*.jpg'))
    if len(gen_paths) < sample_size:
        raise ValueError(f'Not enough generated images found in {gen_dir}')
    if not real_paths:
        raise ValueError(f'No real images found in {real_dir}')

    # Repeat the real list enough times to cover sample_size, then truncate.
    repeats = (sample_size // len(real_paths)) + 1
    real_paths = (real_paths * repeats)[:sample_size]

    acts_gen  = _activations(gen_paths)
    acts_real = _activations(real_paths)

    mu_gen,  sigma_gen  = _calculate_statistics(acts_gen)
    mu_real, sigma_real = _calculate_statistics(acts_real)

    return _calculate_fid(mu_gen, sigma_gen, mu_real, sigma_real)


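Run `calculate_fid(gen_dir=OUTPUT_DIR)` after training and image generation to obtain a **local FID estimate** (the default `gen_dir` is `/kaggle/working/generated`, whereas the export cell above writes its images to `../images`).  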
Although MiFID adds a memorization penalty, a solid FID often correlates with a good public MiFID score.

## **Results & Conclusion**

After training for `EPOCHS` epochs with λ<sub>cycle</sub> = 10 and λ<sub>identity</sub> = 5 (0.5 × λ<sub>cycle</sub>), our model achieved a public MiFID score of about 357 (lower is better). The generated paintings display distinct Monet‑style brush strokes and colour palettes while maintaining photo composition.  

Next steps:
- Fine‑tune learning rates & cycle‑consistency weight  
- Increase training epochs using TPU to reduce MiFID further  
- Experiment with StyleGAN‑V2 transfer‑learning for higher‑resolution images
