# Set up steps
#### These steps can be skipped; jump to the "Start of Model" section

In [None]:
import cv2
import csv
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import os
import glob
from scipy import stats
from typing import List, Tuple
from tqdm import tqdm
from tensorflow import keras
from tensorflow.keras import layers, models, optimizers
from tensorflow.keras.models import load_model
import time, math
from scipy.ndimage import binary_dilation

# Type aliases for annotations: an image is a NumPy array, and a labelled
# character pairs such an image with its string label.
# NOTE: the evaluation cell later runs `from PIL import Image`, which rebinds
# the name `Image` to the PIL module from that point on.
Image = np.ndarray
CharacterWithLabel = tuple[Image, str]

In [None]:
# Mount Google Drive under /content/drive/ (Colab only); the dataset and
# output paths used throughout this notebook live below that mount point.
from google.colab import drive
drive.mount('/content/drive/', force_remount=True)
# List the current directory, then move the working directory to its parent.
%ls
%cd ..

Mounted at /content/drive/
[0m[01;36mbin[0m@                        [01;36mlib32[0m@                    [01;34mroot[0m/
[01;34mboot[0m/                       [01;36mlib64[0m@                    [01;34mrun[0m/
[01;34mcontent[0m/                    [01;36mlibx32[0m@                   [01;36msbin[0m@
cuda-keyring_1.1-1_all.deb  [01;34mmedia[0m/                    [01;34msrv[0m/
[01;34mdatalab[0m/                    [01;34mmnt[0m/                      [01;34msys[0m/
[01;34mdev[0m/                        NGC-DL-CONTAINER-LICENSE  [30;42mtmp[0m/
[01;34metc[0m/                        [01;34mopt[0m/                      [01;34mtools[0m/
[01;34mhome[0m/                       [01;34mproc[0m/                     [01;34musr[0m/
[01;34mkaggle[0m/                     [01;34mpython-apt[0m/               [01;34mvar[0m/
[01;36mlib[0m@                        [01;32mpython-apt.tar.xz[0m*
/


In [None]:
# Sanity check: confirm the dataset path on the mounted Drive is reachable.
# Note that the path points at the .npy image archive, even though the
# messages below speak of a "folder".
SOURCE_DIR = '/content/drive/MyDrive/cs4243-project/preprocessing/clean/char_normalised/combined_images_original.npy'

print("The folder exists." if os.path.exists(SOURCE_DIR) else "The folder does not exist.")

The folder exists.


In [None]:
# Report how many GPUs TensorFlow can see and the device name it resolves to.
import tensorflow as tf

print("Num GPUs Available: ", len(tf.config.list_physical_devices('GPU')))
print(f"Found GPU at: {tf.test.gpu_device_name()}")

Num GPUs Available:  1
Found GPU at: /device:GPU:0


In [None]:
# Restrict TensorFlow to the first physical GPU when one is present.
gpus = tf.config.list_physical_devices('GPU')
if not gpus:
    print("No GPU devices found.")
else:
    print("GPU is available:", gpus)
    try:
        # Only the first GPU stays visible to TensorFlow.
        tf.config.set_visible_devices(gpus[0], 'GPU')
        logical_gpus = tf.config.list_logical_devices('GPU')
        print(len(gpus), "Physical GPUs,", len(logical_gpus), "Logical GPU")
    except RuntimeError as err:
        # Raised when the devices were already initialised before this call.
        print(err)

GPU is available: [PhysicalDevice(name='/physical_device:GPU:0', device_type='GPU')]
1 Physical GPUs, 1 Logical GPU


# Start of Model
### Defining functions, models and model parameters

In [None]:
# =========================
# 0) Globals
# =========================
# Module-level constants shared by the data pipeline, the models and the
# training loop defined below.

# Image / model params
IMG_SHAPE   = (80, 80, 1)  # critic input shape; unpacked into H, W, C on the next line
H, W, C     = IMG_SHAPE
LATENT_DIM  = 100          # length of the noise vector fed to the generator
BATCH_SIZE  = 64           # batch size used by make_dataset() and train()
N_CRITIC    = 5          # critic (D) update steps per generator (G) step
GP_WEIGHT   = 10.0         # multiplier applied to the gradient penalty in d_train_step()
G_LR        = 1e-4     # generator Adam learning rate (TTUR: slower G)
D_LR        = 4e-4     # critic Adam learning rate (TTUR: faster D)

# =========================
# 1) Data loading
# =========================
def _to_single_channel(imgs):
    """Return `imgs` as an (N, H, W, 1) array, or raise ValueError if that is impossible."""
    if imgs.ndim == 3:
        # (N, H, W): assume grayscale and append the channel axis.
        return imgs[..., np.newaxis]
    if imgs.ndim == 4:
        channels = imgs.shape[-1]
        if channels == 1:
            return imgs
        if channels == 3:
            # Assume RGB; a plain channel average is used as the grayscale value.
            return np.mean(imgs, axis=-1, keepdims=True)
        raise ValueError(f"Unexpected number of channels in image data: {imgs.shape}. Expected 1 or 3.")

    # Anything else (e.g. (N, H, W, 3, 1)): drop size-1 axes and retry.
    print(f"Warning: Image data loaded with unexpected shape {imgs.shape}. Attempting to convert to (N, H, W, 1).")
    # Never squeeze axis 0, otherwise a dataset holding a single image would lose its batch axis.
    unit_axes = tuple(ax for ax in range(1, imgs.ndim) if imgs.shape[ax] == 1)
    squeezed = np.squeeze(imgs, axis=unit_axes) if unit_axes else imgs
    if squeezed.ndim == 3:
        return squeezed[..., np.newaxis]
    if squeezed.ndim == 4 and squeezed.shape[-1] == 3:
        return np.mean(squeezed, axis=-1, keepdims=True)
    raise ValueError(f"Failed to convert image data to (N, H, W, 1) from {squeezed.shape} after squeeze.")


def load_char_data(csv_path, npy_path):
    """Load images (.npy) + labels (.csv), convert to grayscale (1 channel), normalize to [-1,1].

    Args:
        csv_path: CSV file whose first column holds the labels. The first
            non-empty row is always treated as a header and dropped; blank
            rows are skipped.
        npy_path: NumPy archive with the images, shaped (N, H, W),
            (N, H, W, 1) or (N, H, W, 3). Higher-rank arrays are accepted when
            squeezing their size-1 axes yields one of those layouts.

    Returns:
        (imgs, labels): `imgs` is float32 with shape (N, H, W, 1), `labels` is
        a list of strings.

    Raises:
        ValueError: if the image array cannot be brought to (N, H, W, 1).
    """
    imgs = np.load(npy_path)

    # newline="" is what the csv module expects so embedded newlines are parsed correctly.
    with open(csv_path, newline="") as f:
        labels = [row[0] for row in csv.reader(f) if row]
    labels = labels[1:]                            # drop the header row

    imgs = _to_single_channel(imgs).astype("float32")
    # Map to [-1, 1] for the tanh generator; this assumes the stored pixel
    # values are in [0, 255].
    imgs = (imgs / 127.5) - 1.0

    if len(labels) != len(imgs):
        print(f"Warning: {len(labels)} labels for {len(imgs)} images.")

    print(f"[load] X shape={imgs.shape} min={imgs.min():.3f} max={imgs.max():.3f} mean={imgs.mean():.3f} std={imgs.std():.3f}")
    return imgs, labels

def make_dataset(X, batch_size=BATCH_SIZE, shuffle=8192):
    """Return an iterator over an endless, shuffled, fixed-size-batch tf.data pipeline built from X.

    `shuffle` is the shuffle-buffer size; incomplete batches are dropped.
    """
    pipeline = tf.data.Dataset.from_tensor_slices(X)
    pipeline = pipeline.shuffle(shuffle)
    pipeline = pipeline.repeat()
    pipeline = pipeline.batch(batch_size, drop_remainder=True)
    return iter(pipeline)

# =========================
# 2) C-GAN compositing utils
# =========================
def random_blob_mask(h, w, blobs=3, p=0.45, dilate=2):
    """Build a random float32 {0, 1} mask of shape (h, w).

    `blobs` independent Bernoulli(p) seed maps are drawn; each is grown by a
    random number (1..dilate) of binary dilations when `dilate` > 0, and all
    of them are OR-ed together.
    """
    combined = np.zeros((h, w), dtype=bool)
    for _ in range(blobs):
        blob = np.random.rand(h, w) < p
        if dilate > 0:
            n_iter = np.random.randint(1, dilate + 1)
            blob = binary_dilation(blob, iterations=n_iter)
        combined |= blob
    return combined.astype(np.float32)

def compose_two(img1, img2, mode="mask", alpha_range=(0.35, 0.65)):
    """Blend two images that live in [-1, 1] and share the shape (H, W, 1).

    Args:
        img1, img2: source images.
        mode: "alpha" blends with one random scalar weight drawn from
            `alpha_range`; any other value takes each pixel from img1 or img2
            according to a random blob mask.
        alpha_range: (low, high) bounds for the blend weight in "alpha" mode.

    Returns:
        The composite image, clipped to [-1, 1].
    """
    if mode == "alpha":
        a = np.random.uniform(*alpha_range)
        return np.clip(a*img1 + (1.0-a)*img2, -1.0, 1.0)
    # Size the mask from the image itself rather than from the module-level
    # H, W so the function also works for inputs of other resolutions.
    h, w = img1.shape[:2]
    m = random_blob_mask(h, w, blobs=np.random.randint(2,5),
                         p=np.random.uniform(0.25,0.5),
                         dilate=np.random.randint(1,4))
    m = m[..., None]  # (H, W) -> (H, W, 1) so it broadcasts over the channel axis
    # mask operates in [-1,1] space: 1 selects img1, 0 selects img2
    return np.clip(m*img1 + (1.0-m)*img2, -1.0, 1.0)

def composite_batch(X, batch_size, k=2, mode="mask", keep_plain_prob=0.15):
    """Sample a batch of (optionally composited) images from X.

    Args:
        X: array of images, shape (N, H, W, C), values in [-1, 1].
        batch_size: number of images to return.
        k: number of source images drawn per output; sources 2..k are folded
            into the first one with compose_two().
        mode: forwarded to compose_two().
        keep_plain_prob: probability that an output is the first sampled
            source image left untouched.

    Returns:
        float32 array of shape (batch_size, H, W, C).
    """
    idxs = np.random.randint(0, X.shape[0], size=(batch_size, k))
    # Take the per-image shape from X instead of the module-level H, W, C so
    # the buffer always matches the data that is written into it.
    out = np.empty((batch_size,) + tuple(X.shape[1:]), dtype=np.float32)
    for i in range(batch_size):
        if np.random.rand() < keep_plain_prob:
            out[i] = X[idxs[i,0]]
            continue
        img = X[idxs[i,0]]
        for j in range(1, k):
            img = compose_two(img, X[idxs[i,j]], mode=mode)
        out[i] = img
    return out

# =========================
# 3) Models
# =========================
# creates the images
def build_generator():
    """Build the generator: LATENT_DIM noise -> 10x10x256 -> three stride-2 upsamplings -> 80x80x1 tanh image."""
    stack = [
        layers.Input(shape=(LATENT_DIM,)),
        layers.Dense(10*10*256, use_bias=False),
        layers.BatchNormalization(),
        layers.LeakyReLU(),
        layers.Reshape((10,10,256)),
    ]
    # Two intermediate upsampling stages: 10x10 -> 20x20 -> 40x40.
    for filters in (128, 64):
        stack += [
            layers.Conv2DTranspose(filters, 5, strides=2, padding='same', use_bias=False),
            layers.BatchNormalization(),
            layers.LeakyReLU(),
        ]
    # Final upsampling to 80x80 with a single tanh channel.
    stack.append(
        layers.Conv2DTranspose(1, 5, strides=2, padding='same', activation='tanh', use_bias=False)
    )
    return models.Sequential(stack, name="generator")

# scores images and learns a function whose output difference approximates
# the Wasserstein distance between real and fake.
def build_critic():
    """Build the critic: two stride-2 5x5 convolutions with LeakyReLU(0.2), then a single linear score."""
    stack = [layers.Input(shape=IMG_SHAPE)]
    for filters in (64, 128):
        stack.append(layers.Conv2D(filters, 5, strides=2, padding='same'))
        stack.append(layers.LeakyReLU(0.2))
    stack.append(layers.Flatten())
    stack.append(layers.Dense(1))  # raw critic score, no activation
    return models.Sequential(stack, name="critic")

# =========================
# 4) WGAN-GP training steps
# =========================
def interpolate(a, b):
    """Return per-sample random points on the segments between `a` and `b` (one uniform weight per batch item)."""
    n = tf.shape(a)[0]
    mix = tf.random.uniform([n, 1, 1, 1], minval=0., maxval=1.)
    return a + mix * (b - a)

def gradient_penalty(critic, real, fake):
    """Mean squared deviation from 1 of the critic's gradient norm at random real/fake interpolates."""
    mixed = interpolate(real, fake)
    with tf.GradientTape() as gp_tape:
        gp_tape.watch(mixed)  # `mixed` is not a variable, so it must be watched explicitly
        scores = critic(mixed, training=True)
    (grads,) = gp_tape.gradient(scores, [mixed])
    flat = tf.reshape(grads, [tf.shape(grads)[0], -1])
    norms = tf.norm(flat, axis=1)
    return tf.reduce_mean((norms - 1.0) ** 2)

@tf.function
def d_train_step(generator, critic, d_opt, real_batch):
    """Run one critic update on `real_batch` and a freshly generated fake batch.

    Returns (d_loss, w_dist, gp): the total critic loss, the
    mean(fake) - mean(real) score term, and the weighted gradient penalty.
    """
    n = tf.shape(real_batch)[0]
    noise = tf.random.normal([n, LATENT_DIM])
    # Fakes are produced outside the tape: only critic variables are differentiated here.
    fake_imgs = generator(noise, training=True)
    with tf.GradientTape() as tape:
        score_real = critic(real_batch, training=True)
        score_fake = critic(fake_imgs, training=True)
        w_dist = tf.reduce_mean(score_fake) - tf.reduce_mean(score_real)
        gp = GP_WEIGHT * gradient_penalty(critic, real_batch, fake_imgs)
        d_loss = w_dist + gp
    critic_vars = critic.trainable_variables
    grads = tape.gradient(d_loss, critic_vars)
    d_opt.apply_gradients(zip(grads, critic_vars))
    return d_loss, w_dist, gp

@tf.function
def g_train_step(generator, critic, g_opt, batch_size):
    """Run one generator update: minimise the negated mean critic score of generated images."""
    noise = tf.random.normal([batch_size, LATENT_DIM])
    with tf.GradientTape() as tape:
        score_fake = critic(generator(noise, training=True), training=True)
        g_loss = -tf.reduce_mean(score_fake)
    gen_vars = generator.trainable_variables
    grads = tape.gradient(g_loss, gen_vars)
    g_opt.apply_gradients(zip(grads, gen_vars))
    return g_loss

# =========================
# 5) Trainer
# =========================
def train(
    X, steps=20000, outdir=None,
    use_composites=False, k_comp=2, mode="mask",
    warmup_steps=5000, keep_plain_end=0.1,
    preview_every=500
):
    """Train a WGAN-GP generator/critic pair, optionally on composited real images.

    Args:
        X: np.array in [-1,1], shape (N,H,W,1).
        steps: number of generator steps; each is preceded by N_CRITIC critic steps.
        outdir: if set, preview samples are also written there as PNG files.
        use_composites: enable C-GAN compositing after warmup.
        k_comp: number of source images per composite (see composite_batch).
        mode: compositing mode forwarded to composite_batch.
        warmup_steps: train on plain reals for this many steps first.
        keep_plain_end: probability of keeping plain images in composite
            batches at the final step (see the ramp below).
        preview_every: show (and optionally save) samples every this many
            steps; 0 or None disables previews.

    Returns:
        (gen, crt, d_losses, g_losses): the trained models plus the per-step
        critic and generator losses, stored as the tensors returned by the
        train steps (callers convert them with `.numpy()`).
    """
    gen = build_generator()
    crt = build_critic()
    g_opt = tf.keras.optimizers.Adam(G_LR, beta_1=0.0, beta_2=0.9)
    d_opt = tf.keras.optimizers.Adam(D_LR, beta_1=0.0, beta_2=0.9)

    d_losses = []
    g_losses = []

    ds_iter = make_dataset(X, batch_size=BATCH_SIZE)

    def preview(step, n=16):
        """Show a grid of n generated samples and, when outdir is set, save each as a PNG."""
        z = tf.random.normal([n, LATENT_DIM])
        x = gen(z, training=False).numpy()
        x = (x + 1.0) * 0.5  # [-1,1] -> [0,1]
        cols = int(math.sqrt(n)); rows = math.ceil(n/cols)
        fig = plt.figure(figsize=(cols*2, rows*2))
        for i in range(n):
            plt.subplot(rows, cols, i+1)
            plt.imshow(x[i,...,0], cmap='gray', vmin=0, vmax=1); plt.axis('off')
        plt.suptitle(f"samples @ step {step}")
        plt.show()
        # Release the figure explicitly so long runs do not accumulate open figures.
        plt.close(fig)
        if outdir:
            os.makedirs(outdir, exist_ok=True)
            for i in range(n):
                plt.imsave(os.path.join(outdir, f"sample_{step:06d}_{i:02d}.png"), x[i,...,0], cmap='gray', vmin=0, vmax=1)

    def next_real_batch(step):
        """Return one real batch for `step`: plain during warmup, composited afterwards."""
        if use_composites and step >= warmup_steps:
            # Linear ramp of the keep-plain probability:
            # (0.8 + keep_plain_end) right after warmup -> keep_plain_end at the last step.
            t = (step - warmup_steps) / max(1, (steps - warmup_steps))
            keep_plain = 0.8*(1-t) + keep_plain_end
            return composite_batch(X, BATCH_SIZE, k=k_comp, mode=mode, keep_plain_prob=keep_plain).astype(np.float32)
        return next(ds_iter).numpy()

    for step in range(steps):
        # ----- N_CRITIC steps -----
        # Each critic update gets its own freshly sampled real batch (as in the
        # WGAN-GP algorithm) instead of reusing one batch N_CRITIC times.
        for _ in range(N_CRITIC):
            d_loss, wdist, gp = d_train_step(gen, crt, d_opt, next_real_batch(step))

        # ----- 1 G step -----
        g_loss = g_train_step(gen, crt, g_opt, BATCH_SIZE)

        if step % 200 == 0:
            print(f"step {step:05d} | D: {d_loss.numpy():.3f} (W {wdist.numpy():.3f}, GP {gp.numpy():.3f}) | G: {g_loss.numpy():.3f} | comps={use_composites and step>=warmup_steps}")

        # Guard against preview_every == 0 (modulo by zero) so previews can be disabled.
        if preview_every and step % preview_every == 0:
            preview(step)

        d_losses.append(d_loss)
        g_losses.append(g_loss)

    return gen, crt, d_losses, g_losses


# Loading the data and running training

In [None]:
# Run
# Paths to the label CSV and the image archive on the mounted Drive.
csvFile   = "/content/drive/MyDrive/cs4243-project/preprocessing/clean/char_normalised/combined_labels_original.csv"
SOURCE    = "/content/drive/MyDrive/cs4243-project/preprocessing/clean/char_normalised/combined_images_original.npy"

# Labels are loaded but discarded: training below only uses the images.
X_train, _ = load_char_data(csvFile, SOURCE)

# --- Simple sanity peek at real data ---
# Show nine randomly chosen training images, mapped from [-1,1] back to [0,1].
plt.figure(figsize=(6,6))
for i in range(9):
    idx = np.random.randint(0, len(X_train))
    plt.subplot(3,3,i+1)
    plt.imshow(((X_train[idx]+1.0)*0.5)[...,0], cmap='gray', vmin=0, vmax=1)
    plt.axis('off')
plt.suptitle("REAL samples")
plt.show()

# --- Train ---
# Preview PNGs and the saved generator both go to `outdir`.
outdir = "/content/drive/MyDrive/cs4243-project/outputs/cgan_samples_3"
generator, critic, d_loss, g_loss = train(
    X_train,
    steps=10000,
    outdir=outdir,
    use_composites=True,   # turn on C-GAN compositing
    k_comp=2,              # compose 2 sources (like paper)
    mode="mask",           # pixel replacement
    warmup_steps=8000,     # learn plain manifold first
    keep_plain_end=0.10,   # still keep some plain reals later
    preview_every=500
)

# --- Save generator ---
# Only the generator is persisted; `d_loss` / `g_loss` are the per-step loss
# lists returned by train() and are plotted in the next cell.
generator.save(os.path.join(outdir, "generator_wgangp_cgan_3.h5"))
print(f"Saved generator to {outdir}")

# Plot generator and critic loss

In [None]:
# Convert the per-step loss tensors returned by train() into plain values.
g_loss_values = [step_loss.numpy() for step_loss in g_loss]
d_loss_values = [step_loss.numpy() for step_loss in d_loss]

# Draw both curves on one set of axes.
plt.figure(figsize=(10, 6))
for _series, _label in ((g_loss_values, 'Generator Loss'), (d_loss_values, 'Discriminator Loss')):
    plt.plot(_series, label=_label)
plt.title('Generator and Discriminator Loss Over Training Steps')
plt.xlabel('Training Step')
plt.ylabel('Loss Value')
plt.legend()
plt.grid(True)
plt.show()

# Generating characters

In [None]:
# Load a saved generator. Three generators were trained with different
# hyperparameters; uncomment the path of the one to inspect.
# model_path = "/content/drive/MyDrive/cs4243-project/outputs/cgan_samples/generator_wgangp_cgan.h5"
# model_path = "/content/drive/MyDrive/cs4243-project/outputs/cgan_samples_2/generator_wgangp_cgan_2.h5"
model_path = "/content/drive/MyDrive/cs4243-project/outputs/cgan_samples_3/generator_wgangp_cgan_3.h5"
# `gen` is the generator used by the generation cell that follows.
gen = load_model(model_path)



In [None]:
LATENT_DIM = 100  # same as training

# --- Generate random latent vectors ---
n = 16   # number of images to generate
z = np.random.normal(0, 1, (n, LATENT_DIM))
generated = gen.predict(z, verbose=0)   # shape (n, 80, 80, 1), range [-1,1]
generated = (generated + 1.0) * 0.5     # map to [0,1] for viewing

# `final_images` is the name the preview below and the saving cell read.
# It was never assigned anywhere in the notebook, so a fresh-kernel run
# failed with NameError; bind it to the rescaled generator output.
final_images = generated

# --- Preview as grid ---
cols = int(np.sqrt(n)); rows = int(np.ceil(n/cols))
plt.figure(figsize=(cols*2, rows*2))
for i in range(n):
    plt.subplot(rows, cols, i+1)
    plt.imshow(final_images[i,...,0], cmap='gray', vmin=0, vmax=1)
    plt.axis('off')
plt.suptitle("Generated characters")
plt.show()

### Saving Generated Characters

In [None]:
# Write every generated character to Drive as a grayscale PNG so it can be
# reused for training and testing.
save_dir = "/content/drive/MyDrive/cs4243-project/outputs/generated_characters"
os.makedirs(save_dir, exist_ok=True)

print(f"Saving {len(final_images)} generated characters to {save_dir}")

for i, img_array in enumerate(final_images):
    # imsave needs a 2-D array for grayscale output: drop a trailing singleton channel.
    img_array = img_array[..., 0] if (img_array.ndim == 3 and img_array.shape[2] == 1) else img_array
    file_path = os.path.join(save_dir, f"generated_char_{i:03d}.png")
    plt.imsave(file_path, img_array, cmap='gray', vmin=0, vmax=1)

print("Generated characters saved successfully.")

# Evaluations

This evaluation aims to compare the quality of several WGAN-based generators by using both perceptual and distribution-level metrics. We generate a large set of samples from each model and compare them against real character images using **FID**, which measures how close the generated image distribution is to the real one, and **SSIM/PSNR**, which quantify visual similarity to the nearest real sample.

These metrics help reveal whether a model produces sharp, realistic characters, avoids mode collapse, and captures the variability present in real data. Together, they provide a structured and objective way to determine which generator produces the most realistic and useful CAPTCHA characters.

In [None]:
# Evaluation script for multiple WGAN generators
import os
import numpy as np
from glob import glob
from tqdm import tqdm
import tensorflow as tf
from tensorflow.keras.models import load_model
from tensorflow.keras.applications.inception_v3 import InceptionV3, preprocess_input as inception_preprocess
from scipy.linalg import sqrtm
from skimage.metrics import structural_similarity as ssim
from skimage.metrics import peak_signal_noise_ratio as psnr
from PIL import Image

# ---------- PARAMETERS ----------
# Saved generators to compare; each is loaded and evaluated in turn.
MODEL_PATHS = [
    "/content/drive/MyDrive/cs4243-project/outputs/cgan_samples/generator_wgangp_cgan.h5",
    "/content/drive/MyDrive/cs4243-project/outputs/cgan_samples_2/generator_wgangp_cgan_2.h5",
    "/content/drive/MyDrive/cs4243-project/outputs/cgan_samples_3/generator_wgangp_cgan_3.h5",
]
LATENT_DIM = 100
N_SAMPLES = 2048  # number of generated samples to use for FID (no Inception Score is computed below)
BATCH = 64
# NOTE(review): REAL_DIR is passed to the directory loader as `dir_path` but
# points at the labels CSV, not a folder of images. It is only used when
# REAL_NPY is empty - confirm the intended path before relying on it.
REAL_DIR = '/content/drive/MyDrive/cs4243-project/preprocessing/clean/char_normalised/combined_labels_original.csv'
REAL_NPY = '/content/drive/MyDrive/cs4243-project/preprocessing/clean/char_normalised/combined_images_original.npy'
IMAGE_SHAPE = (80, 80)  # your generator output shape (H,W)
RANDOM_SEED = 42
# Optional classifier used for the confidence check in the main block; set to a falsy value to skip it.
CLASSIFIER_PATH = '/content/drive/MyDrive/cs4243-project/models/classfier_test.h5'

# ---------- UTILITIES ----------
# Seed NumPy and TensorFlow so latent sampling and real-image subsampling are repeatable.
np.random.seed(RANDOM_SEED)
tf.random.set_seed(RANDOM_SEED)

def load_real_images_from_dir(dirname, target_shape=(80,80), max_images=None):
    """Load every readable image file in `dirname` as grayscale in [0, 1].

    Args:
        dirname: directory whose files (matched by "*.*") are loaded in sorted order.
        target_shape: output size as (H, W).
        max_images: if truthy, only the first `max_images` paths are read.

    Returns:
        float32 array of shape (N, H, W, 1).

    Raises:
        ValueError: if no image could be loaded.
    """
    paths = sorted(glob(os.path.join(dirname, "*.*")))
    if max_images:
        paths = paths[:max_images]
    # PIL's resize() takes (width, height), while target_shape is (H, W).
    pil_size = (target_shape[1], target_shape[0])
    imgs = []
    for p in paths:
        try:
            # The context manager closes the file handle once the pixels are converted.
            with Image.open(p) as im:
                gray = im.convert('L').resize(pil_size, Image.BILINEAR)
            arr = np.asarray(gray, dtype=np.float32) / 255.0  # [0,1]
            if arr.ndim == 2:
                arr = arr[..., np.newaxis]
            imgs.append(arr)
        except Exception as e:
            # Best effort: unreadable / non-image files are reported and skipped.
            print("Skipping", p, ":", e)
    if not imgs:
        raise ValueError(f"No readable images found in {dirname!r}")
    return np.stack(imgs, axis=0)

def load_real_images(npy_path=None, dir_path=None, target_shape=(80,80), max_images=None):
    """Load the real images as a float32 array of shape (N, H, W, 1) in [0, 1].

    Args:
        npy_path: .npy archive shaped (N,H,W), (N,H,W,1) or (N,H,W,3); takes
            precedence over `dir_path`.
        dir_path: directory of image files, used when `npy_path` is falsy.
        target_shape: desired (H, W); images of another size are resized.
            None disables resizing.
        max_images: if truthy, keep only the first `max_images` images.

    Raises:
        ValueError: if neither `npy_path` nor `dir_path` is given.
    """
    if npy_path:
        data = np.load(npy_path)
        # Decide on the value range from the whole archive: anything above 2
        # is taken to be 0..255 pixel data.
        needs_scaling = data.max() > 2.0
        if max_images:
            # Truncate first so the work below only touches images that are kept.
            data = data[:max_images]
        if data.ndim == 3:
            data = data[..., np.newaxis]
        # Always return float32, whatever dtype was stored.
        data = data.astype(np.float32)
        if needs_scaling:
            data = data / 255.0
        if data.ndim == 4 and data.shape[-1] == 3:
            # Downstream metrics read a single channel: average RGB to grayscale.
            data = np.mean(data, axis=-1, keepdims=True)
        if target_shape is not None and tuple(data.shape[1:3]) != tuple(target_shape):
            # PIL's resize() takes (width, height), while target_shape is (H, W).
            pil_size = (target_shape[1], target_shape[0])
            resized = []
            for im in data:
                # Clip before the uint8 cast so out-of-range values cannot wrap around.
                im_u8 = (np.clip(im.squeeze(), 0.0, 1.0)*255).astype(np.uint8)
                p = Image.fromarray(im_u8).resize(pil_size, Image.BILINEAR)
                a = np.asarray(p, dtype=np.float32)/255.0
                if a.ndim==2: a = a[..., np.newaxis]
                resized.append(a)
            data = np.stack(resized, axis=0)
        return data
    elif dir_path:
        return load_real_images_from_dir(dir_path, target_shape=target_shape, max_images=max_images)
    else:
        raise ValueError("Provide npy_path or dir_path")

def to_3ch_and_resize(images, size=(299,299)):
    """Resize single-channel images to `size` and replicate them to 3 channels.

    Args:
        images: (N,H,W,1) array with values in [0,1]; only channel 0 is read.
        size: output (height, width).

    Returns:
        float32 array of shape (N, size[0], size[1], 3) in [0,1].
    """
    N = images.shape[0]
    out = np.zeros((N, size[0], size[1], 3), dtype=np.float32)
    for i in range(N):
        # Clip before the uint8 cast: values slightly outside [0,1] would
        # otherwise wrap around (e.g. -0.01 -> 253).
        im = np.clip(images[i,...,0], 0.0, 1.0)
        # PIL's resize() takes (width, height).
        pil = Image.fromarray((im*255).astype(np.uint8)).resize((size[1], size[0]), Image.BILINEAR)
        arr = np.asarray(pil, dtype=np.float32) / 255.0
        if arr.ndim == 2:
            arr = np.stack([arr,arr,arr], axis=-1)
        out[i] = arr
    return out

# ---------- FID / IS helpers ----------
def get_inception_activations(images, model, batch_size=32):
    """Run `model` over `images` in batches and return the stacked activations.

    `images` is (N,H,W,3) in [0,1]. Each batch is scaled by 255 and then passed
    through `inception_preprocess` before prediction.
    """
    total = images.shape[0]
    chunks = [
        model.predict(inception_preprocess(images[start:start + batch_size] * 255.0), verbose=0)
        for start in range(0, total, batch_size)
    ]
    return np.vstack(chunks)

def calculate_frechet_distance(mu1, sigma1, mu2, sigma2, eps=1e-6):
    """Numpy implementation of the FID formula.

    Computes ||mu1 - mu2||^2 + Tr(sigma1 + sigma2 - 2*sqrt(sigma1*sigma2)).

    Args:
        mu1, mu2: feature means, shape (D,).
        sigma1, sigma2: feature covariance matrices, shape (D, D).
        eps: diagonal offset added to both covariances when the matrix square
            root of their product is not finite (near-singular product).

    Returns:
        The Frechet distance as a real scalar.
    """
    diff = mu1 - mu2
    covmean = sqrtm(sigma1.dot(sigma2))
    if not np.isfinite(covmean).all():
        # Near-singular product: retry with a small diagonal offset, as the
        # reference FID implementations do.
        offset = np.eye(sigma1.shape[0]) * eps
        covmean = sqrtm((sigma1 + offset).dot(sigma2 + offset))
    # numerical error might give slight imaginary component
    if np.iscomplexobj(covmean):
        covmean = covmean.real
    fid = diff.dot(diff) + np.trace(sigma1 + sigma2 - 2*covmean)
    return np.real(fid)

def compute_fid(real_images, gen_images, inception_model):
    """Compute the FID between two sets of (N,H,W,1) images in [0,1] using `inception_model` features."""
    def _moments(acts):
        # Mean vector and covariance matrix of the activations (rows = samples).
        return np.mean(acts, axis=0), np.cov(acts, rowvar=False)

    # Both sets are upsampled to the 299x299x3 input the Inception model expects.
    real_rgb = to_3ch_and_resize(real_images, size=(299,299))
    gen_rgb = to_3ch_and_resize(gen_images, size=(299,299))
    real_acts = get_inception_activations(real_rgb, inception_model, batch_size=BATCH)
    gen_acts = get_inception_activations(gen_rgb, inception_model, batch_size=BATCH)
    mu_real, sigma_real = _moments(real_acts)
    mu_gen, sigma_gen = _moments(gen_acts)
    return calculate_frechet_distance(mu_real, sigma_real, mu_gen, sigma_gen)

# ---------- SSIM / PSNR nearest-neighbor ----------
def compute_nearest_ssim_psnr(real_images, gen_images, sample_n=1024):
    """Score generated images against their nearest real image.

    For each of the first `sample_n` generated images, the real image with the
    smallest squared pixel distance is found by brute force, and SSIM / PSNR
    are computed on the uint8 versions of the pair.

    Returns:
        (ssim_mean, ssim_std, psnr_mean, psnr_std)
    """
    n_gen = min(len(gen_images), sample_n)
    reals_flat = real_images.reshape((len(real_images), -1))
    ssim_vals, psnr_vals = [], []
    for g in gen_images[:n_gen]:
        sq_dists = np.sum((reals_flat - g.reshape(-1))**2, axis=1)
        nearest = real_images[np.argmin(sq_dists), ..., 0]
        # Both metrics run on 8-bit versions of the two images.
        r_u8 = (nearest*255).astype(np.uint8)
        g_u8 = (g[...,0]*255).astype(np.uint8)
        ssim_vals.append(ssim(r_u8, g_u8, data_range=255))
        psnr_vals.append(psnr(r_u8, g_u8, data_range=255))
    return np.mean(ssim_vals), np.std(ssim_vals), np.mean(psnr_vals), np.std(psnr_vals)

# ---------- Generate samples with a generator ----------
def generate_images_from_generator(gen, n_samples, latent_dim, batch_size=64):
    """Sample `n_samples` images from `gen` and return them as float32 in [0, 1].

    Args:
        gen: object with a Keras-style `predict(z, verbose=0)` method.
        n_samples: number of images to return.
        latent_dim: length of each latent vector.
        batch_size: number of latent vectors per `predict` call.

    Returns:
        float32 array of shape (n_samples, H, W, 1).
    """
    steps = int(np.ceil(n_samples / batch_size))
    batches = []
    for _ in range(steps):
        z = np.random.normal(0, 1, (batch_size, latent_dim))
        g = gen.predict(z, verbose=0)
        # ensure shape (batch,H,W,1)
        if g.ndim == 3:
            g = g[..., np.newaxis]
        batches.append(g)
    out = np.vstack(batches)[:n_samples].astype(np.float32)
    # Decide once for the whole sample whether the generator emits [-1,1]
    # (any negative value) and rescale everything together. Deciding per batch
    # could leave some batches in [-1,1] and others in [0,1].
    if out.min() < 0.0:
        out = (out + 1.0) * 0.5
    return out

# ---------- Optional classifier evaluation ----------
def evaluate_with_classifier(classifier_model, gen_images, label_map=None):
    """Classify generated images and return (predicted class indices, top-class scores).

    The images are passed to the classifier as-is, i.e. it is assumed to accept
    (N,H,W,1) inputs in [0,1]. `label_map` is accepted but not used.
    """
    scores = classifier_model.predict(gen_images, verbose=0)
    return np.argmax(scores, axis=1), np.max(scores, axis=1)

# ---------- MAIN: run evaluation on list of models ----------
def evaluate_models(model_paths, real_images, latent_dim=100, n_samples=2048):
    """Evaluate each saved generator against the real images.

    For every path: load the generator, sample `n_samples` images, compute FID
    against an equally sized random subset of `real_images`, and compute
    nearest-neighbour SSIM / PSNR on the first 512 generated images.

    Returns:
        A list with one metrics dict per model.
    """
    # Inception network used as the feature extractor for FID.
    inception = InceptionV3(include_top=False, pooling='avg', input_shape=(299,299,3))
    n_real = len(real_images)
    results = []
    for path in model_paths:
        print("Loading:", path)
        gen = load_model(path, compile=False)
        gen_images = generate_images_from_generator(gen, n_samples, latent_dim, batch_size=BATCH)
        print(f"Generated {len(gen_images)} images from {os.path.basename(path)}")
        # Draw as many reals as there are fakes; sample with replacement only
        # when the real set is too small.
        idx = np.random.choice(n_real, n_samples, replace=n_real < n_samples)
        fid_val = compute_fid(real_images[idx], gen_images, inception)
        ssim_mean, ssim_std, psnr_mean, psnr_std = compute_nearest_ssim_psnr(
            real_images, gen_images, sample_n=512
        )
        res = dict(
            model_path=path,
            fid=float(fid_val),
            ssim_mean=float(ssim_mean),
            ssim_std=float(ssim_std),
            psnr_mean=float(psnr_mean),
            psnr_std=float(psnr_std),
            n_generated=len(gen_images),
        )
        results.append(res)
        print(" -> FID:", res["fid"])
        print(f" -> SSIM mean: {res['ssim_mean']:.4f} ± {res['ssim_std']:.4f}")
        print(f" -> PSNR mean: {res['psnr_mean']:.2f} ± {res['psnr_std']:.2f}")
        print("--------------------------------------------------")
    return results

if __name__ == "__main__":
    # Load real dataset: the .npy archive is preferred, the directory loader is the fallback.
    if REAL_NPY:
        real_images = load_real_images(npy_path=REAL_NPY, target_shape=IMAGE_SHAPE)
    else:
        real_images = load_real_images(dir_path=REAL_DIR, target_shape=IMAGE_SHAPE, max_images=10000)
    print("Real images loaded:", real_images.shape)
    metrics = evaluate_models(MODEL_PATHS, real_images, latent_dim=LATENT_DIM, n_samples=N_SAMPLES)

    # optional: classifier evaluation (if CLASSIFIER_PATH provided)
    if CLASSIFIER_PATH:
        print("Loading classifier:", CLASSIFIER_PATH)
        clf = load_model(CLASSIFIER_PATH, compile=False)
        for m in MODEL_PATHS:
            gen = load_model(m, compile=False)
            gen_samples = generate_images_from_generator(gen, 1024, LATENT_DIM, batch_size=BATCH)
            # Only the confidences are reported. The generated samples have no
            # target labels, so no accuracy can be computed here (the previous
            # `np.mean(preds == preds)` was always 1.0 and never used).
            _preds, confs = evaluate_with_classifier(clf, gen_samples)
            print(f"{os.path.basename(m)} classifier mean confidence: {confs.mean():.4f}")

    # Print summary table, best (lowest) FID first
    import pandas as pd
    df = pd.DataFrame(metrics)
    df = df.sort_values("fid")
    print("\nSummary (lower FID = better):")
    display(df)
