In [1]:
import os
import zipfile
import shutil
from pathlib import Path

In [2]:
zip_path = '/content/DIEBACK-20250409T065230Z-001.zip'
extract_dir = '/content/DIEBACK'

# Unpack the uploaded archive into extract_dir.
with zipfile.ZipFile(zip_path, 'r') as archive:
    archive.extractall(extract_dir)

# Print the immediate subfolders of extract_dir so the layout can be checked.
# Nothing is moved or deleted here; only the first level is listed.
_, top_level_dirs, _ = next(os.walk(extract_dir), (extract_dir, [], []))
for folder_name in top_level_dirs:
    print(os.path.join(extract_dir, folder_name))

/content/DIEBACK/DIEBACK


In [3]:
import torchvision.transforms as transforms
from torchvision.datasets import ImageFolder
from torch.utils.data import DataLoader

# Side length (pixels) every image is resized to, and the mini-batch size.
image_size = 64
batch_size = 64

# Preprocessing pipeline: fixed-size resize, conversion to a [0, 1] tensor,
# then (x - 0.5) / 0.5 so pixel values land in [-1, 1].
preprocessing_steps = [
    transforms.Resize((image_size, image_size)),
    transforms.ToTensor(),
    transforms.Normalize(mean=(0.5,), std=(0.5,)),
]
transform = transforms.Compose(preprocessing_steps)

# ImageFolder treats each subfolder of extract_dir as one class.
dataset = ImageFolder(extract_dir, transform=transform)
dataloader = DataLoader(dataset=dataset, batch_size=batch_size, shuffle=True)


In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F

class VAE(nn.Module):
    """Convolutional variational autoencoder for 3-channel 64x64 images.

    The encoder halves the spatial size three times (64 -> 32 -> 16 -> 8) and
    flattens the resulting 128x8x8 feature map. Two linear heads map it to the
    mean and log-variance of a ``latent_dim``-dimensional Gaussian. The decoder
    mirrors the encoder and ends in ``Tanh``, so reconstructions lie in
    [-1, 1].

    Args:
        latent_dim: Size of the latent vector.
    """

    def __init__(self, latent_dim=128):
        super().__init__()
        bottleneck_shape = (128, 8, 8)
        bottleneck_size = 128 * 8 * 8

        def halve(in_ch, out_ch):
            # Stride-2 convolution: halves height and width.
            return nn.Conv2d(in_ch, out_ch, kernel_size=4, stride=2, padding=1)

        def double(in_ch, out_ch):
            # Stride-2 transposed convolution: doubles height and width.
            return nn.ConvTranspose2d(in_ch, out_ch, kernel_size=4, stride=2, padding=1)

        self.encoder = nn.Sequential(
            halve(3, 32),      # 64x64 -> 32x32
            nn.ReLU(),
            halve(32, 64),     # 32x32 -> 16x16
            nn.ReLU(),
            halve(64, 128),    # 16x16 -> 8x8
            nn.ReLU(),
            nn.Flatten(),
        )

        # Latent heads and the projection back to the bottleneck size.
        self.fc_mu = nn.Linear(bottleneck_size, latent_dim)
        self.fc_logvar = nn.Linear(bottleneck_size, latent_dim)
        self.fc_decode = nn.Linear(latent_dim, bottleneck_size)

        self.decoder = nn.Sequential(
            nn.Unflatten(1, bottleneck_shape),
            double(128, 64),   # 8x8 -> 16x16
            nn.ReLU(),
            double(64, 32),    # 16x16 -> 32x32
            nn.ReLU(),
            double(32, 3),     # 32x32 -> 64x64
            nn.Tanh(),
        )

    def reparameterize(self, mu, logvar):
        """Draw ``z = mu + eps * sigma`` with ``eps ~ N(0, I)``.

        ``sigma`` is recovered from the log-variance as ``exp(0.5 * logvar)``.
        """
        sigma = (0.5 * logvar).exp()
        noise = torch.randn_like(sigma)
        return mu + noise * sigma

    def forward(self, x):
        """Encode ``x``, sample a latent vector, and decode it.

        Returns:
            Tuple ``(reconstruction, mu, logvar)``.
        """
        features = self.encoder(x)
        mu, logvar = self.fc_mu(features), self.fc_logvar(features)
        latent = self.reparameterize(mu, logvar)
        reconstruction = self.decoder(self.fc_decode(latent))
        return reconstruction, mu, logvar


In [5]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")

# VAE with the default latent size, placed on the chosen device, plus its
# Adam optimizer.
vae = VAE()
vae = vae.to(device)
optimizer = torch.optim.Adam(params=vae.parameters(), lr=1e-3)

def vae_loss(x_recon, x, mu, logvar):
    """Return the summed VAE objective for one batch.

    The value is the sum of two terms, both summed over every element of the
    batch (no averaging):

    * squared error between ``x_recon`` and ``x``;
    * ``-0.5 * sum(1 + logvar - mu^2 - exp(logvar))``, the closed-form KL
      divergence of N(mu, exp(logvar)) from a standard normal.
    """
    reconstruction_term = F.mse_loss(x_recon, x, reduction='sum')
    kl_elements = 1 + logvar - mu.pow(2) - logvar.exp()
    kl_term = -0.5 * torch.sum(kl_elements)
    return reconstruction_term + kl_term

epochs = 20

# Put the model in training mode before optimizing.
vae.train()
for epoch in range(epochs):
    # Running sum of the (un-averaged) batch losses for this epoch.
    total_loss = 0
    for batch, _ in dataloader:
        batch = batch.to(device)

        optimizer.zero_grad()
        recon_batch, batch_mu, batch_logvar = vae(batch)
        batch_loss = vae_loss(recon_batch, batch, batch_mu, batch_logvar)
        batch_loss.backward()
        optimizer.step()

        total_loss += batch_loss.item()
    # Report the epoch loss as an average per training image.
    print(f"Epoch {epoch+1}/{epochs}, Loss: {total_loss / len(dataloader.dataset):.2f}")


Epoch 1/20, Loss: 2565.04
Epoch 2/20, Loss: 2488.26
Epoch 3/20, Loss: 2413.42
Epoch 4/20, Loss: 2347.48
Epoch 5/20, Loss: 2283.98
Epoch 6/20, Loss: 2273.17
Epoch 7/20, Loss: 2175.17
Epoch 8/20, Loss: 2139.04
Epoch 9/20, Loss: 2113.99
Epoch 10/20, Loss: 2071.60
Epoch 11/20, Loss: 2063.54
Epoch 12/20, Loss: 2014.10
Epoch 13/20, Loss: 1981.59
Epoch 14/20, Loss: 1935.85
Epoch 15/20, Loss: 1892.45
Epoch 16/20, Loss: 1874.65
Epoch 17/20, Loss: 1815.12
Epoch 18/20, Loss: 1781.75
Epoch 19/20, Loss: 1740.12
Epoch 20/20, Loss: 1691.30


In [31]:
# Generate and save 100 images from the trained VAE
import torchvision.utils as vutils
import os

vae.eval()
vae_folder = "/content/generated_vae"
os.makedirs(vae_folder, exist_ok=True)

# Number of images to write to vae_folder.
num_vae_samples = 100
# Read the latent size and device from the trained model instead of
# hard-coding 128 / relying on the global `device`, so this cell keeps working
# if the VAE is built with another latent_dim or `device` is rebound later.
vae_latent_dim = vae.fc_decode.in_features
vae_device = next(vae.parameters()).device

with torch.no_grad():
    for i in range(num_vae_samples):
        # Sample from the N(0, I) prior and push it through the decoder path.
        z = torch.randn(1, vae_latent_dim, device=vae_device)
        sample = vae.decoder(vae.fc_decode(z)).cpu()
        # normalize=True rescales each image by its own min/max before saving.
        vutils.save_image(sample, f"{vae_folder}/vae_sample_{i}.png", normalize=True)


In [50]:
# Define Generator and Discriminator for DCGAN
class Generator(nn.Module):
    """DCGAN generator: maps ``(N, nz, 1, 1)`` noise to ``(N, nc, 64, 64)`` images.

    A first transposed convolution expands the 1x1 input to 4x4, then four
    stride-2 transposed convolutions double the size up to 64x64. Every stage
    except the last is followed by BatchNorm and in-place ReLU; the last one
    is followed by ``Tanh``, so outputs lie in [-1, 1].

    Args:
        nz: Number of channels of the latent input.
        ngf: Base feature-map width; hidden stages use 8x, 4x, 2x, 1x this.
        nc: Number of output image channels.
    """

    def __init__(self, nz=100, ngf=64, nc=3):
        super().__init__()
        widths = [ngf * 8, ngf * 4, ngf * 2, ngf]

        # 1x1 -> 4x4 projection of the latent vector.
        layers = [
            nn.ConvTranspose2d(nz, widths[0], kernel_size=4, stride=1, padding=0),
            nn.BatchNorm2d(widths[0]),
            nn.ReLU(inplace=True),
        ]
        # Three upsampling stages: 4x4 -> 8x8 -> 16x16 -> 32x32.
        for in_ch, out_ch in zip(widths[:-1], widths[1:]):
            layers.extend([
                nn.ConvTranspose2d(in_ch, out_ch, kernel_size=4, stride=2, padding=1),
                nn.BatchNorm2d(out_ch),
                nn.ReLU(inplace=True),
            ])
        # Final upsampling to 64x64 image channels, squashed by Tanh.
        layers.extend([
            nn.ConvTranspose2d(widths[-1], nc, kernel_size=4, stride=2, padding=1),
            nn.Tanh(),
        ])
        self.net = nn.Sequential(*layers)

    def forward(self, input):
        """Run the latent batch through the generator stack."""
        return self.net(input)



import torch.nn as nn

class Discriminator(nn.Module):
    """DCGAN discriminator: scores ``(N, nc, 64, 64)`` images as real or fake.

    Four stride-2 convolutions reduce 64x64 inputs to 4x4 while widening the
    channels (ndf, 2*ndf, 4*ndf, 8*ndf); a final 4x4 convolution collapses
    that to a single value per image, passed through a sigmoid.

    Args:
        nc: Number of input image channels. Defaults to 3, the previously
            hard-coded value.
        ndf: Base feature-map width. Defaults to 64, the previously
            hard-coded value. Mirrors ``ngf`` on the Generator.
    """

    def __init__(self, nc=3, ndf=64):
        super(Discriminator, self).__init__()
        self.main = nn.Sequential(
            # input is (nc) x 64 x 64
            nn.Conv2d(nc, ndf, 4, 2, 1, bias=False),  # (ndf x 32 x 32)
            nn.LeakyReLU(0.2, inplace=True),

            nn.Conv2d(ndf, ndf * 2, 4, 2, 1, bias=False),  # (2*ndf x 16 x 16)
            nn.BatchNorm2d(ndf * 2),
            nn.LeakyReLU(0.2, inplace=True),

            nn.Conv2d(ndf * 2, ndf * 4, 4, 2, 1, bias=False),  # (4*ndf x 8 x 8)
            nn.BatchNorm2d(ndf * 4),
            nn.LeakyReLU(0.2, inplace=True),

            nn.Conv2d(ndf * 4, ndf * 8, 4, 2, 1, bias=False),  # (8*ndf x 4 x 4)
            nn.BatchNorm2d(ndf * 8),
            nn.LeakyReLU(0.2, inplace=True),

            nn.Conv2d(ndf * 8, 1, 4, 1, 0, bias=False),  # (1 x 1 x 1)
            nn.Sigmoid()
        )

    def forward(self, input):
        """Return one sigmoid score per image as a flat ``(batch_size,)`` tensor."""
        output = self.main(input)
        return output.view(-1)  # Flatten to (batch_size,)






In [51]:
# Train DCGAN
netG = Generator().to(device)
netD = Discriminator().to(device)

# Binary cross-entropy on the discriminator's sigmoid scores; both networks
# use Adam with the same learning rate and betas.
criterion = nn.BCELoss()
adam_settings = dict(lr=0.0002, betas=(0.5, 0.999))
optimizerD = torch.optim.Adam(netD.parameters(), **adam_settings)
optimizerG = torch.optim.Adam(netG.parameters(), **adam_settings)
nz = 100
epochs = 20

for epoch in range(epochs):
    for real_images, _ in dataloader:
        real_images = real_images.to(device)
        # The last batch may be smaller, so size the targets per batch.
        b_size = real_images.size(0)
        real_labels = torch.ones(b_size, device=device)
        fake_labels = torch.zeros(b_size, device=device)

        # --- Discriminator step: real images -> 1, generated images -> 0 ---
        # netD already returns a flat (b_size,) tensor, so no reshape is needed.
        netD.zero_grad()
        loss_real = criterion(netD(real_images), real_labels)

        noise = torch.randn(b_size, nz, 1, 1, device=device)
        fake_images = netG(noise)
        # detach() keeps this backward pass from reaching the generator.
        loss_fake = criterion(netD(fake_images.detach()), fake_labels)

        lossD = loss_real + loss_fake
        lossD.backward()
        optimizerD.step()

        # --- Generator step: make the updated discriminator output 1 on fakes ---
        netG.zero_grad()
        lossG = criterion(netD(fake_images), real_labels)
        lossG.backward()
        optimizerG.step()

    # Losses shown are those of the final batch of the epoch.
    print(f"Epoch {epoch+1}/{epochs} | Loss_D: {lossD.item():.4f} | Loss_G: {lossG.item():.4f}")



Epoch 1/20 | Loss_D: 1.3286 | Loss_G: 2.1037
Epoch 2/20 | Loss_D: 1.2455 | Loss_G: 3.3764
Epoch 3/20 | Loss_D: 0.5610 | Loss_G: 4.4465
Epoch 4/20 | Loss_D: 0.2919 | Loss_G: 4.5590
Epoch 5/20 | Loss_D: 0.2490 | Loss_G: 4.7280
Epoch 6/20 | Loss_D: 0.2296 | Loss_G: 5.3555
Epoch 7/20 | Loss_D: 0.1747 | Loss_G: 5.5980
Epoch 8/20 | Loss_D: 0.1372 | Loss_G: 5.7143
Epoch 9/20 | Loss_D: 0.1425 | Loss_G: 6.0916
Epoch 10/20 | Loss_D: 0.0932 | Loss_G: 6.1360
Epoch 11/20 | Loss_D: 0.1016 | Loss_G: 6.4213
Epoch 12/20 | Loss_D: 0.0963 | Loss_G: 6.7892
Epoch 13/20 | Loss_D: 0.1004 | Loss_G: 7.0395
Epoch 14/20 | Loss_D: 0.0790 | Loss_G: 6.8067
Epoch 15/20 | Loss_D: 0.1005 | Loss_G: 7.3989
Epoch 16/20 | Loss_D: 0.0669 | Loss_G: 7.1173
Epoch 17/20 | Loss_D: 0.0665 | Loss_G: 7.0737
Epoch 18/20 | Loss_D: 0.0660 | Loss_G: 7.5784
Epoch 19/20 | Loss_D: 0.0474 | Loss_G: 7.2551
Epoch 20/20 | Loss_D: 0.0614 | Loss_G: 7.8317


In [52]:
# Generate 100 synthetic GAN images, one PNG per sample
gan_folder = "/content/generated_gan"
os.makedirs(gan_folder, exist_ok=True)

# Evaluation mode makes the BatchNorm layers use their running statistics,
# which is what allows sampling one image at a time.
netG.eval()
with torch.no_grad():
    for sample_idx in range(100):
        latent = torch.randn(1, nz, 1, 1, device=device)
        generated = netG(latent).cpu()
        out_path = f"{gan_folder}/gan_sample_{sample_idx}.png"
        # normalize=True rescales each image by its own min/max before saving.
        vutils.save_image(generated, out_path, normalize=True)


In [57]:
# Rebind `device` to the CPU for the cells that follow. This assignment only
# changes the name; it does not move modules that were already placed on
# another device.
device = torch.device("cpu")

In [59]:
import torchvision.utils as vutils
import os
!pip install pytorch-fid
from skimage.metrics import structural_similarity as ssim
from PIL import Image
import numpy as np
import torch
import cleanfid as fid
from cleanfid import fid

device = torch.device("cpu")

# Compute FID
fid_vae = fid.compute_fid("/content/DIEBACK/DIEBACK", vae_folder, device=device)
fid_gan = fid.compute_fid("/content/DIEBACK/DIEBACK", gan_folder, device=device)
print("FID (VAE):", fid_vae)
print("FID (GAN):", fid_gan)

# Compute SSIM
def calculate_ssim_folder(real_dir, generated_dir, max_images=100, size=(64, 64)):
    """Average SSIM over position-matched pairs of real and generated images.

    The regular files of each directory are sorted by name and the first
    ``max_images`` are kept. The i-th real file is compared with the i-th
    generated file after both are converted to 8-bit grayscale and resized to
    ``size``. Pairing is purely positional; when the folders hold different
    numbers of files, only as many pairs as the smaller one allows are used,
    and the mean is taken over the pairs actually compared.

    Args:
        real_dir: Directory containing the reference images.
        generated_dir: Directory containing the generated images.
        max_images: Maximum number of files taken from each directory.
        size: ``(width, height)`` both images are resized to before comparison.

    Returns:
        Mean SSIM (float) over the compared pairs.

    Raises:
        ValueError: If there is no pair of files to compare.
    """
    def _first_files(folder):
        # Skip sub-directories so Image.open is only handed regular files.
        paths = (os.path.join(folder, name) for name in sorted(os.listdir(folder)))
        return [path for path in paths if os.path.isfile(path)][:max_images]

    def _load_gray(path):
        # The context manager closes the file handle once pixels are copied out.
        with Image.open(path) as img:
            return np.array(img.convert("L").resize(size))

    pairs = list(zip(_first_files(real_dir), _first_files(generated_dir)))
    if not pairs:
        raise ValueError(
            f"No image pairs to compare between {real_dir!r} and {generated_dir!r}"
        )

    total_ssim = 0.0
    for real_path, gen_path in pairs:
        total_ssim += ssim(_load_gray(real_path), _load_gray(gen_path), data_range=255)

    # Divide by the number of comparisons made, not by the size of one folder.
    return total_ssim / len(pairs)

# Score each generated folder against the same real-image folder, VAE first.
ssim_reference_dir = "/content/DIEBACK/DIEBACK"
ssim_vae, ssim_gan = [
    calculate_ssim_folder(ssim_reference_dir, generated_folder)
    for generated_folder in (vae_folder, gan_folder)
]
print("SSIM (VAE):", ssim_vae)
print("SSIM (GAN):", ssim_gan)






compute FID between two folders
Found 47 images in the folder /content/DIEBACK/DIEBACK


FID DIEBACK : 100%|██████████| 2/2 [00:19<00:00,  9.96s/it]


Found 100 images in the folder /content/generated_vae


FID generated_vae : 100%|██████████| 4/4 [00:38<00:00,  9.70s/it]


compute FID between two folders
Found 47 images in the folder /content/DIEBACK/DIEBACK


FID DIEBACK : 100%|██████████| 2/2 [00:18<00:00,  9.22s/it]


Found 100 images in the folder /content/generated_gan


FID generated_gan : 100%|██████████| 4/4 [00:40<00:00, 10.18s/it]


FID (VAE): 415.69851736698956
FID (GAN): 512.7580129378562
SSIM (VAE): 0.10447535899447119
SSIM (GAN): 0.03678041522274954
