<a href="https://colab.research.google.com/github/ElevnthKuria/IP102/blob/main/IP102_Augumented%2C_CutOut%2C_MixUp%2C_DCGAN_and_StyleGAN2%2C_Synthetic_Generation_Colab_Notebook.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

# Importing Libraries and Data

In [None]:
# !pip3 install pandas  matplotlib seaborn scikit-learn

In [None]:
# Mount Google Drive at /content/drive; later cells write outputs under
# /content/drive/MyDrive.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
import os
import kagglehub

# Set custom path
# The environment variable is set before the download call below.
os.environ["KAGGLEHUB_CACHE"] = "/content/kagglehub"

# `path` holds the dataset location reported by kagglehub.
# NOTE(review): the recorded output shows Colab serving the dataset from
# /kaggle/input/ip02-dataset rather than from KAGGLEHUB_CACHE, so later cells
# should derive their paths from `path` instead of a hard-coded cache folder.
path = kagglehub.dataset_download("rtlmhjbn/ip02-dataset")
print("Downloaded to:", path)

Using Colab cache for faster access to the 'ip02-dataset' dataset.
Downloaded to: /kaggle/input/ip02-dataset


In [None]:
# ! ls /root/.cache/kagglehub/datasets/rtlmhjbn/ip02-dataset/versions/1/classification

ls: cannot access '/root/.cache/kagglehub/datasets/rtlmhjbn/ip02-dataset/versions/1/classification': No such file or directory


In [None]:
from pathlib import Path
import os

# Drive folder under which every technique gets its own sub-tree
SYNTHETIC_ROOT = Path("/content/drive/MyDrive/IP102_Synthetic")

# Technique folder names (kept consistent with the rest of the notebook)
TECHNIQUES = [
    "IP102_augmented", "IP102_cutout", "IP102_mixup_cutmix",
    "DCGAN", "StyleGAN2", "CycleGAN", "cGAN",
    "VAE", "Diffusion", "DeepSMOTE",
]

# IP102 class ids this notebook works with
SELECTED_CLASSES = [7, 8, 9, 11, 59, 69, 72, 94, 98, 101]

# Materialise <root>/<technique>/<class_id>/ for every combination;
# existing folders are left untouched.
all_target_dirs = (
    SYNTHETIC_ROOT / tech / str(cls)
    for tech in TECHNIQUES
    for cls in SELECTED_CLASSES
)
for target_dir in all_target_dirs:
    target_dir.mkdir(parents=True, exist_ok=True)
print("‚úÖ Folder structure created.")


‚úÖ Folder structure created.


In [None]:
import os
from pathlib import Path
from PIL import Image
import pandas as pd
import matplotlib.pyplot as plt

# Define dataset root and selected folders.
# The root is derived from `path` (returned by kagglehub.dataset_download in
# the download cell) instead of a hard-coded /root/.cache location: on Colab
# the dataset was served from /kaggle/input, so the old path did not exist and
# every class was reported as "not found".
# Assumes the dataset keeps its classification/train/<class_id>/ layout -- TODO confirm.
DATASET_PATH = Path(path) / "classification" / "train"
SELECTED_FOLDERS = [7, 8, 9, 11, 59, 69, 72, 94, 98, 101]

SUMMARY_COLUMNS = ["class_id", "num_images", "sample_width", "sample_height"]
data_summary = []

for class_id in SELECTED_FOLDERS:
    class_path = DATASET_PATH / str(class_id)
    if not class_path.exists():
        print(f"‚ö†Ô∏è Folder {class_id} not found.")
        continue

    image_files = sorted(class_path.glob("*.jpg")) + sorted(class_path.glob("*.png"))
    image_count = len(image_files)

    # Optional: sample first image to get resolution
    if image_files:
        with Image.open(image_files[0]) as img:
            width, height = img.size
    else:
        width, height = (0, 0)

    data_summary.append({
        "class_id": class_id,
        "num_images": image_count,
        "sample_width": width,
        "sample_height": height
    })

# Passing the column names keeps the frame well-formed even with zero rows, so
# a missing dataset no longer ends in KeyError: 'num_images'.
df = pd.DataFrame(data_summary, columns=SUMMARY_COLUMNS)

if df.empty:
    print(f"No class folders found under {DATASET_PATH}; nothing to plot or save.")
else:
    # Index by class_id so the x axis really shows class ids, not row numbers.
    ax = df.set_index("class_id")["num_images"].plot(
        kind="bar", title="Image Count per Class", figsize=(8, 5)
    )
    ax.set_ylabel("Count")
    ax.set_xlabel("Class ID")
    plt.tight_layout()
    plt.show()

    print(df)
    # Save summary
    df.to_csv("ip102_selected_classes_summary.csv", index=False)

‚ö†Ô∏è Folder 7 not found.
‚ö†Ô∏è Folder 8 not found.
‚ö†Ô∏è Folder 9 not found.
‚ö†Ô∏è Folder 11 not found.
‚ö†Ô∏è Folder 59 not found.
‚ö†Ô∏è Folder 69 not found.
‚ö†Ô∏è Folder 72 not found.
‚ö†Ô∏è Folder 94 not found.
‚ö†Ô∏è Folder 98 not found.
‚ö†Ô∏è Folder 101 not found.


KeyError: 'num_images'

# STEP 1: Classical Augmentation
==========================

Tools: imgaug, torchvision.transforms, Albumentations

Examples: rotations, flips, scaling, brightness, hue, random crops.

Implementation:

In [None]:
# ================================
import os
from pathlib import Path
from PIL import Image
from torchvision import transforms
import matplotlib.pyplot as plt
import random

# Mount Google Drive if dataset is there
# from google.colab import drive
# drive.mount('/content/drive')

# Source images come from the kagglehub download (`path` from the download
# cell); the previously hard-coded /root/.cache location did not exist on Colab.
# Assumes the classification/train/<class_id>/ layout -- TODO confirm.
DATA_PATH = Path(path) / "classification" / "train"
SAVE_PATH = Path("/content/IP102_augmented")
SAVE_PATH.mkdir(parents=True, exist_ok=True)

# Selected classes
SELECTED_CLASSES = [7, 8, 9, 11, 59, 69, 72, 94, 98, 101]

# Define augmentations
augment = transforms.Compose([
    transforms.RandomRotation(25),
    transforms.RandomHorizontalFlip(p=0.7),
    transforms.RandomVerticalFlip(p=0.3),
    transforms.ColorJitter(brightness=0.3, contrast=0.3, saturation=0.2, hue=0.02),
    transforms.RandomResizedCrop(size=(224, 224), scale=(0.8, 1.0))
])

# How many augmentations per image
AUG_PER_IMAGE = 3

for class_id in SELECTED_CLASSES:
    src_dir = DATA_PATH / str(class_id)
    target_dir = SAVE_PATH / str(class_id)
    target_dir.mkdir(parents=True, exist_ok=True)

    image_files = list(src_dir.glob("*.jpg")) + list(src_dir.glob("*.png"))
    print(f"Processing Class {class_id}: {len(image_files)} images")

    for img_path in image_files:
        try:
            # The context manager closes the file handle; convert() returns an
            # independent in-memory copy that outlives it.
            with Image.open(img_path) as raw:
                img = raw.convert("RGB")
        except (OSError, ValueError) as err:
            # Unreadable or corrupt image: report it instead of silently
            # dropping it, then carry on with the remaining files.
            print(f"  Skipping {img_path.name}: {err}")
            continue

        for i in range(AUG_PER_IMAGE):
            aug_img = augment(img)
            save_name = f"{img_path.stem}_aug{i}.jpg"
            aug_img.save(target_dir / save_name)

print("‚úÖ Classical Augmentation complete!")

In [None]:
# Visualize few augmented examples
sample_class = random.choice(SELECTED_CLASSES)
sample_folder = SAVE_PATH / str(sample_class)
samples = list(sample_folder.glob("*.jpg"))

# random.sample raises ValueError when asked for more items than exist, so cap
# the request at the number of files actually present.
n_show = min(5, len(samples))
if n_show == 0:
    print(f"No augmented images found in {sample_folder}")
else:
    plt.figure(figsize=(12, 6))
    for i, img_path in enumerate(random.sample(samples, n_show)):
        with Image.open(img_path) as img:
            plt.subplot(1, 5, i+1)
            plt.imshow(img)
            plt.axis("off")
    plt.suptitle(f"Augmented Samples - Class {sample_class}")
    plt.show()

# STEP 2: Random Erasing / Cutout
Improves robustness by obscuring random image regions.

Tools: torchvision.transforms.RandomErasing, or custom Cutout implementation.

In [None]:
# ================================

import torch
from torchvision import transforms
from torchvision.transforms import functional as F
import matplotlib.pyplot as plt
from PIL import Image
import random
from pathlib import Path

# Source images come from the kagglehub download (`path` from the download
# cell); the previously hard-coded /root/.cache location did not exist on Colab.
# Assumes the classification/train/<class_id>/ layout -- TODO confirm.
DATA_PATH = Path(path) / "classification" / "train"
SAVE_PATH = Path("/content/IP102_cutout")
SAVE_PATH.mkdir(parents=True, exist_ok=True)

SELECTED_CLASSES = [7, 8, 9, 11, 59, 69, 72, 94, 98, 101]

# Built-in RandomErasing + Custom Cutout Example
# RandomErasing works on tensors, so it is placed after ToTensor().
cutout_transform = transforms.Compose([
    transforms.RandomResizedCrop(224, scale=(0.8, 1.0)),
    transforms.RandomHorizontalFlip(p=0.5),
    transforms.ToTensor(),
    transforms.RandomErasing(p=0.9, scale=(0.02, 0.2), ratio=(0.3, 3.3), value='random'),
])

def apply_cutout_and_save(img_path, target_dir, n_aug=2):
    """Write ``n_aug`` randomly erased variants of one image into ``target_dir``.

    Args:
        img_path: ``Path`` of the source image.
        target_dir: ``Path`` of an existing output folder.
        n_aug: number of variants to write; files are named
            ``<stem>_cutout<i>.jpg``.
    """
    # Close the source file promptly; convert() returns an in-memory copy.
    with Image.open(img_path) as raw:
        image = raw.convert("RGB")
    for i in range(n_aug):
        tensor_img = cutout_transform(image)
        cut_img = F.to_pil_image(tensor_img)
        cut_img.save(target_dir / f"{img_path.stem}_cutout{i}.jpg")

for class_id in SELECTED_CLASSES:
    src_dir = DATA_PATH / str(class_id)
    tgt_dir = SAVE_PATH / str(class_id)
    tgt_dir.mkdir(parents=True, exist_ok=True)

    image_files = list(src_dir.glob("*.jpg")) + list(src_dir.glob("*.png"))
    print(f"Applying Cutout to class {class_id} ({len(image_files)} images)")

    for img_path in image_files:
        try:
            apply_cutout_and_save(img_path, tgt_dir)
        except Exception as e:
            # Best effort: report the failing file and keep going.
            print(f"Error: {img_path} -> {e}")
            continue

print("‚úÖ Random Erasing / Cutout augmentation complete!")

In [None]:
# Preview cutout images
import random
sample_class = random.choice(SELECTED_CLASSES)
sample_folder = SAVE_PATH / str(sample_class)
samples = list(sample_folder.glob("*.jpg"))

# random.sample raises ValueError when asked for more items than exist, so cap
# the request at the number of files actually present.
n_show = min(5, len(samples))
if n_show == 0:
    print(f"No cutout images found in {sample_folder}")
else:
    plt.figure(figsize=(12, 6))
    for i, img_path in enumerate(random.sample(samples, n_show)):
        with Image.open(img_path) as img:
            plt.subplot(1, 5, i + 1)
            plt.imshow(img)
            plt.axis("off")
    plt.suptitle(f"Random Erasing / Cutout Samples - Class {sample_class}")
    plt.show()

# STEP 3: MixUp and CutMix Augmentation

In [None]:
# =========================================

import torch
import torchvision.transforms as transforms
from torchvision.utils import save_image
from PIL import Image
import random
import os
from pathlib import Path
import numpy as np
import matplotlib.pyplot as plt

# Paths
# Source images come from the kagglehub download (`path` from the download
# cell); the previously hard-coded /root/.cache location did not exist on Colab.
# Assumes the classification/train/<class_id>/ layout -- TODO confirm.
DATA_PATH = Path(path) / "classification" / "train"
SAVE_PATH = Path("/content/IP102_mixup_cutmix")
SAVE_PATH.mkdir(parents=True, exist_ok=True)

SELECTED_CLASSES = [7, 8, 9, 11, 59, 69, 72, 94, 98, 101]
IMG_SIZE = 224

# --- Utility to gather filepaths from selected classes ---
def get_class_images(cls):
    """Return the .jpg and .png paths found directly inside DATA_PATH/<cls>."""
    cls_path = DATA_PATH / str(cls)
    files = list(cls_path.glob("*.jpg")) + list(cls_path.glob("*.png"))
    return files

# Flat list of (image path, class id) pairs across all selected classes
all_images = []
for c in SELECTED_CLASSES:
    for f in get_class_images(c):
        all_images.append((f, c))

if len(all_images) < 2:
    # The generation loop samples two distinct images per iteration.
    print(f"Warning: only {len(all_images)} image(s) found under {DATA_PATH}; "
          "MixUp/CutMix needs at least 2.")

# --- Base transforms ---
# Resize to a common square size so two images can be combined element-wise.
base_tf = transforms.Compose([
    transforms.Resize((IMG_SIZE, IMG_SIZE)),
    transforms.ToTensor()
])

def mixup(img1, img2, alpha=0.3):
    """Blend two images as a convex combination.

    The weight ``lam`` is drawn from Beta(alpha, alpha); the result is
    ``lam * img1 + (1 - lam) * img2``.

    Returns:
        (blended image, lam)
    """
    lam = np.random.beta(alpha, alpha)
    blended = lam * img1 + (1 - lam) * img2
    return blended, lam

def cutmix(img1, img2, alpha=1.0):
    """Paste a random rectangular patch of ``img2`` into a copy of ``img1``.

    Args:
        img1: base image tensor, indexed as (channels, height, width).
        img2: donor image tensor; the patch is read from the same coordinates,
            so it must cover at least the same height and width.
        alpha: concentration of the Beta(alpha, alpha) distribution used to
            draw the target mixing ratio.

    Returns:
        (mixed, lam): ``mixed`` is a new tensor (``img1`` is not modified);
        ``lam`` is the fraction of pixels that still come from ``img1``,
        recomputed from the patch that was actually pasted, because clipping
        at the image border can shrink the patch.
    """
    lam = np.random.beta(alpha, alpha)
    H, W = img1.size()[1], img1.size()[2]
    cut_rat = np.sqrt(1. - lam)
    cut_w, cut_h = int(W * cut_rat), int(H * cut_rat)

    # random center; the box is clipped so it never leaves the image
    cx, cy = np.random.randint(W), np.random.randint(H)
    x1 = int(np.clip(cx - cut_w // 2, 0, W))
    y1 = int(np.clip(cy - cut_h // 2, 0, H))
    x2 = int(np.clip(cx + cut_w // 2, 0, W))
    y2 = int(np.clip(cy + cut_h // 2, 0, H))

    # Work on a copy so the caller's tensor is left untouched.
    mixed = img1.clone()
    mixed[:, y1:y2, x1:x2] = img2[:, y1:y2, x1:x2]

    # Report the mixing ratio that matches the pasted area.
    lam = 1.0 - ((x2 - x1) * (y2 - y1)) / (W * H)
    return mixed, lam

# --- Generate synthetic samples ---
for i in range(80):  # number of synthetic images; adjust as needed
    # Draw two distinct (path, class) entries and load both at the common size
    first, second = random.sample(all_images, 2)
    f1, c1 = first
    f2, c2 = second
    img1 = base_tf(Image.open(f1).convert("RGB"))
    img2 = base_tf(Image.open(f2).convert("RGB"))

    # Coin flip between the two techniques
    use_mixup = random.random() < 0.5
    aug_type = "MixUp" if use_mixup else "CutMix"
    if use_mixup:
        aug_img, lam = mixup(img1, img2)
    else:
        aug_img, lam = cutmix(img1.clone(), img2.clone())

    # Output folder is the lower-cased technique name: "mixup" or "cutmix"
    save_dir = SAVE_PATH / aug_type.lower()
    save_dir.mkdir(parents=True, exist_ok=True)
    save_image(aug_img, save_dir / f"{aug_type}_{c1}_{c2}_{i:03d}.jpg")

print("‚úÖ MixUp & CutMix synthetic images generated.")

In [None]:
# Display random samples from MixUp & CutMix
sample_type = random.choice(["mixup", "cutmix"])
sample_folder = SAVE_PATH / sample_type
samples = list(sample_folder.glob("*.jpg"))

# random.sample raises ValueError when asked for more items than exist, so cap
# the request at the number of files actually present.
n_show = min(5, len(samples))
if n_show == 0:
    print(f"No synthetic images found in {sample_folder}")
else:
    plt.figure(figsize=(12,6))
    for i, p in enumerate(random.sample(samples, n_show)):
        with Image.open(p) as img:
            plt.subplot(1, 5, i+1)
            plt.imshow(img)
            plt.axis("off")
    plt.suptitle(f"{sample_type.upper()} Synthetic Samples")
    plt.show()

# STEP 4: DCGAN - Deep Convolutional GAN

In [None]:
# =============================================
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torchvision.utils import save_image, make_grid
from torch.utils.data import DataLoader, Dataset
from PIL import Image
from pathlib import Path
import matplotlib.pyplot as plt
import os
import random

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"Running on: {device}")

# Path setup
# The train root is derived from `path` (kagglehub download cell); the
# previously hard-coded /root/.cache location did not exist on Colab.
# Assumes the classification/train/<class_id>/ layout -- TODO confirm.
DATA_PATH = Path(path) / "classification" / "train" / "7"  # pick one class for initial training
SAVE_PATH = Path("/content/DCGAN_output")
SAVE_PATH.mkdir(parents=True, exist_ok=True)

# Hyperparameters
image_size = 64        # reduce memory cost
nc = 3                 # RGB
nz = 100               # noise vector
ngf = 64
ndf = 64
epochs = 10            # for demo: increase later
batch_size = 64
lr = 0.0002
beta1 = 0.5

# Dataset + transforms
# Normalize maps pixel values to [-1, 1], the range of the generator's Tanh.
transform = transforms.Compose([
    transforms.Resize(image_size),
    transforms.CenterCrop(image_size),
    transforms.ToTensor(),
    transforms.Normalize([0.5]*3, [0.5]*3)
])

# ImageFolder needs a root whose sub-folders are classes, so the parent folder
# is loaded and then narrowed to the single class named by DATA_PATH.
# Previously the whole parent (every class) was used for training.
full_dataset = torchvision.datasets.ImageFolder(root=DATA_PATH.parent, transform=transform)
target_idx = full_dataset.class_to_idx[DATA_PATH.name]
class_indices = [i for i, label in enumerate(full_dataset.targets) if label == target_idx]
dataset = torch.utils.data.Subset(full_dataset, class_indices)
print(f"DCGAN training images (class {DATA_PATH.name}): {len(dataset)}")
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True, num_workers=2)

# ---------------------------
# Define DCGAN architecture
# ---------------------------

# Generator
class Generator(nn.Module):
    """DCGAN generator built from transposed convolutions.

    Args:
        nz: number of channels of the latent input.
        ngf: base feature-map width; hidden layers use 8x, 4x, 2x and 1x this.
        nc: number of output image channels.
    """

    def __init__(self, nz, ngf, nc):
        super().__init__()
        widths = [nz, ngf * 8, ngf * 4, ngf * 2, ngf]
        layers = []
        for idx, (c_in, c_out) in enumerate(zip(widths[:-1], widths[1:])):
            # First block uses stride 1 / no padding (a 1x1 input becomes 4x4);
            # every later block uses stride 2 / padding 1, doubling the size.
            stride, pad = (1, 0) if idx == 0 else (2, 1)
            layers += [
                nn.ConvTranspose2d(c_in, c_out, 4, stride, pad, bias=False),
                nn.BatchNorm2d(c_out),
                nn.ReLU(True),
            ]
        # Output block: project to image channels and squash into [-1, 1].
        layers += [nn.ConvTranspose2d(ngf, nc, 4, 2, 1, bias=False), nn.Tanh()]
        self.main = nn.Sequential(*layers)

    def forward(self, input):
        return self.main(input)

# Discriminator
class Discriminator(nn.Module):
    """DCGAN discriminator producing one probability per input image.

    Args:
        nc: number of input image channels.
        ndf: base feature-map width; later layers use 2x and 4x this.
    """

    def __init__(self, nc, ndf):
        super().__init__()

        def down(c_in, c_out):
            # 4x4 convolution with stride 2 / padding 1, no bias
            return nn.Conv2d(c_in, c_out, 4, 2, 1, bias=False)

        # The first stage has no batch norm.
        stages = [down(nc, ndf), nn.LeakyReLU(0.2, inplace=True)]
        for mult in (1, 2):
            stages.extend([
                down(ndf * mult, ndf * mult * 2),
                nn.BatchNorm2d(ndf * mult * 2),
                nn.LeakyReLU(0.2, inplace=True),
            ])
        # Single-channel score map, averaged to (B,1,1,1) and squashed to (0, 1).
        stages.extend([
            down(ndf * 4, 1),
            nn.AdaptiveAvgPool2d(1),
            nn.Sigmoid(),
        ])
        self.main = nn.Sequential(*stages)

    def forward(self, input):
        scores = self.main(input)
        # Flatten to a 1-D tensor with one score per image.
        return scores.view(-1)

# Initialize models
netG = Generator(nz, ngf, nc).to(device)
netD = Discriminator(nc, ndf).to(device)

# Loss and optimizers
# BCELoss expects probabilities; the discriminator ends in a Sigmoid.
criterion = nn.BCELoss()
optimizerD = optim.Adam(netD.parameters(), lr=lr, betas=(beta1, 0.999))
optimizerG = optim.Adam(netG.parameters(), lr=lr, betas=(beta1, 0.999))

# One fixed batch of 64 latent vectors, reused after every epoch so the saved
# grids are comparable across epochs.
fixed_noise = torch.randn(64, nz, 1, 1, device=device)

# ---------------------------
# Training Loop
# ---------------------------
for epoch in range(epochs):
    for i, (real_imgs, _) in enumerate(dataloader):
        # Labels are rebuilt per batch because the last batch may be smaller.
        b_size = real_imgs.size(0)
        real_imgs = real_imgs.to(device)
        real_label = torch.full((b_size,), 1.0, device=device)
        fake_label = torch.full((b_size,), 0.0, device=device)

        # (1) Train Discriminator
        # Real images should score 1, generated images 0.
        netD.zero_grad()
        output = netD(real_imgs)
        lossD_real = criterion(output, real_label)

        noise = torch.randn(b_size, nz, 1, 1, device=device)
        fake_imgs = netG(noise)
        # detach() keeps this backward pass from reaching the generator.
        output = netD(fake_imgs.detach())
        lossD_fake = criterion(output, fake_label)
        lossD = lossD_real + lossD_fake
        lossD.backward()
        optimizerD.step()

        # (2) Train Generator
        # The same fakes are re-scored (not detached) against the "real" label.
        netG.zero_grad()
        output = netD(fake_imgs)
        lossG = criterion(output, real_label)
        lossG.backward()
        optimizerG.step()

    # Reports the losses of the last batch of the epoch only.
    # NOTE(review): lossD/lossG are undefined here if the dataloader is empty.
    print(f"Epoch [{epoch+1}/{epochs}] | D_loss: {lossD.item():.3f} | G_loss: {lossG.item():.3f}")

    # Save an 8-per-row grid of samples generated from the fixed noise.
    with torch.no_grad():
        fake = netG(fixed_noise).detach().cpu()
        save_image(fake, SAVE_PATH / f"fake_epoch_{epoch+1:03}.png", normalize=True, nrow=8)

In [None]:
# Show the sample grid with the highest epoch number in its file name
epoch_grids = sorted(SAVE_PATH.glob("fake_epoch_*.png"))
latest_img = epoch_grids[-1]
img = Image.open(latest_img)

plt.figure(figsize=(8,8))
plt.imshow(img)
plt.title("DCGAN Synthetic Samples")
plt.axis("off")
plt.show()

# STEP 5: StyleGAN2-ADA-PyTorch

In [None]:
!git clone https://github.com/NVlabs/stylegan2-ada-pytorch.git
%cd stylegan2-ada-pytorch

# Install dependencies
! pip install ninja opensimplex requests tqdm matplotlib

# Optional: connect to Google Drive for dataset and outputs
# from google.colab import drive
# drive.mount('/content/drive')

In [None]:
# ======================
# Preparing the dataset
# ======================
# /content/IP102_selected/
#    ├── class8/
#    ├── class9/
#    └── class10/

In [None]:
from google.colab import drive
drive.mount('/content/drive')

# Example paths
# Assuming: /content/drive/MyDrive/IP102/classes/
!mkdir -p /content/IP102_selected/{class8,class9,class10}

# Copy from your Drive folders (adjust source)
!cp "/root/.cache/kagglehub/datasets/rtlmhjbn/ip02-dataset/versions/1/classification/train/7"*.jpg /content/IP102_selected/class8/
!cp "/root/.cache/kagglehub/datasets/rtlmhjbn/ip02-dataset/versions/1/classification/train/8"*.jpg /content/IP102_selected/class9/
!cp "/root/.cache/kagglehub/datasets/rtlmhjbn/ip02-dataset/versions/1/classification/train/9"*.jpg /content/IP102_selected/class10/

In [None]:
!python3 dataset_tool.py --source=/content/IP102_selected \
                       --dest=./datasets/ip102_stylegan \
                       --resolution=256

In [None]:
# Example: initialize from AFHQ-cat (similar texture variety)
# Downloads the checkpoint and stores it as pretrained.pkl in the current folder.
!wget https://nvlabs-fi-cdn.nvidia.com/stylegan2-ada-pytorch/pretrained/afhqcat.pkl -O pretrained.pkl

# Fine-tune on ./datasets/ip102_stylegan starting from pretrained.pkl; run
# folders are created under ./training-runs/ip102-stylegan.
# NOTE(review): the dataset is prepared at 256 px in the previous cell --
# confirm the AFHQ-cat checkpoint's resolution is compatible with --resume.
!python train.py \
  --outdir=./training-runs/ip102-stylegan \
  --data=./datasets/ip102_stylegan \
  --gpus=1 \
  --batch=16 \
  --cfg=auto \
  --mirror=1 \
  --resume=pretrained.pkl \
  --gamma=10 \
  --snap=5

In [None]:
# Pick snapshot name
SNAPSHOT="./training-runs/ip102-stylegan/00000*/network-snapshot-00010.pkl"

!python generate.py \
  --outdir=./generated_samples \
  --trunc=0.7 \
  --seeds=0-49 \
  --network=$SNAPSHOT

In [None]:
import matplotlib.pyplot as plt
import glob
from PIL import Image

# Show up to eight generated images in a 2x4 grid.
imgs = sorted(glob.glob('./generated_samples/*.png'))[:8]
plt.figure(figsize=(12,6))
# The loop variable is named img_file, not `path`: `path` is the dataset
# location returned by kagglehub in the download cell and must not be
# overwritten here.
for i, img_file in enumerate(imgs):
    plt.subplot(2,4,i+1)
    with Image.open(img_file) as img:
        plt.imshow(img)
    plt.axis("off")
plt.suptitle("StyleGAN2‚ÄëADA Synthetic Insect Samples", fontsize=14)
plt.show()

# STEP 6: CycleGAN — Cross-Domain Image Translation

In [None]:
# Clone lightweight CycleGAN repo
!git clone https://github.com/junyanz/pytorch-CycleGAN-and-pix2pix.git
%cd pytorch-CycleGAN-and-pix2pix
!pip install dominate visdom

In [None]:
# datasets/ip102A2B/
#     ├── trainA/  --> class 8 images
#     ├── trainB/  --> class 9 images
#     ├── testA/
#     └── testB/

In [None]:
# Train CycleGAN on the unpaired image folders under ./datasets/ip102A2B
# (layout sketched in the previous cell); the run is named ip102A2B_cyclegan.
!python train.py \
  --dataroot ./datasets/ip102A2B \
  --name ip102A2B_cyclegan \
  --model cycle_gan \
  --batch_size 2 \
  --num_threads 4 \
  --gpu_ids 0

In [None]:
!python test.py \
  --dataroot ./datasets/ip102A2B \
  --name ip102A2B_cyclegan \
  --model test \
  --num_test 20

# STEP 7: Conditional GAN (cGANs)
cGANs are trained with labels so that $G(z \mid y)$ generates an image specifically belonging to class $y$.

Ideal for multiclass datasets (like IP102 7–101).

In [None]:
import torch, torch.nn as nn
from torchvision.utils import save_image
import os, numpy as np

# Number of label ids the generator can be conditioned on
num_classes = 10
# Latent size, generator width, discriminator width, image channels
nz, ngf, ndf, nc = 100, 64, 64, 3

class G_cGAN(nn.Module):
    """Class-conditional generator.

    A learned embedding of the class label (one vector of size ``nz`` per
    class) is added to the noise vector before the transposed-convolution
    stack. For a ``(B, nz, 1, 1)`` noise input the output is
    ``(B, nc, 32, 32)`` with values in [-1, 1].
    """

    def __init__(self):
        super().__init__()
        self.label_emb = nn.Embedding(num_classes, nz)

        # First block (no batch norm): stride 1, no padding.
        blocks = [nn.ConvTranspose2d(nz, ngf*8, 4, 1, 0), nn.ReLU(True)]
        # Two stride-2 blocks with batch norm.
        for c_in, c_out in ((ngf*8, ngf*4), (ngf*4, ngf*2)):
            blocks += [
                nn.ConvTranspose2d(c_in, c_out, 4, 2, 1),
                nn.BatchNorm2d(c_out),
                nn.ReLU(True),
            ]
        # Output block: project to image channels and squash with Tanh.
        blocks += [nn.ConvTranspose2d(ngf*2, nc, 4, 2, 1), nn.Tanh()]
        self.main = nn.Sequential(*blocks)

    def forward(self, noise, labels):
        # (B, nz) embedding -> (B, nz, 1, 1) so it can be added to the noise.
        cond = self.label_emb(labels)[:, :, None, None]
        return self.main(noise + cond)

In [None]:
# Draw a batch of class-conditioned samples from the cGAN generator.
# `batch` and `G` were never defined, so this cell raised NameError; they are
# created here. The generator is freshly initialised (no cGAN training step
# exists in this notebook), so the samples are noise until it is trained.
device = "cuda" if torch.cuda.is_available() else "cpu"
batch = 16
G = G_cGAN().to(device)

z = torch.randn(batch, nz, 1, 1, device=device)
labels = torch.randint(0, num_classes, (batch,), device=device)
fake = G(z, labels)

# STEP 8: Variational Autoencoder (VAE)


VAEs learn an encoding → latent distribution → decoding cycle.

You can sample new latent vectors for smooth synthetic variants.

In [None]:
import torch, torch.nn as nn, torch.nn.functional as F
from torchvision import datasets, transforms
from torchvision.utils import save_image

class VAE(nn.Module):
    """Convolutional variational autoencoder for square RGB images.

    Args:
        latent_dim: size of the latent vector.
        img_size: side length of the input images. It must be divisible by 4
            because the encoder halves the resolution twice. The default of
            224 reproduces the previously hard-coded 56x56 feature map.

    Raises:
        ValueError: if ``img_size`` is not a positive multiple of 4.
    """

    def __init__(self, latent_dim=64, img_size=224):
        super().__init__()
        if img_size <= 0 or img_size % 4 != 0:
            raise ValueError(f"img_size must be a positive multiple of 4, got {img_size}")
        # Two stride-2 convolutions shrink each side by a factor of 4.
        self.feat_size = img_size // 4
        flat_dim = 128 * self.feat_size * self.feat_size

        self.enc = nn.Sequential(
            nn.Conv2d(3, 64, 4, 2, 1), nn.ReLU(),
            nn.Conv2d(64, 128, 4, 2, 1), nn.ReLU(),
            nn.Flatten()
        )
        self.fc_mu = nn.Linear(flat_dim, latent_dim)
        self.fc_logvar = nn.Linear(flat_dim, latent_dim)
        self.fc_dec = nn.Linear(latent_dim, flat_dim)
        self.dec = nn.Sequential(
            nn.ConvTranspose2d(128, 64, 4, 2, 1), nn.ReLU(),
            nn.ConvTranspose2d(64, 3, 4, 2, 1), nn.Sigmoid()
        )

    def encode(self, x):
        """Return ``(mu, logvar)`` of the latent distribution for ``x``."""
        h = self.enc(x)
        return self.fc_mu(h), self.fc_logvar(h)

    def reparam(self, mu, logvar):
        """Sample ``mu + eps * std`` with ``eps ~ N(0, I)``."""
        std, eps = torch.exp(0.5*logvar), torch.randn_like(logvar)
        return mu + eps*std

    def decode(self, z):
        """Map latent vectors to images with values in [0, 1] (Sigmoid output)."""
        h = F.relu(self.fc_dec(z)).view(-1, 128, self.feat_size, self.feat_size)
        return self.dec(h)

    def forward(self, x):
        """Return ``(reconstruction, mu, logvar)``."""
        mu, logvar = self.encode(x)
        z = self.reparam(mu, logvar)
        return self.decode(z), mu, logvar

# STEP 9: Diffusion Models (Denoising Diffusion Probabilistic Models)


Diffusion models (e.g., DDPM / Stable Diffusion) gradually denoise random noise → image.

They yield state-of-the-art fidelity.

In [None]:
!pip install diffusers transformers accelerate safetensors

In [None]:
from diffusers import DDPMPipeline

# Small pretrained diffusion checkpoint
model_id = "google/ddpm-cifar10-32"
pipe = DDPMPipeline.from_pretrained(model_id)

# Generate 8 images in 50 denoising steps, returned as PIL images
pipeline_output = pipe(batch_size=8, num_inference_steps=50, output_type="pil")
images = pipeline_output.images

# Write each sample next to the notebook as diffusion_synth_<index>.png
for i, img in enumerate(images):
    out_name = f"diffusion_synth_{i}.png"
    img.save(out_name)

# STEP 10: SMOTE for Images (DeepSMOTE)



In [None]:
from imblearn.over_sampling import SMOTE
import torch, os
from torchvision import models, transforms
from PIL import Image
import numpy as np

from collections import Counter

import kagglehub

# Feature extractor
# The classification head is replaced by Identity so the network returns the
# pooled feature vector; eval() freezes batch-norm statistics.
resnet = models.resnet18(pretrained=True)
resnet.fc = torch.nn.Identity()
resnet.eval()

tfm = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

SELECTED_CLASSES = [7, 8, 9, 11, 59, 69, 72, 94, 98, 101]
# Ask kagglehub for the dataset location instead of hard-coding
# /root/.cache/... (that folder did not exist on Colab). The call is repeated
# here rather than reusing the `path` variable from the download cell because
# an intermediate cell reuses that name as a loop variable.
# Assumes the classification/train/<class_id>/ layout -- TODO confirm.
DATA_ROOT = os.path.join(
    kagglehub.dataset_download("rtlmhjbn/ip02-dataset"), "classification", "train"
)

all_embeddings, all_labels = [], []

for cls in SELECTED_CLASSES:
    cls_path = os.path.join(DATA_ROOT, str(cls))
    if not os.path.exists(cls_path):
        print(f"Class folder not found, skipping: {cls_path}")
        continue

    # Sorted so the "first 50" subset is the same on every run
    # (os.listdir order is arbitrary).
    for fname in sorted(os.listdir(cls_path))[:50]:  # limit for speed
        img_path = os.path.join(cls_path, fname)
        try:
            with Image.open(img_path) as raw:
                img = tfm(raw.convert("RGB")).unsqueeze(0)
        except (OSError, ValueError) as err:
            # Unreadable or non-image file: report it and move on.
            print(f"Skipping {img_path}: {err}")
            continue
        with torch.no_grad():
            emb = resnet(img).squeeze().numpy()
        all_embeddings.append(emb)
        all_labels.append(cls)

if not all_embeddings:
    raise RuntimeError(f"No images could be embedded from {DATA_ROOT}; check the dataset path.")

X = np.vstack(all_embeddings)
y = np.array(all_labels)

# SMOTE needs more than k_neighbors samples in every class it oversamples, so
# shrink k for very small classes instead of failing inside fit_resample.
smallest_class = min(Counter(all_labels).values())
k_neighbors = min(3, smallest_class - 1)
if k_neighbors < 1:
    raise ValueError("SMOTE needs at least 2 embedded images in every class.")

# Apply SMOTE
smote = SMOTE(sampling_strategy='auto', k_neighbors=k_neighbors, random_state=42)
X_res, y_res = smote.fit_resample(X, y)

print(f"Original: {X.shape}, Resampled: {X_res.shape}")
print("Class distribution after SMOTE:")
print(Counter(y_res))

# Part A: Balancing Dataset to 500–1000 Images per Class
Strategy:

- Downsample Class 101 from 3444 → 1000

- Upsample minority classes using available synthetic techniques

- Create a balanced dataset ready for CNN training

In [None]:
import os
import shutil
from pathlib import Path
import random
from collections import Counter

# === Paths ===
import kagglehub

# Ask kagglehub for the dataset location instead of hard-coding
# /root/.cache/... (that folder did not exist on Colab). The call is repeated
# here rather than reusing the `path` variable from the download cell because
# an intermediate cell reuses that name as a loop variable.
# Assumes the classification/train/<class_id>/ layout -- TODO confirm.
DATA_ROOT = Path(kagglehub.dataset_download("rtlmhjbn/ip02-dataset")) / "classification" / "train"
SYNTHETIC_ROOT = Path("/content/drive/MyDrive/IP102_Synthetic")
BALANCED_ROOT = Path("/content/drive/MyDrive/IP102_Balanced_Final")
BALANCED_ROOT.mkdir(parents=True, exist_ok=True)

SELECTED_CLASSES = [7, 8, 9, 11, 59, 69, 72, 94, 98, 101]
TARGET_MIN = 500
TARGET_MAX = 1000

# Techniques whose per-class folders (<SYNTHETIC_ROOT>/<technique>/<class>/,
# created by the folder-structure cell) may supply synthetic images.
# Class-agnostic folders (e.g. mixup/cutmix output that blends two classes)
# are deliberately not used: copying them into one class folder would attach
# a wrong label.
SYNTHETIC_TECHNIQUES = ["IP102_augmented", "IP102_cutout", "IP102_mixup_cutmix", "DCGAN", "VAE"]

# Fixed seed so re-running the cell selects the same files instead of piling
# a different random subset on top of the previous one.
random.seed(42)


def _list_images(folder):
    """Return the .jpg/.png files directly inside ``folder`` ([] if missing)."""
    if not folder.exists():
        return []
    return sorted(folder.glob("*.jpg")) + sorted(folder.glob("*.png"))


# === Current counts ===
print("Current class distribution:")
class_counts = {}
originals = {}  # class id -> list of original image paths (same list used for counting and copying)
for cls in SELECTED_CLASSES:
    cls_path = DATA_ROOT / str(cls)
    originals[cls] = _list_images(cls_path)
    class_counts[cls] = len(originals[cls])
    if cls_path.exists():
        print(f"  Class {cls}: {class_counts[cls]} images")
    else:
        print(f"  Class {cls}: NOT FOUND")

print("\n" + "="*60)
print("BALANCING PROCESS")
print("="*60)

# === Balance each class ===
for cls in SELECTED_CLASSES:
    current_count = class_counts[cls]
    original_imgs = originals[cls]
    balanced_dir = BALANCED_ROOT / str(cls)
    balanced_dir.mkdir(parents=True, exist_ok=True)

    # Case 1: Class has more than TARGET_MAX -> Downsample
    if current_count > TARGET_MAX:
        print(f"\nüìâ Class {cls}: Downsampling {current_count} ‚Üí {TARGET_MAX}")
        for img in random.sample(original_imgs, TARGET_MAX):
            shutil.copy(img, balanced_dir / img.name)

    # Case 2: Class is within range -> Copy as-is
    elif TARGET_MIN <= current_count <= TARGET_MAX:
        print(f"\n‚úÖ Class {cls}: Already balanced ({current_count} images)")
        for img in original_imgs:
            shutil.copy(img, balanced_dir / img.name)

    # Case 3: Class below TARGET_MIN -> Upsample with synthetics
    else:
        deficit = TARGET_MIN - current_count
        print(f"\nüìà Class {cls}: Need {deficit} synthetic images (current: {current_count})")

        # Copy all originals first
        for img in original_imgs:
            shutil.copy(img, balanced_dir / img.name)

        # Gather this class's synthetic images from every technique folder
        available_synthetics = []
        for tech in SYNTHETIC_TECHNIQUES:
            available_synthetics.extend(_list_images(SYNTHETIC_ROOT / tech / str(cls)))

        if len(available_synthetics) == 0:
            print(f"  ‚ö†Ô∏è Warning: No synthetic images found for class {cls}")
            print(f"  ‚Üí You need to generate synthetics first using Steps 1-10")
            continue

        # Sample synthetics to fill the gap
        needed = min(deficit, len(available_synthetics))
        sampled_synthetics = random.sample(available_synthetics, needed)

        for i, syn_img in enumerate(sampled_synthetics):
            # Keep the real extension: a PNG saved under a .jpg name would be
            # mislabelled on disk.
            new_name = f"syn_{cls}_{i:05d}{syn_img.suffix.lower()}"
            shutil.copy(syn_img, balanced_dir / new_name)

        final_count = len(_list_images(balanced_dir))
        print(f"  ‚úÖ Final count for class {cls}: {final_count}")
        if final_count < TARGET_MIN:
            print(f"  Class {cls} is still {TARGET_MIN - final_count} image(s) short of {TARGET_MIN}.")

print("\n" + "="*60)
print("‚úÖ BALANCING COMPLETE")
print("="*60)
print(f"Balanced dataset saved at: {BALANCED_ROOT}")

### Verify Balance

In [None]:
import matplotlib.pyplot as plt

# Count the files (any name containing a dot) in each balanced class folder;
# a missing folder counts as 0.
balanced_counts = {}
for cls in SELECTED_CLASSES:
    balanced_dir = BALANCED_ROOT / str(cls)
    n_files = len(list(balanced_dir.glob("*.*"))) if balanced_dir.exists() else 0
    balanced_counts[cls] = n_files

bar_labels = list(map(str, balanced_counts))
bar_heights = list(balanced_counts.values())

plt.figure(figsize=(10,5))
plt.bar(bar_labels, bar_heights, color='teal')
# Dashed reference lines mark the target range
for level, colour, label in (
    (500, 'red', 'Min Target (500)'),
    (1000, 'orange', 'Max Target (1000)'),
):
    plt.axhline(level, color=colour, linestyle='--', label=label, linewidth=2)
plt.xlabel("Class ID")
plt.ylabel("Image Count")
plt.title("Balanced IP102 Dataset (Final)")
plt.legend()
plt.grid(axis='y', alpha=0.3)
plt.tight_layout()
plt.savefig(f"{BALANCED_ROOT.parent}/balanced_distribution.png", dpi=150)
plt.show()

print("\nBalanced class distribution:")
for cls in balanced_counts:
    print(f"  Class {cls}: {balanced_counts[cls]} images")

# Part B: Testing Each Technique with CNN

Train one CNN per dataset variant listed in `TECHNIQUE_DATASETS` below (six are configured; variants whose folder is missing are skipped), then compare metrics.


In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms, models
from sklearn.metrics import accuracy_score, classification_report, f1_score
import pandas as pd
import numpy as np

device = "cuda" if torch.cuda.is_available() else "cpu"
print(f"üöÄ Training on: {device}")

# === Hyperparameters ===
BATCH_SIZE = 32
EPOCHS = 15
LR = 0.001
# Upper bound only: each model's output layer is sized from the classes its
# own dataset contains (see the training loop).
NUM_CLASSES = len(SELECTED_CLASSES)

# === Transforms ===
train_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.RandomHorizontalFlip(),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])

test_transform = transforms.Compose([
    transforms.Resize((224, 224)),
    transforms.ToTensor(),
    transforms.Normalize([0.485, 0.456, 0.406], [0.229, 0.224, 0.225])
])


class SelectedImageFolder(datasets.ImageFolder):
    """ImageFolder restricted to class folders named after SELECTED_CLASSES.

    The original training root contains every class, which would give labels
    beyond the model's output size. If no folder name matches a selected
    class id, all folders are kept unchanged.
    """

    def find_classes(self, directory):
        classes, _ = super().find_classes(directory)
        wanted = {str(c) for c in SELECTED_CLASSES}
        kept = [c for c in classes if c in wanted] or classes
        return kept, {name: idx for idx, name in enumerate(kept)}


# === Define technique-specific datasets ===
TECHNIQUE_DATASETS = {
    "Baseline (Original Only)": DATA_ROOT,
    "Classical Augmentation": SYNTHETIC_ROOT / "IP102_augmented",
    "Random Erasing/Cutout": SYNTHETIC_ROOT / "IP102_cutout",
    "MixUp/CutMix": SYNTHETIC_ROOT / "IP102_mixup_cutmix",
    "DCGAN": SYNTHETIC_ROOT / "DCGAN",
    "Balanced (All Techniques)": BALANCED_ROOT,
}

results = []

# === Training loop for each technique ===
for technique_name, data_path in TECHNIQUE_DATASETS.items():
    if not data_path.exists():
        print(f"‚ö†Ô∏è Skipping {technique_name} ‚Äî path not found: {data_path}")
        continue

    print(f"\n{'='*70}")
    print(f"üî¨ Training: {technique_name}")
    print(f"üìÇ Data path: {data_path}")
    print('='*70)

    try:
        # Two views of the same files: augmented for training, deterministic
        # for evaluation. Both list files in the same order, so split indices
        # computed on one are valid for the other.
        dataset = SelectedImageFolder(root=str(data_path), transform=train_transform)
        eval_dataset = SelectedImageFolder(root=str(data_path), transform=test_transform)

        if len(dataset) < 2:
            print(f"‚ö†Ô∏è No images found")
            continue

        # Size the classifier from the labels this dataset can actually emit.
        num_classes = len(dataset.classes)

        # Train/test split
        train_size = int(0.8 * len(dataset))
        test_size = len(dataset) - train_size
        train_set, test_split = torch.utils.data.random_split(
            dataset, [train_size, test_size],
            generator=torch.Generator().manual_seed(42)  # reproducibility
        )
        # Evaluate the held-out indices without the random flip.
        test_set = torch.utils.data.Subset(eval_dataset, test_split.indices)

        train_loader = DataLoader(train_set, batch_size=BATCH_SIZE, shuffle=True, num_workers=2)
        test_loader = DataLoader(test_set, batch_size=BATCH_SIZE, shuffle=False, num_workers=2)

        # Model (ResNet18 pretrained) with a fresh output layer
        model = models.resnet18(pretrained=True)
        model.fc = nn.Linear(model.fc.in_features, num_classes)
        model = model.to(device)

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.Adam(model.parameters(), lr=LR)

        # Training
        for epoch in range(EPOCHS):
            model.train()
            running_loss = 0.0
            for imgs, labels in train_loader:
                imgs, labels = imgs.to(device), labels.to(device)
                optimizer.zero_grad()
                outputs = model(imgs)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                running_loss += loss.item()

            avg_loss = running_loss / len(train_loader)
            if (epoch + 1) % 5 == 0:
                print(f"  Epoch {epoch+1}/{EPOCHS} | Loss: {avg_loss:.4f}")

        # Evaluation
        model.eval()
        all_preds, all_labels = [], []
        with torch.no_grad():
            for imgs, labels in test_loader:
                imgs = imgs.to(device)
                preds = model(imgs).argmax(dim=1).cpu().numpy()
                all_preds.extend(preds)
                all_labels.extend(labels.numpy())

        # Metrics
        acc = accuracy_score(all_labels, all_preds)
        f1 = f1_score(all_labels, all_preds, average='weighted')

        print(f"\n‚úÖ {technique_name}")
        print(f"   Accuracy: {acc:.4f}")
        print(f"   F1 Score: {f1:.4f}")

        results.append({
            "Technique": technique_name,
            "Accuracy": acc,
            "F1_Score": f1,
            "Train_Size": train_size,
            "Test_Size": test_size
        })

    except Exception as e:
        # Best effort: one failing technique must not stop the comparison.
        print(f"‚ùå Error: {e}")
        continue

# === Summary ===
if len(results) > 0:
    df_results = pd.DataFrame(results)
    df_results = df_results.sort_values("Accuracy", ascending=False)

    print("\n" + "="*70)
    print("üìä FINAL RESULTS: Technique Performance Comparison")
    print("="*70)
    print(df_results.to_string(index=False))

    # Save results
    output_path = BALANCED_ROOT.parent / "technique_comparison_results.csv"
    df_results.to_csv(output_path, index=False)
    print(f"\nüíæ Results saved: {output_path}")
else:
    print("\n‚ö†Ô∏è No techniques were successfully trained.")

In [None]:
import matplotlib.pyplot as plt

if len(results) > 0:
    # Ascending sort so the best technique ends up at the top of the bars
    df_plot = df_results.sort_values("Accuracy", ascending=True)

    fig, ax = plt.subplots(1, 2, figsize=(14, 6))

    # One row per panel: (axes, column, colour, x label, title)
    panels = [
        (ax[0], "Accuracy", 'steelblue', "Test Accuracy",
         "CNN Performance by Augmentation Technique"),
        (ax[1], "F1_Score", 'coral', "F1 Score (Weighted)",
         "F1 Score by Augmentation Technique"),
    ]
    for axis, column, colour, x_label, title in panels:
        axis.barh(df_plot["Technique"], df_plot[column], color=colour)
        axis.set_xlabel(x_label)
        axis.set_title(title)
        axis.set_xlim(0, 1)
        axis.grid(axis='x', alpha=0.3)

    plt.tight_layout()
    plt.savefig(BALANCED_ROOT.parent / "technique_comparison_chart.png", dpi=150)
    plt.show()

# End of Notebook