# AgenticGAN Tester: GTSRB (German Traffic Sign Recognition)

##  Project Overview
AgenticGAN-Tester is a novel AI framework that autonomously discovers and generates realistic failure cases for deep learning models using an agent-driven GAN approach.

**Dataset:** GTSRB - German Traffic Sign Recognition Benchmark
- **43 Classes** of traffic signs
- **RGB Images** (variable sizes, resized to 32x32)
- **~39,000 training images**, **~12,600 test images**

##  Pipeline Phases:
1. **Phase I:** Train Base Classifier (CNN) on GTSRB
2. **Phase II:** Train Conditional GAN to generate traffic signs
3. **Phase III:** Agent mines failure cases (low-confidence GAN outputs)
4. **Phase IV:** Retrain classifier with real + failure images (Hardening)
5. **Phase V:** Evaluate accuracy improvement + Grad-CAM visualization

##  Configuration & Setup

In [None]:

#  CONFIGURATION & IMPORTS

import os
import random
import shutil
import numpy as np
import pandas as pd
from PIL import Image
from tqdm import tqdm

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import torch.nn.utils.spectral_norm as spectral_norm
from torch.utils.data import DataLoader, Dataset
from torchvision import datasets, transforms
from torchvision.utils import save_image, make_grid


#  GLOBAL CONFIG (Change these as needed)

# Central settings dictionary read by the cells below: dataset facts, input
# and output locations, checkpoint file names, and training hyperparameters.
CONFIG = {
    # Dataset
    "num_classes": 43,          # GTSRB has 43 traffic sign classes
    "img_size": 32,             # side length images are resized to
    "channels": 3,
    
    # Paths (relative to this notebook)
    "data_root": "../DATA/archive",
    "train_csv": "../DATA/archive/Train.csv",
    "test_csv": "../DATA/archive/Test.csv",
    "train_images": "../DATA/archive/Train",
    "test_images": "../DATA/archive/Test",

    # Model paths
    "model_dir": "./models",
    "classifier_path": "./models/classifier.pth",
    "generator_path": "./models/gan_generator.pth",
    "discriminator_path": "./models/gan_discriminator.pth",
    "hardened_path": "./models/classifier_hardened.pth",
    
    # Output paths
    "gen_images_dir": "./generated_images",
    "failure_dir": "./failure_cases",
    "retrain_data_dir": "./retrain_data",
    "results_dir": "./results",
    
    # Training Hyperparameters
    "classifier_epochs": 10,
    "classifier_lr": 1e-3,
    "classifier_batch_size": 64,
    
    "gan_epochs": 100,
    "gan_lr": 2e-4,             # used for both the generator and the discriminator
    "gan_batch_size": 128,
    "latent_dim": 128,          # length of the noise vector fed to the generator
    
    "retrain_epochs": 5,
    "retrain_lr": 1e-5,          # Very small to prevent forgetting
    "num_real_samples": 10000,   # Real images for retraining
    
    # Failure Mining
    "num_gan_samples": 2000,        # generator samples drawn during mining
    "confidence_threshold": 0.90,   # max softmax probability below this => saved as a failure
    
    
    # Data loading: value passed as `num_workers` to every DataLoader
    "num_workers": 0,           
}




# One shared seed for every random number generator the notebook touches
# (PyTorch, NumPy, and Python's own `random`), applied in that order.
SEED = 42
for _seed_fn in (torch.manual_seed, np.random.seed, random.seed):
    _seed_fn(SEED)


# DEVICE SETUP

# Pick the compute device in order of preference: CUDA, then Apple MPS, then CPU.
# `torch.backends.mps` is looked up with getattr because PyTorch builds that
# predate the MPS backend do not have that attribute at all; dereferencing it
# directly would raise AttributeError instead of falling back to the CPU.
_mps_backend = getattr(torch.backends, "mps", None)

if torch.cuda.is_available():
    device = torch.device("cuda")
elif _mps_backend is not None and _mps_backend.is_available():
    device = torch.device("mps")
else:
    device = torch.device("cpu")

print(f" Using device: {device}")


#  CREATE DIRECTORIES

# Ensure every output folder named in CONFIG exists before later cells write
# checkpoints, sample grids, failure cases, or results into them.
_OUTPUT_DIR_KEYS = (
    "model_dir",
    "gen_images_dir",
    "failure_dir",
    "retrain_data_dir",
    "results_dir",
)
for _dir_key in _OUTPUT_DIR_KEYS:
    os.makedirs(CONFIG[_dir_key], exist_ok=True)

print(" Configuration loaded successfully!")
print(f" Dataset: GTSRB ({CONFIG['num_classes']} classes)")
print(f" Models will be saved to: {os.path.abspath(CONFIG['model_dir'])}")

🖥️  Using device: mps
✅ Configuration loaded successfully!
📊 Dataset: GTSRB (43 classes)
📁 Models will be saved to: /Users/ayushgourav/projectss/AgenticGAN---Tester-/GTSRBT/AGENT/models


## Model Architectures (Classifier + GAN)

In [None]:

#  MODEL ARCHITECTURES



#  Robust CNN Classifier

class RobustCNN(nn.Module):
    """Two-block convolutional classifier for square RGB images.

    Each block is two 3x3 conv + BatchNorm + ReLU layers followed by a 2x2
    max-pool, so the spatial side shrinks by a factor of four overall. The
    flattened features go through Dropout and a 256-unit hidden layer.

    Args:
        num_classes: number of output logits.
        img_size: side length of the (square) input images. The default of 32
            reproduces the original fixed 8 * 8 * 128 classifier input, so
            existing checkpoints load unchanged.

    Raises:
        ValueError: if ``img_size`` is smaller than 4 (nothing would be left
            after the two pooling layers).
    """

    def __init__(self, num_classes=43, img_size=32):
        super().__init__()
        if img_size < 4:
            raise ValueError(f"img_size must be at least 4, got {img_size}")

        self.features = nn.Sequential(
            # Block 1
            nn.Conv2d(3, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, 3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),

            # Block 2
            nn.Conv2d(64, 128, 3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, 3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(2),
        )

        # Two floor-halving pools leave a side of img_size // 4 (8 for 32x32).
        feat_side = img_size // 4

        self.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(feat_side * feat_side * 128, 256),
            nn.ReLU(inplace=True),
            nn.Linear(256, num_classes)
        )

    def forward(self, x):
        """Return class logits of shape (batch, num_classes) for image batch ``x``."""
        x = self.features(x)
        # flatten() also copes with non-contiguous feature maps, unlike view().
        x = x.flatten(1)
        return self.classifier(x)


#  Conditional Generator (Class-Aware)

class Generator(nn.Module):
    """Label-conditioned image generator.

    The class label is embedded into a ``num_classes``-dimensional vector,
    concatenated with the noise vector, projected by a linear layer to a
    256-channel feature map of side ``img_size // 4``, and then upsampled
    twice (x2 each) before a final 3-channel convolution with Tanh.

    Args:
        latent_dim: length of the noise vector ``z``.
        num_classes: number of distinct labels (also the embedding width).
        img_size: side length of the generated images.
    """

    @staticmethod
    def _upsample_stage(in_channels, out_channels):
        """Layers that double the spatial size and change the channel count."""
        return [
            nn.Upsample(scale_factor=2),
            nn.Conv2d(in_channels, out_channels, 3, padding=1),
            nn.BatchNorm2d(out_channels),
            nn.ReLU(inplace=True),
        ]

    def __init__(self, latent_dim=128, num_classes=43, img_size=32):
        super().__init__()
        self.label_emb = nn.Embedding(num_classes, num_classes)
        self.init_size = img_size // 4

        seed_features = 256 * self.init_size * self.init_size
        self.fc = nn.Linear(latent_dim + num_classes, seed_features)

        # Layer order is kept so the Sequential indices (and therefore the
        # checkpoint keys) stay the same.
        layers = [
            nn.BatchNorm2d(256),
            *self._upsample_stage(256, 128),
            *self._upsample_stage(128, 64),
            nn.Conv2d(64, 3, 3, padding=1),
            nn.Tanh(),
        ]
        self.net = nn.Sequential(*layers)

    def forward(self, z, labels):
        """Generate one image per (noise row, label) pair; outputs lie in [-1, 1]."""
        conditioned = torch.cat((z, self.label_emb(labels)), dim=1)
        projected = self.fc(conditioned)
        seed_map = projected.view(projected.size(0), 256, self.init_size, self.init_size)
        return self.net(seed_map)

#  PatchGAN Discriminator (Spectral Norm)

class Discriminator(nn.Module):
    """Convolutional real/fake critic with spectral normalisation on every conv.

    Three stride-2 stages (3->64->128->256 channels) each halve the spatial
    size; a final 4x4 convolution without padding reduces to one channel.
    For a 32x32 input that is 32 -> 16 -> 8 -> 4 -> 1, i.e. one logit per image.

    NOTE(review): forward() receives only the image, not the class label, so
    this critic cannot by itself check that a generated image matches the label
    the generator was conditioned on - confirm this is intended.
    """

    @staticmethod
    def _down_stage(in_channels, out_channels, with_batchnorm=True):
        """Spectral-normalised stride-2 conv, optional BatchNorm, then LeakyReLU."""
        conv = spectral_norm(nn.Conv2d(in_channels, out_channels, 4, 2, 1))
        stage = [conv]
        if with_batchnorm:
            stage.append(nn.BatchNorm2d(out_channels))
        stage.append(nn.LeakyReLU(0.2, inplace=True))
        return stage

    def __init__(self):
        super().__init__()

        stage_specs = (
            (3, 64, False),     # first stage has no BatchNorm
            (64, 128, True),
            (128, 256, True),
        )
        layers = []
        for in_c, out_c, use_bn in stage_specs:
            layers.extend(self._down_stage(in_c, out_c, use_bn))
        layers.append(spectral_norm(nn.Conv2d(256, 1, 4, 1, 0)))

        self.model = nn.Sequential(*layers)

    def forward(self, img):
        """Return the raw (unbounded) critic logits for image batch ``img``."""
        return self.model(img)

# Confirmation banner listing the architectures defined in this cell.
print(" Model architectures defined:")
for _arch_line in (
    "   - RobustCNN (Classifier)",
    "   - Generator (Conditional GAN)",
    "   - Discriminator (PatchGAN)",
):
    print(_arch_line)

✅ Model architectures defined:
   - RobustCNN (Classifier)
   - Generator (Conditional GAN)
   - Discriminator (PatchGAN)


##  Custom Dataset Classes (Kaggle GTSRB Format)

In [None]:

#  CUSTOM DATASET CLASSES


# Preprocessing shared by all phases: resize to the configured square size,
# convert to a tensor, then normalise each channel with mean 0.5 and std 0.5.
_target_hw = (CONFIG["img_size"], CONFIG["img_size"])
_half_rgb = (0.5, 0.5, 0.5)

transform = transforms.Compose([
    transforms.Resize(_target_hw),
    transforms.ToTensor(),
    transforms.Normalize(_half_rgb, _half_rgb),
])


#  Unified GTSRB Dataset (CSV-based for CORRECT labels)

class GTSRBDataset(Dataset):
    """
    CSV-backed GTSRB dataset that works for BOTH Train.csv and Test.csv.

    Each CSV row supplies a relative image path and an integer class id, so the
    label returned for an image is exactly the ClassId written in the CSV.

    Args:
        csv_file: path of the CSV index. The image-path column is 'Path' when
            present, otherwise 'image_path'; the label column is 'ClassId' when
            present, otherwise 'label'.
        root_dir: directory the relative image paths are resolved against.
        transform: optional callable applied to the loaded PIL image.

    Items are ``(image, label)`` pairs. An image that cannot be found or opened
    is replaced by a black 32x32 placeholder (best effort, as before), but a
    RuntimeWarning is now emitted so mislabeled black samples do not slip into
    training unnoticed.
    """
    def __init__(self, csv_file, root_dir, transform=None):
        self.df = pd.read_csv(csv_file)
        self.root_dir = root_dir
        self.transform = transform
        self.csv_file = csv_file
        
        # Detect column names
        self.path_col = 'Path' if 'Path' in self.df.columns else 'image_path'
        self.label_col = 'ClassId' if 'ClassId' in self.df.columns else 'label'
        
        print(f"   Loaded {len(self.df)} samples from {os.path.basename(csv_file)}")
        print(f"   Label range: {self.df[self.label_col].min()} to {self.df[self.label_col].max()}")

    def __len__(self):
        return len(self.df)

    def _resolve_path(self, rel_path):
        """Return the image path for ``rel_path``, trying a fallback layout.

        The path is first joined directly onto ``root_dir``. If that file does
        not exist, the image is looked up under ``root_dir/Train/<parent>/`` or
        ``root_dir/Test/`` instead. Which one is chosen depends on the CSV's
        *file name* only, so a directory that happens to contain "Train"
        somewhere in the CSV path cannot flip the choice.
        """
        primary = os.path.join(self.root_dir, rel_path)
        if os.path.exists(primary):
            return primary

        basename = os.path.basename(rel_path)
        if "Train" in os.path.basename(str(self.csv_file)):
            parent = os.path.dirname(rel_path)
            return os.path.join(self.root_dir, "Train", parent, basename)
        return os.path.join(self.root_dir, "Test", basename)

    def __getitem__(self, idx):
        # Fetch the row once; it is needed for both the path and the label.
        row = self.df.iloc[idx]
        full_img_path = self._resolve_path(row[self.path_col])

        try:
            # The context manager closes the underlying file handle;
            # convert() returns an independent, fully loaded image.
            with Image.open(full_img_path) as raw:
                image = raw.convert('RGB')
        except (FileNotFoundError, OSError) as err:
            import warnings
            warnings.warn(
                f"Could not load image '{full_img_path}' ({err}); "
                "substituting a black placeholder",
                RuntimeWarning,
            )
            image = Image.new('RGB', (32, 32), color='black')

        label = int(row[self.label_col])

        if self.transform:
            image = self.transform(image)

        return image, label

# Alias for backward compatibility
GTSRBTestDataset = GTSRBDataset


#  Mixed Dataset (Real + Failure Images)

class MixedDataset(Dataset):
    """Dataset over two folders of PNG files whose names encode the label.

    A file is used when its name ends in ".png" and contains "label_"; the
    label is the integer between the first "label_" and the next "." (for
    example ``img_12_label_5.png`` -> 5). Files that do not follow the pattern
    are skipped.

    Args:
        real_dir: folder with real images; ignored if it is not a directory.
        fail_dir: folder with failure-case images; ignored if it is not a directory.
        transform: optional callable applied to each loaded PIL image.

    ``samples`` lists ``(path, label)`` pairs: all entries from ``real_dir``
    first, then ``fail_dir``, each folder in sorted file-name order so the
    index-to-sample mapping is reproducible across runs and file systems.
    """

    def __init__(self, real_dir, fail_dir, transform=None):
        self.samples = []
        self.transform = transform

        for folder in (real_dir, fail_dir):
            self.samples.extend(self._scan_folder(folder))

    @staticmethod
    def _parse_label(fname):
        """Return the integer label encoded in ``fname``, or None if it has none."""
        if not (fname.endswith(".png") and "label_" in fname):
            return None
        try:
            return int(fname.split("label_")[1].split(".")[0])
        except ValueError:
            # Text after "label_" is not an integer: not one of our files.
            return None

    @classmethod
    def _scan_folder(cls, folder):
        """Return the ``(path, label)`` pairs for every labelled PNG in ``folder``."""
        if not os.path.isdir(folder):
            return []
        found = []
        for fname in sorted(os.listdir(folder)):
            label = cls._parse_label(fname)
            if label is not None:
                found.append((os.path.join(folder, fname), label))
        return found

    def __len__(self):
        return len(self.samples)

    def __getitem__(self, idx):
        img_path, label = self.samples[idx]
        # Close the file handle promptly; convert() yields a loaded copy.
        with Image.open(img_path) as raw:
            img = raw.convert("RGB")

        if self.transform:
            img = self.transform(img)

        return img, label

# Confirmation banner listing the dataset classes defined in this cell.
print(" Custom dataset classes defined")
for _dataset_line in (
    "   - GTSRBDataset (CSV-based)",
    "   - GTSRBTestDataset (alias)",
    "   - MixedDataset (for retraining)",
):
    print(_dataset_line)

✅ Custom dataset classes defined
   - GTSRBDataset (CSV-based)
   - GTSRBTestDataset (alias)
   - MixedDataset (for retraining)



#  Phase I: Train Base Classifier

Train a CNN classifier on the GTSRB training set to recognize 43 traffic sign classes.

In [None]:

#  PHASE I: TRAIN BASE CLASSIFIER



#  Load Training Data (CSV-based for CORRECT labels)

print("üìä Loading training data from CSV...")
train_data = GTSRBDataset(
    csv_file=CONFIG["train_csv"],
    root_dir=CONFIG["data_root"],
    transform=transform
)

train_loader = DataLoader(
    train_data,
    batch_size=CONFIG["classifier_batch_size"],
    shuffle=True,
    num_workers=CONFIG["num_workers"],
    pin_memory=True
)


#  Load Test Data (CSV-based)

print("üìä Loading test data from CSV...")
test_data = GTSRBDataset(
    csv_file=CONFIG["test_csv"],
    root_dir=CONFIG["data_root"],
    transform=transform
)

test_loader = DataLoader(
    test_data,
    batch_size=CONFIG["classifier_batch_size"],
    shuffle=False,
    num_workers=CONFIG["num_workers"],
    pin_memory=True
)


#  Initialize Model

classifier = RobustCNN(num_classes=CONFIG["num_classes"]).to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(classifier.parameters(), lr=CONFIG["classifier_lr"])


#  Training Loop

def train_classifier(model, loader, epochs):
    """Train ``model`` on ``loader`` for ``epochs`` passes and print per-epoch metrics.

    Uses the module-level ``optimizer``, ``criterion`` and ``device``; the
    optimizer must therefore have been built over ``model``'s parameters.

    Args:
        model: network to train; it is switched to training mode.
        loader: iterable yielding ``(images, labels)`` batches.
        epochs: number of full passes over ``loader``.

    The reported "Loss" is the mean of the per-batch losses for the epoch
    (previously the raw sum, which scales with the number of batches and is
    not comparable across batch sizes or datasets). An epoch that yields no
    samples prints a notice instead of dividing by zero.
    """
    model.train()
    
    for epoch in range(epochs):
        running_loss = 0.0
        correct = 0
        total = 0
        num_batches = 0
        
        loop = tqdm(loader, desc=f"Epoch {epoch+1}/{epochs}")
        for images, labels in loop:
            images = images.to(device)
            labels = labels.to(device)

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()

            # Read the scalar once; each .item() forces a device sync.
            batch_loss = loss.item()
            running_loss += batch_loss
            num_batches += 1

            preds = outputs.argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
            
            loop.set_postfix(loss=f"{batch_loss:.4f}")

        if total == 0:
            print(f"Epoch [{epoch+1}/{epochs}] | no samples in loader, metrics skipped")
            continue

        acc = 100.0 * correct / total
        avg_loss = running_loss / num_batches
        print(f"Epoch [{epoch+1}/{epochs}] | Loss: {avg_loss:.4f} | Train Acc: {acc:.2f}%")


# Train & Save: run Phase I training, then write the weights to the
# checkpoint path that later phases load from.

print("\n Starting classifier training...")
_phase1_epochs = CONFIG["classifier_epochs"]
train_classifier(classifier, train_loader, _phase1_epochs)

_classifier_weights = classifier.state_dict()
torch.save(_classifier_weights, CONFIG["classifier_path"])
print(f"\n Base Classifier saved to: {os.path.abspath(CONFIG['classifier_path'])}")


#  Quick Evaluation

def evaluate_model(model, loader):
    """Return the top-1 accuracy of ``model`` on ``loader`` as a percentage.

    The model is switched to eval mode (and left there) and gradients are
    disabled during the pass. Uses the module-level ``device``.

    Args:
        model: classifier returning per-class logits.
        loader: iterable yielding ``(images, labels)`` batches.

    Returns:
        Accuracy in the range 0-100; 0.0 when ``loader`` yields no samples
        (instead of raising ZeroDivisionError).
    """
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in tqdm(loader, desc="Evaluating"):
            images = images.to(device)
            labels = labels.to(device)
            outputs = model(images)
            preds = outputs.argmax(dim=1)
            correct += (preds == labels).sum().item()
            total += labels.size(0)
    if total == 0:
        return 0.0
    return 100.0 * correct / total

# Score the freshly trained classifier on the held-out test loader.
# evaluate_model() also switches the model to eval mode; the explicit call
# here just makes that state visible at the call site.
classifier.eval()
base_accuracy = evaluate_model(classifier, test_loader)
print(f"\n Base Classifier Test Accuracy: {base_accuracy:.2f}%")

📊 Loading training data from CSV...
   Loaded 39209 samples from Train.csv
   Label range: 0 to 42
📊 Loading test data from CSV...
   Loaded 12630 samples from Test.csv
   Label range: 0 to 42

🚀 Starting classifier training...


Epoch 1/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 613/613 [00:36<00:00, 16.90it/s, loss=0.0538]
Epoch 1/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 613/613 [00:36<00:00, 16.90it/s, loss=0.0538]


Epoch [1/10] | Loss: 434.7373 | Train Acc: 79.72%


Epoch 2/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 613/613 [00:56<00:00, 10.82it/s, loss=0.2035]
Epoch 2/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 613/613 [00:56<00:00, 10.82it/s, loss=0.2035]


Epoch [2/10] | Loss: 30.5098 | Train Acc: 98.60%


Epoch 3/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 613/613 [00:59<00:00, 10.26it/s, loss=0.0267]
Epoch 3/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 613/613 [00:59<00:00, 10.26it/s, loss=0.0267]


Epoch [3/10] | Loss: 23.1607 | Train Acc: 98.85%


Epoch 4/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 613/613 [00:59<00:00, 10.32it/s, loss=0.0681]
Epoch 4/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 613/613 [00:59<00:00, 10.32it/s, loss=0.0681]


Epoch [4/10] | Loss: 15.8329 | Train Acc: 99.21%


Epoch 5/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 613/613 [00:57<00:00, 10.63it/s, loss=0.0168]
Epoch 5/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 613/613 [00:57<00:00, 10.63it/s, loss=0.0168]


Epoch [5/10] | Loss: 17.3650 | Train Acc: 99.14%


Epoch 6/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 613/613 [00:58<00:00, 10.52it/s, loss=0.0003]
Epoch 6/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 613/613 [00:58<00:00, 10.52it/s, loss=0.0003]


Epoch [6/10] | Loss: 14.2450 | Train Acc: 99.30%


Epoch 7/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 613/613 [00:29<00:00, 20.84it/s, loss=0.0079]
Epoch 7/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 613/613 [00:29<00:00, 20.84it/s, loss=0.0079]


Epoch [7/10] | Loss: 11.6080 | Train Acc: 99.44%


Epoch 8/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 613/613 [00:28<00:00, 21.43it/s, loss=0.0055]
Epoch 8/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 613/613 [00:28<00:00, 21.43it/s, loss=0.0055]


Epoch [8/10] | Loss: 10.0064 | Train Acc: 99.46%


Epoch 9/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 613/613 [00:28<00:00, 21.75it/s, loss=0.0081]
Epoch 9/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 613/613 [00:28<00:00, 21.75it/s, loss=0.0081]


Epoch [9/10] | Loss: 10.5830 | Train Acc: 99.47%


Epoch 10/10: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 613/613 [00:28<00:00, 21.24it/s, loss=0.0901]



Epoch [10/10] | Loss: 10.3530 | Train Acc: 99.48%

✅ Base Classifier saved to: /Users/ayushgourav/projectss/AgenticGAN---Tester-/GTSRBT/AGENT/models/classifier.pth


Evaluating: 100%|██████████| 198/198 [00:04<00:00, 41.60it/s]


✅ Base Classifier Test Accuracy: 96.17%





In [None]:

# Evaluate Base Classifier on Test Set

def evaluate_model(model, loader):
    """Compute top-1 accuracy (in percent) of ``model`` over ``loader``.

    This re-declares the helper of the same name from the Phase I cell so the
    cell can be run on its own. The model is put in eval mode and stays in it;
    no gradients are tracked. Reads the module-level ``device``.

    Args:
        model: classifier returning per-class logits.
        loader: iterable yielding ``(images, labels)`` batches.

    Returns:
        Accuracy between 0 and 100. An empty ``loader`` gives 0.0 rather than
        a ZeroDivisionError.
    """
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in tqdm(loader, desc="Evaluating"):
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            preds = outputs.argmax(dim=1)

            correct += (preds == labels).sum().item()
            total += labels.size(0)

    if total == 0:
        return 0.0
    return 100.0 * correct / total

# Evaluate the base classifier on the test loader and keep the result in
# `base_accuracy`. evaluate_model() sets eval mode itself; the explicit
# call is redundant but harmless.
classifier.eval()
base_accuracy = evaluate_model(classifier, test_loader)
print(f"\n Base Classifier Test Accuracy: {base_accuracy:.2f}%")

Evaluating: 100%|██████████| 198/198 [00:04<00:00, 47.47it/s]


✅ Base Classifier Test Accuracy: 96.17%






#  Phase II: Train Conditional GAN

Train a class-conditional GAN to generate realistic traffic sign images.

In [None]:

#  PHASE II: TRAIN CONDITIONAL GAN


#  GAN Data Loader (CSV-based for correct labels)

print("Loading GAN training data from CSV...")
gan_dataset = GTSRBDataset(
    csv_file=CONFIG["train_csv"],
    root_dir=CONFIG["data_root"],
    transform=transform
)

# Pinned host memory only helps host-to-CUDA copies, so request it only when
# the selected device is CUDA; on MPS or CPU it just triggers a warning.
gan_loader = DataLoader(
    gan_dataset,
    batch_size=CONFIG["gan_batch_size"],
    shuffle=True,
    num_workers=CONFIG["num_workers"],
    pin_memory=(device.type == "cuda")
)


# Initialize GAN Models
# The generator is conditioned on the class label; the discriminator only
# receives images.

generator = Generator(
    latent_dim=CONFIG["latent_dim"],
    num_classes=CONFIG["num_classes"]
).to(device)

discriminator = Discriminator().to(device)


#  Hinge Loss Functions

def d_hinge_loss(real_logits, fake_logits):
    """Hinge loss for the discriminator.

    Penalises real logits that fall below +1 and fake logits that rise above
    -1; each term is averaged over all of its elements and the two means are
    added.
    """
    real_term = F.relu(1.0 - real_logits).mean()
    fake_term = F.relu(1.0 + fake_logits).mean()
    return real_term + fake_term

def g_hinge_loss(fake_logits):
    """Generator loss: the negated mean of the discriminator's logits on fakes.

    Minimising it pushes those logits upwards.
    """
    mean_score = fake_logits.mean()
    return -mean_score


#  Optimizers: Adam with betas (0.0, 0.9) for both networks.
#  Both use the same learning rate, CONFIG["gan_lr"], so this is NOT a
#  two-time-scale (TTUR) setup despite what an earlier comment said.

_gan_adam_kwargs = {"lr": CONFIG["gan_lr"], "betas": (0.0, 0.9)}
optimizer_G = optim.Adam(generator.parameters(), **_gan_adam_kwargs)
optimizer_D = optim.Adam(discriminator.parameters(), **_gan_adam_kwargs)


# Training Loop

print("\nStarting GAN training...")
generator.train()
discriminator.train()

for epoch in range(CONFIG["gan_epochs"]):
    loop = tqdm(gan_loader, desc=f"GAN Epoch {epoch+1}/{CONFIG['gan_epochs']}")
    
    for real_imgs, labels in loop:
        real_imgs = real_imgs.to(device)
        labels = labels.to(device)
        batch_size = real_imgs.size(0)

        # ---- Train Discriminator ----
        optimizer_D.zero_grad()
        
        z = torch.randn(batch_size, CONFIG["latent_dim"], device=device)
        # The generator is not updated in this step, so its forward pass runs
        # without autograd (same values as detach(), but no graph is built).
        with torch.no_grad():
            fake_imgs = generator(z, labels)
        
        real_logits = discriminator(real_imgs)
        fake_logits = discriminator(fake_imgs)
        
        loss_D = d_hinge_loss(real_logits, fake_logits)
        loss_D.backward()
        optimizer_D.step()

        # ---- Train Generator ----
        # Fresh noise, same labels as the real batch. Gradients flow through
        # the discriminator into the generator; only the generator is stepped.
        optimizer_G.zero_grad()
        
        z = torch.randn(batch_size, CONFIG["latent_dim"], device=device)
        fake_imgs = generator(z, labels)
        fake_logits = discriminator(fake_imgs)
        
        loss_G = g_hinge_loss(fake_logits)
        loss_G.backward()
        optimizer_G.step()

        loop.set_postfix(D_loss=f"{loss_D.item():.4f}", G_loss=f"{loss_G.item():.4f}")

    # Save sample images every 10 epochs
    if (epoch + 1) % 10 == 0:
        # Sample in eval mode and without autograd: this is how the generator
        # is used after training, it leaves the BatchNorm running statistics
        # untouched, and it avoids building a graph that is never used.
        generator.eval()
        with torch.no_grad():
            z = torch.randn(25, CONFIG["latent_dim"], device=device)
            sample_labels = torch.randint(0, CONFIG["num_classes"], (25,), device=device)
            samples = generator(z, sample_labels)
        generator.train()
        
        save_path = os.path.join(CONFIG["gen_images_dir"], f"epoch_{epoch+1}.png")
        # The generator ends in Tanh, so map the fixed range [-1, 1] to [0, 1]
        # instead of stretching by whatever min/max this batch happens to have.
        save_image(samples, save_path, nrow=5, normalize=True, value_range=(-1, 1))
        print(f"üñºÔ∏è Saved samples: {save_path}")


# Save GAN Models

torch.save(generator.state_dict(), CONFIG["generator_path"])
torch.save(discriminator.state_dict(), CONFIG["discriminator_path"])
print(f"\nGAN models saved!")

📊 Loading GAN training data from CSV...
   Loaded 39209 samples from Train.csv
   Label range: 0 to 42

🎨 Starting GAN training...


GAN Epoch 1/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:47<00:00,  6.46it/s, D_loss=1.4899, G_loss=0.8354] 
GAN Epoch 1/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:47<00:00,  6.46it/s, D_loss=1.4899, G_loss=0.8354]
GAN Epoch 2/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:47<00:00,  6.50it/s, D_loss=1.3026, G_loss=0.3376] 
GAN Epoch 2/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:47<00:00,  6.50it/s, D_loss=1.3026, G_loss=0.3376]
GAN Epoch 3/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:47<00:00,  6.47it/s, D_loss=1.1384, G_loss=0.4345] 
GAN Epoch 3/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:47<00:00,  6.47it/s, D_loss=1.1384, G_loss=0.4345]
GAN Epoch 4/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:49<00:00,  6.20it/s, D_loss=1.3432, G_loss=0.5922] 
GAN Epoch 4/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:49<00:00,  6.20it/s, D_loss=1.3432, G_loss=0.5922]
GAN Epoch 5/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/30

🖼️ Saved samples: ./generated_images/epoch_10.png


GAN Epoch 11/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:48<00:00,  6.32it/s, D_loss=1.0653, G_loss=1.8353] 
GAN Epoch 11/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:48<00:00,  6.32it/s, D_loss=1.0653, G_loss=1.8353]
GAN Epoch 12/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:49<00:00,  6.19it/s, D_loss=1.1401, G_loss=1.6909] 
GAN Epoch 12/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:49<00:00,  6.19it/s, D_loss=1.1401, G_loss=1.6909] 
GAN Epoch 13/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:50<00:00,  6.11it/s, D_loss=0.7184, G_loss=0.8670] 
GAN Epoch 13/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:50<00:00,  6.11it/s, D_loss=0.7184, G_loss=0.8670]
GAN Epoch 14/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:49<00:00,  6.19it/s, D_loss=1.0814, G_loss=1.1300] 
GAN Epoch 14/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:49<00:00,  6.19it/s, D_loss=1.0814, G_loss=1.1300]
GAN Epoch 15/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚

🖼️ Saved samples: ./generated_images/epoch_20.png


GAN Epoch 21/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:49<00:00,  6.20it/s, D_loss=1.0020, G_loss=1.2750] 
GAN Epoch 21/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:49<00:00,  6.20it/s, D_loss=1.0020, G_loss=1.2750]
GAN Epoch 22/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:49<00:00,  6.20it/s, D_loss=1.2440, G_loss=-0.1912]
GAN Epoch 22/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:49<00:00,  6.20it/s, D_loss=1.2440, G_loss=-0.1912]
GAN Epoch 23/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:49<00:00,  6.17it/s, D_loss=0.7683, G_loss=1.8100] 
GAN Epoch 23/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:49<00:00,  6.17it/s, D_loss=0.7683, G_loss=1.8100]
GAN Epoch 24/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:50<00:00,  6.12it/s, D_loss=0.8859, G_loss=1.3643] 
GAN Epoch 24/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:50<00:00,  6.12it/s, D_loss=0.8859, G_loss=1.3643]
GAN Epoch 25/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚

🖼️ Saved samples: ./generated_images/epoch_30.png


GAN Epoch 31/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:50<00:00,  6.12it/s, D_loss=0.9168, G_loss=1.6750] 
GAN Epoch 31/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:50<00:00,  6.12it/s, D_loss=0.9168, G_loss=1.6750]
GAN Epoch 32/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:49<00:00,  6.14it/s, D_loss=0.7017, G_loss=0.3691] 
GAN Epoch 32/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:49<00:00,  6.14it/s, D_loss=0.7017, G_loss=0.3691]
GAN Epoch 33/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:48<00:00,  6.32it/s, D_loss=0.4301, G_loss=1.4245] 
GAN Epoch 33/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:48<00:00,  6.32it/s, D_loss=0.4301, G_loss=1.4245]
GAN Epoch 34/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:48<00:00,  6.36it/s, D_loss=1.0841, G_loss=1.1116] 
GAN Epoch 34/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:48<00:00,  6.36it/s, D_loss=1.0841, G_loss=1.1116]
GAN Epoch 35/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñ

🖼️ Saved samples: ./generated_images/epoch_40.png


GAN Epoch 41/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:48<00:00,  6.28it/s, D_loss=1.0695, G_loss=0.1637] 
GAN Epoch 41/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:48<00:00,  6.28it/s, D_loss=1.0695, G_loss=0.1637]
GAN Epoch 42/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:49<00:00,  6.20it/s, D_loss=1.2158, G_loss=1.9439] 
GAN Epoch 42/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:49<00:00,  6.20it/s, D_loss=1.2158, G_loss=1.9439]
GAN Epoch 43/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:49<00:00,  6.14it/s, D_loss=0.7073, G_loss=1.0268]
GAN Epoch 43/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:49<00:00,  6.14it/s, D_loss=0.7073, G_loss=1.0268]
GAN Epoch 44/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:50<00:00,  6.09it/s, D_loss=0.8393, G_loss=0.7561] 
GAN Epoch 44/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:50<00:00,  6.09it/s, D_loss=0.8393, G_loss=0.7561]
GAN Epoch 45/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà

🖼️ Saved samples: ./generated_images/epoch_50.png


GAN Epoch 51/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:48<00:00,  6.32it/s, D_loss=0.5934, G_loss=1.9553] 
GAN Epoch 51/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:48<00:00,  6.32it/s, D_loss=0.5934, G_loss=1.9553]
GAN Epoch 52/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:48<00:00,  6.35it/s, D_loss=0.2921, G_loss=0.9626] 
GAN Epoch 52/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:48<00:00,  6.35it/s, D_loss=0.2921, G_loss=0.9626]
GAN Epoch 53/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:48<00:00,  6.31it/s, D_loss=0.8253, G_loss=0.4595]
GAN Epoch 53/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:48<00:00,  6.31it/s, D_loss=0.8253, G_loss=0.4595]
GAN Epoch 54/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:48<00:00,  6.28it/s, D_loss=0.6981, G_loss=1.7643] 
GAN Epoch 54/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:48<00:00,  6.28it/s, D_loss=0.6981, G_loss=1.7643]
GAN Epoch 55/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà

🖼️ Saved samples: ./generated_images/epoch_60.png


GAN Epoch 61/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:50<00:00,  6.14it/s, D_loss=1.1428, G_loss=1.2018]
GAN Epoch 61/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:50<00:00,  6.14it/s, D_loss=1.1428, G_loss=1.2018]
GAN Epoch 62/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:50<00:00,  6.11it/s, D_loss=0.8451, G_loss=0.3196] 
GAN Epoch 62/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:50<00:00,  6.11it/s, D_loss=0.8451, G_loss=0.3196]
GAN Epoch 63/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:49<00:00,  6.26it/s, D_loss=1.0550, G_loss=0.3333] 
GAN Epoch 63/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:49<00:00,  6.26it/s, D_loss=1.0550, G_loss=0.3333]
GAN Epoch 64/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:48<00:00,  6.34it/s, D_loss=0.8259, G_loss=0.4054] 
GAN Epoch 64/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:48<00:00,  6.34it/s, D_loss=0.8259, G_loss=0.4054]
GAN Epoch 65/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà

🖼️ Saved samples: ./generated_images/epoch_70.png


GAN Epoch 71/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:49<00:00,  6.25it/s, D_loss=1.0628, G_loss=0.2844]
GAN Epoch 71/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:49<00:00,  6.25it/s, D_loss=1.0628, G_loss=0.2844]
GAN Epoch 72/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:49<00:00,  6.22it/s, D_loss=0.6122, G_loss=0.9627]
GAN Epoch 72/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:49<00:00,  6.22it/s, D_loss=0.6122, G_loss=0.9627]
GAN Epoch 73/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:50<00:00,  6.07it/s, D_loss=0.2631, G_loss=0.8215]
GAN Epoch 73/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:50<00:00,  6.07it/s, D_loss=0.2631, G_loss=0.8215]
GAN Epoch 74/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:50<00:00,  6.08it/s, D_loss=0.7964, G_loss=0.0383]
GAN Epoch 74/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:50<00:00,  6.08it/s, D_loss=0.7964, G_loss=0.0383]
GAN Epoch 75/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 3

🖼️ Saved samples: ./generated_images/epoch_80.png


GAN Epoch 81/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:49<00:00,  6.14it/s, D_loss=1.2064, G_loss=0.2840]
GAN Epoch 81/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:49<00:00,  6.14it/s, D_loss=1.2064, G_loss=0.2840]
GAN Epoch 82/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:49<00:00,  6.16it/s, D_loss=0.7403, G_loss=1.7213] 
GAN Epoch 82/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:49<00:00,  6.16it/s, D_loss=0.7403, G_loss=1.7213]
GAN Epoch 83/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:49<00:00,  6.14it/s, D_loss=0.5811, G_loss=1.9633]
GAN Epoch 83/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:49<00:00,  6.14it/s, D_loss=0.5811, G_loss=1.9633]
GAN Epoch 84/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:50<00:00,  6.14it/s, D_loss=0.9893, G_loss=0.5978] 
GAN Epoch 84/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:50<00:00,  6.14it/s, D_loss=0.9893, G_loss=0.5978]
GAN Epoch 85/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà|

🖼️ Saved samples: ./generated_images/epoch_90.png


GAN Epoch 91/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:56<00:00,  5.44it/s, D_loss=1.0159, G_loss=1.3691]
GAN Epoch 91/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:56<00:00,  5.44it/s, D_loss=1.0159, G_loss=1.3691]
GAN Epoch 92/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:56<00:00,  5.45it/s, D_loss=1.0595, G_loss=1.5669] 
GAN Epoch 92/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:56<00:00,  5.45it/s, D_loss=1.0595, G_loss=1.5669]
GAN Epoch 93/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:55<00:00,  5.50it/s, D_loss=1.0396, G_loss=1.0848] 
GAN Epoch 93/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:55<00:00,  5.50it/s, D_loss=1.0396, G_loss=1.0848]
GAN Epoch 94/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:55<00:00,  5.52it/s, D_loss=0.8776, G_loss=1.2236]
GAN Epoch 94/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà| 307/307 [00:55<00:00,  5.52it/s, D_loss=0.8776, G_loss=1.2236]
GAN Epoch 95/100: 100%|‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà‚ñà|

🖼️ Saved samples: ./generated_images/epoch_100.png

✅ GAN models saved!






#  Phase III: Agentic Failure Mining

Generate GAN images and identify cases where the classifier has low confidence (potential failure cases).

In [None]:

# PHASE III: AGENTIC FAILURE MINING


# Clear old failure cases so the folder only holds results from this run
if os.path.exists(CONFIG["failure_dir"]):
    shutil.rmtree(CONFIG["failure_dir"])
os.makedirs(CONFIG["failure_dir"], exist_ok=True)


# Load Trained Models
# Both networks are rebuilt and restored from their checkpoints, then put in
# eval mode so Dropout is off and BatchNorm uses its running statistics.

classifier = RobustCNN(num_classes=CONFIG["num_classes"]).to(device)
classifier.load_state_dict(torch.load(CONFIG["classifier_path"], map_location=device))
classifier.eval()

generator = Generator(
    latent_dim=CONFIG["latent_dim"],
    num_classes=CONFIG["num_classes"]
).to(device)
generator.load_state_dict(torch.load(CONFIG["generator_path"], map_location=device))
generator.eval()

print("‚úÖ Models loaded for failure mining")


#  Mine Failure Cases
#  A sample counts as a failure when the classifier's highest softmax
#  probability is below CONFIG["confidence_threshold"]. The predicted class
#  is not compared with the label the generator was conditioned on.
#  NOTE(review): the saved file names carry no class label, while
#  MixedDataset only picks up files whose names contain "label_" - confirm
#  that the retraining step assigns labels before these images are used.

failure_count = 0

print(f"\nüîç Mining {CONFIG['num_gan_samples']} GAN samples for failures...")
print(f"   Confidence threshold: {CONFIG['confidence_threshold']}")

for i in tqdm(range(CONFIG["num_gan_samples"]), desc="Mining"):
    z = torch.randn(1, CONFIG["latent_dim"], device=device)
    label = torch.randint(0, CONFIG["num_classes"], (1,), device=device)

    with torch.no_grad():
        fake_img = generator(z, label)
        logits = classifier(fake_img)
        probs = torch.softmax(logits, dim=1)
        max_confidence, pred_class = probs.max(dim=1)

    
    if max_confidence.item() < CONFIG["confidence_threshold"]:
        failure_count += 1
        # Map the generator's fixed Tanh range [-1, 1] to [0, 1] when saving.
        # Plain normalize=True would stretch each image by its own min/max, so
        # the file on disk would no longer match the tensor the classifier
        # actually scored once it is reloaded through `transform`.
        save_image(
            fake_img,
            os.path.join(CONFIG["failure_dir"], f"failure_{failure_count}.png"),
            normalize=True,
            value_range=(-1, 1)
        )

print(f"\nFailure Mining Complete!")
print(f"   Total failures found: {failure_count}")
print(f"   Saved to: {os.path.abspath(CONFIG['failure_dir'])}")

✅ Models loaded for failure mining

🔍 Mining 2000 GAN samples for failures...
   Confidence threshold: 0.9


Mining: 100%|██████████| 2000/2000 [00:28<00:00, 69.87it/s]


✅ Failure Mining Complete!
   Total failures found: 699
   Saved to: /Users/ayushgourav/projectss/AgenticGAN---Tester-/GTSRBT/AGENT/failure_cases






#  Phase IV: Retrain Classifier (Hardening)

Create a mixed dataset of real images + failure cases, then fine-tune the classifier to become more robust.

In [None]:

#  PHASE IV-A: PREPARE MIXED DATASET
#
# Rebuilds CONFIG["retrain_data_dir"] with two sub-folders:
#   real/    - randomly chosen training images, dataset label in the filename
#   failure/ - mined GAN failure cases, pseudo-labelled by the base classifier
# Every file is named "<kind>_<n>_label_<class id>.png"; the class id is read
# back from the filename further down in this cell.

# Start from an empty directory so files from an earlier run cannot leak in.
if os.path.exists(CONFIG["retrain_data_dir"]):
    shutil.rmtree(CONFIG["retrain_data_dir"])

real_dir = os.path.join(CONFIG["retrain_data_dir"], "real")
fail_dir = os.path.join(CONFIG["retrain_data_dir"], "failure")
os.makedirs(real_dir, exist_ok=True)
os.makedirs(fail_dir, exist_ok=True)


# üîπ Load GTSRB Training Data (CSV-BASED - CORRECT LABELS!)

print(" Loading training data from CSV (correct labels)...")
gtsrb_train = GTSRBDataset(
    csv_file=CONFIG["train_csv"],
    root_dir=CONFIG["data_root"],
    transform=transform
)

#  Save RANDOM Real Images (prevents class bias)

print(f"\nSelecting {CONFIG['num_real_samples']} random real images...")

# A random permutation truncated to the requested size gives a sample
# without replacement (and at most len(gtsrb_train) images).
indices = torch.randperm(len(gtsrb_train))[:CONFIG["num_real_samples"]]

for i, idx in enumerate(tqdm(indices, desc="Saving real images")):

    img, label = gtsrb_train[idx.item()]
    save_image(
        img,
        os.path.join(real_dir, f"real_{i}_label_{label}.png"),
        normalize=True
    )

print(f"   Saved {len(indices)} real images with CORRECT labels")


# Load Classifier for Pseudo-labeling

classifier = RobustCNN(num_classes=CONFIG["num_classes"]).to(device)
classifier.load_state_dict(torch.load(CONFIG["classifier_path"], map_location=device))
classifier.eval()
print("‚úÖ Loaded classifier for pseudo-labeling")


#  Pseudo-label Failure Images
#
# NOTE(review): the pseudo-label is the base classifier's own argmax on images
# it was not confident about, so some of these labels are presumably wrong.

failure_files = [f for f in os.listdir(CONFIG["failure_dir"]) if f.endswith(".png")]
print(f"\nPseudo-labeling {len(failure_files)} failure cases...")

with torch.no_grad():
    for idx, file in enumerate(tqdm(failure_files, desc="Labeling failures")):
        # Best effort: a file that cannot be read or scored is reported and skipped.
        try:
            img = Image.open(os.path.join(CONFIG["failure_dir"], file)).convert("RGB")
            img_tensor = transform(img).unsqueeze(0).to(device)

            logits = classifier(img_tensor)
            pseudo_label = logits.argmax(dim=1).item()

            save_image(
                img_tensor.squeeze(0),
                os.path.join(fail_dir, f"failure_{idx}_label_{pseudo_label}.png"),
                normalize=True
            )
        except Exception as e:
            print(f"Skipping {file}: {e}")

print("\n‚úÖ Mixed dataset prepared!")
print(f"   Real images: {len(os.listdir(real_dir))}")
print(f"   Failure images: {len(os.listdir(fail_dir))}")


# VERIFY: Check label distribution
#
# Count over EVERY saved real image. Reading only the first 100 directory
# entries (whose order is arbitrary) under-reports the number of classes and
# makes the "Total unique labels" line misleading.

print("\n Verifying label distribution in real images...")
label_counts = {}
for fname in os.listdir(real_dir):
    if "label_" in fname:
        label = int(fname.split("label_")[1].split(".")[0])
        label_counts[label] = label_counts.get(label, 0) + 1

print(f"   Sample of labels found: {dict(sorted(label_counts.items())[:10])}")
print(f"   Total unique labels: {len(label_counts)}")

if len(label_counts) < 10:
    print(" WARNING: Very few unique labels! Something may be wrong.")
else:
    print("Label distribution looks good!")

📊 Loading training data from CSV (correct labels)...
   Loaded 39209 samples from Train.csv
   Label range: 0 to 42

📊 Selecting 10000 random real images...


Saving real images: 100%|██████████| 10000/10000 [00:04<00:00, 2009.48it/s]



   Saved 10000 real images with CORRECT labels
✅ Loaded classifier for pseudo-labeling

📊 Pseudo-labeling 699 failure cases...


Labeling failures: 100%|██████████| 699/699 [00:02<00:00, 347.71it/s]


✅ Mixed dataset prepared!
   Real images: 10000
   Failure images: 699

🔍 Verifying label distribution in real images...
   Sample of labels found: {0: 1, 1: 3, 2: 6, 3: 4, 4: 4, 5: 8, 6: 1, 7: 2, 8: 4, 9: 6}
   Total unique labels: 34
✅ Label distribution looks good!





In [None]:

#  PHASE IV-B: LOAD MIXED DATASET & RETRAIN
#
# Fine-tunes the base classifier on the real + failure images prepared in
# Phase IV-A and saves the result to CONFIG["hardened_path"].


#  Create Mixed DataLoader

real_dir = os.path.join(CONFIG["retrain_data_dir"], "real")
fail_dir = os.path.join(CONFIG["retrain_data_dir"], "failure")

mixed_dataset = MixedDataset(real_dir, fail_dir, transform=transform)
mixed_loader = DataLoader(
    mixed_dataset,
    batch_size=CONFIG["classifier_batch_size"],
    shuffle=True,
    num_workers=CONFIG["num_workers"],
    pin_memory=True
)

print(f"‚úÖMixed dataset loaded: {len(mixed_dataset)} samples")


#  Load BASE Classifier (not random!)
#  Fine-tuning starts from the trained weights, not a fresh initialisation.

classifier = RobustCNN(num_classes=CONFIG["num_classes"]).to(device)
classifier.load_state_dict(torch.load(CONFIG["classifier_path"], map_location=device))
print(f"Loaded base classifier from {CONFIG['classifier_path']}")


#  Fine-tuning Setup (VERY LOW learning rate)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(classifier.parameters(), lr=CONFIG["retrain_lr"])


# Fine-tuning Loop

print(f"\nStarting hardening (fine-tuning) for {CONFIG['retrain_epochs']} epochs...")
print(f"   Learning rate: {CONFIG['retrain_lr']} (very low to preserve knowledge)")

classifier.train()

for epoch in range(CONFIG["retrain_epochs"]):
    running_loss = 0.0   # sum of per-sample losses over the epoch
    correct = 0
    total = 0

    loop = tqdm(mixed_loader, desc=f"Retrain Epoch {epoch+1}/{CONFIG['retrain_epochs']}")
    for images, labels in loop:
        images = images.to(device)
        labels = labels.to(device)

        optimizer.zero_grad()
        outputs = classifier(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        batch_n = labels.size(0)
        # criterion returns the batch MEAN (default reduction), so weight it by
        # the batch size; this keeps a short final batch from being over-counted.
        running_loss += loss.item() * batch_n
        preds = outputs.argmax(dim=1)
        correct += (preds == labels).sum().item()
        total += batch_n

        loop.set_postfix(loss=f"{loss.item():.4f}")

    # Report the mean per-sample loss. A raw sum of batch losses grows with the
    # number of batches and is not comparable with the per-batch value shown in
    # the progress bar.
    epoch_loss = running_loss / total
    acc = 100.0 * correct / total
    print(f"Epoch [{epoch+1}/{CONFIG['retrain_epochs']}] | Loss: {epoch_loss:.4f} | Accuracy: {acc:.2f}%")

    if acc < 50.0:
        print("‚ö†Ô∏è WARNING: Accuracy is low. Check if mixed dataset has correct labels.")

# Make sure the checkpoint directory exists before writing into it.
os.makedirs(os.path.dirname(CONFIG["hardened_path"]) or ".", exist_ok=True)
torch.save(classifier.state_dict(), CONFIG["hardened_path"])
print(f"\nHardened classifier saved to: {os.path.abspath(CONFIG['hardened_path'])}")

✅ Mixed dataset loaded: 10699 samples
✅ Loaded base classifier from ./models/classifier.pth

🛡️ Starting hardening (fine-tuning) for 5 epochs...
   Learning rate: 1e-05 (very low to preserve knowledge)


Retrain Epoch 1/5: 100%|██████████| 168/168 [00:06<00:00, 24.87it/s, loss=0.0118]
Retrain Epoch 1/5: 100%|██████████| 168/168 [00:06<00:00, 24.87it/s, loss=0.0118]


Epoch [1/5] | Loss: 15.3783 | Accuracy: 96.84%


Retrain Epoch 2/5: 100%|██████████| 168/168 [00:05<00:00, 29.40it/s, loss=0.0059]
Retrain Epoch 2/5: 100%|██████████| 168/168 [00:05<00:00, 29.40it/s, loss=0.0059]


Epoch [2/5] | Loss: 12.1776 | Accuracy: 97.52%


Retrain Epoch 3/5: 100%|██████████| 168/168 [00:05<00:00, 29.95it/s, loss=0.0000]
Retrain Epoch 3/5: 100%|██████████| 168/168 [00:05<00:00, 29.95it/s, loss=0.0000]


Epoch [3/5] | Loss: 10.5980 | Accuracy: 97.82%


Retrain Epoch 4/5: 100%|██████████| 168/168 [00:05<00:00, 30.01it/s, loss=0.0216]
Retrain Epoch 4/5: 100%|██████████| 168/168 [00:05<00:00, 30.01it/s, loss=0.0216]


Epoch [4/5] | Loss: 10.5717 | Accuracy: 98.01%


Retrain Epoch 5/5: 100%|██████████| 168/168 [00:08<00:00, 20.75it/s, loss=0.0000]
Retrain Epoch 5/5: 100%|██████████| 168/168 [00:08<00:00, 20.75it/s, loss=0.0000]


Epoch [5/5] | Loss: 8.8608 | Accuracy: 98.17%

✅ Hardened classifier saved to: /Users/ayushgourav/projectss/AgenticGAN---Tester-/GTSRBT/AGENT/models/classifier_hardened.pth


---
# Phase V: Evaluation & Visualization

Compare accuracy before and after hardening, and visualize with Grad-CAM.

In [None]:

#  PHASE V-A: EVALUATE ACCURACY (BEFORE VS AFTER)


# Test split, read from the test CSV with the same transform used elsewhere
# in this notebook.
print("Loading test data...")
test_data = GTSRBDataset(
    csv_file=CONFIG["test_csv"],
    root_dir=CONFIG["data_root"],
    transform=transform,
)

# Evaluation loader: fixed order (no shuffling).
test_loader_options = {
    "batch_size": CONFIG["classifier_batch_size"],
    "shuffle": False,
    "num_workers": CONFIG["num_workers"],
    "pin_memory": True,
}
test_loader = DataLoader(test_data, **test_loader_options)


def evaluate_model(model, loader, desc="Evaluating"):
    """Return the top-1 accuracy of ``model`` over ``loader``, in percent.

    The model is switched to eval mode (and left in it) and the whole pass
    runs without gradient tracking.

    Args:
        model: Classifier called as ``model(images)``; its output is reduced
            with ``argmax(dim=1)`` to get one predicted class per sample.
        loader: Iterable yielding ``(images, labels)`` batches.
        desc: Label shown on the tqdm progress bar.

    Returns:
        ``100 * correct / total`` as a float.

    Raises:
        ValueError: If ``loader`` yields no samples, since accuracy is then
            undefined (previously a bare ZeroDivisionError).
    """
    model.eval()
    correct = 0
    total = 0

    with torch.no_grad():
        for images, labels in tqdm(loader, desc=desc):
            # `device` is the notebook-level device the models were moved to.
            images = images.to(device)
            labels = labels.to(device)

            preds = model(images).argmax(dim=1)

            correct += (preds == labels).sum().item()
            total += labels.size(0)

    if total == 0:
        raise ValueError("evaluate_model: loader produced no samples; accuracy is undefined")

    return 100.0 * correct / total


# Load both checkpoints. A missing checkpoint is a hard error: evaluating a
# randomly initialised network would silently write a meaningless accuracy
# into the results file.
if not os.path.exists(CONFIG["classifier_path"]):
    raise FileNotFoundError(f"Base model not found: {CONFIG['classifier_path']}")
base_model = RobustCNN(num_classes=CONFIG["num_classes"]).to(device)
base_model.load_state_dict(torch.load(CONFIG["classifier_path"], map_location=device))
print(f"Loaded base model from {CONFIG['classifier_path']}")


if not os.path.exists(CONFIG["hardened_path"]):
    raise FileNotFoundError(f"Hardened model not found: {CONFIG['hardened_path']}")
hardened_model = RobustCNN(num_classes=CONFIG["num_classes"]).to(device)
hardened_model.load_state_dict(torch.load(CONFIG["hardened_path"], map_location=device))
print(f"Loaded hardened model from {CONFIG['hardened_path']}")


# Both models are scored on the same test loader.
print("\nEvaluating Base Model...")
acc_before = evaluate_model(base_model, test_loader, "Base Model")

print("\nEvaluating Hardened Model...")
acc_after = evaluate_model(hardened_model, test_loader, "Hardened Model")



# The "Improvement" figure is the signed difference of the two percentages.
print(f" Accuracy BEFORE Hardening: {acc_before:.2f}%")
print(f" Accuracy AFTER Hardening:  {acc_after:.2f}%")
print(f"Improvement: {acc_after - acc_before:+.2f}%")



# Persist the summary; create the results directory if this cell runs first.
os.makedirs(CONFIG["results_dir"], exist_ok=True)
results_file = os.path.join(CONFIG["results_dir"], "accuracy_results.txt")
with open(results_file, "w") as f:
    f.write("AgenticGAN GTSRB Results\n")
    f.write("="*50 + "\n")
    f.write(f"Accuracy Before Hardening: {acc_before:.2f}%\n")
    f.write(f"Accuracy After Hardening:  {acc_after:.2f}%\n")
    f.write(f"Improvement: {acc_after - acc_before:+.2f}%\n")

print(f"\nüìÑ Results saved to: {os.path.abspath(results_file)}")

📊 Loading test data...
   Loaded 12630 samples from Test.csv
   Label range: 0 to 42
✅ Loaded base model from ./models/classifier.pth
✅ Loaded hardened model from ./models/classifier_hardened.pth

📊 Evaluating Base Model...


Base Model: 100%|██████████| 198/198 [00:05<00:00, 37.48it/s]
Base Model: 100%|██████████| 198/198 [00:05<00:00, 37.48it/s]



📊 Evaluating Hardened Model...


Hardened Model: 100%|██████████| 198/198 [00:05<00:00, 38.15it/s]


📊 RESULTS SUMMARY
✅ Accuracy BEFORE Hardening: 96.17%
✅ Accuracy AFTER Hardening:  96.73%
📈 Improvement: +0.56%

📄 Results saved to: /Users/ayushgourav/projectss/AgenticGAN---Tester-/GTSRBT/AGENT/results/accuracy_results.txt





In [None]:

#PHASE V-B: GRAD-CAM VISUALIZATION

import cv2


# üîπ Grad-CAM Class

class GradCAM:
    """Grad-CAM heatmaps for one layer of a classifier.

    A forward hook stores the target layer's output and a full backward hook
    stores the gradient of the chosen class score with respect to that output.
    ``generate`` combines the two into a heatmap scaled to ``[0, 1]``.

    The hooks stay attached to ``target_layer`` until ``remove()`` is called.
    Call it when the instance is no longer needed so hooks do not pile up on
    the layer when several instances are created.
    """

    def __init__(self, model, target_layer):
        self.model = model
        self.target_layer = target_layer
        self.activations = None
        self.gradients = None

        # Keep the handles so the hooks can be detached again.
        self._handles = [
            self.target_layer.register_forward_hook(self._forward_hook),
            self.target_layer.register_full_backward_hook(self._backward_hook),
        ]

    def remove(self):
        """Detach both hooks from the target layer (safe to call repeatedly)."""
        for handle in self._handles:
            handle.remove()
        self._handles = []

    def _forward_hook(self, module, inputs, output):
        # Detached copy of the layer output from the most recent forward pass.
        self.activations = output.detach()

    def _backward_hook(self, module, grad_input, grad_output):
        # Gradient of the back-propagated score w.r.t. the layer output.
        self.gradients = grad_output[0].detach()

    def generate(self, image, class_idx=None):
        """Compute the Grad-CAM heatmap for ``image``.

        Args:
            image: Input batch for the model. The class score is read from row
                0 of the output, so a single-image batch is expected.
            class_idx: Class whose score is back-propagated. Defaults to the
                model's predicted class (argmax over ``dim=1``).

        Returns:
            The heatmap as a NumPy array with singleton dimensions squeezed
            out and values in ``[0, 1]``.

        Raises:
            RuntimeError: If the hooks captured nothing, e.g. after
                ``remove()`` was called.
        """
        self.model.eval()

        # Put the input on the model's own device rather than on a notebook
        # global, so the class works wherever the model lives.
        first_param = next(self.model.parameters(), None)
        if first_param is not None:
            image = image.to(first_param.device)

        # Drop anything captured by an earlier call so stale tensors are never reused.
        self.activations = None
        self.gradients = None

        # Grad-CAM needs a backward pass even if the caller is inside no_grad().
        with torch.enable_grad():
            output = self.model(image)

            if class_idx is None:
                class_idx = output.argmax(dim=1).item()

            self.model.zero_grad()
            output[0, class_idx].backward()

        if self.activations is None or self.gradients is None:
            raise RuntimeError(
                "GradCAM hooks captured no activations/gradients for the target layer"
            )

        # Channel weights: gradients averaged over the two spatial dimensions.
        weights = self.gradients.mean(dim=(2, 3), keepdim=True)
        cam = (weights * self.activations).sum(dim=1, keepdim=True)
        cam = F.relu(cam)

        # Min-max normalise; the epsilon guards against an all-zero map.
        cam = cam - cam.min()
        cam = cam / (cam.max() + 1e-8)

        return cam.squeeze().cpu().numpy()

# Attach Grad-CAM to one layer of the hardened classifier.
# NOTE(review): features[10] is presumably a late convolutional layer of
# RobustCNN - confirm against the model definition.
target_layer = hardened_model.features[10]
gradcam = GradCAM(hardened_model, target_layer)

# Get a test image
img, label = test_data[0]
input_tensor = img.unsqueeze(0).to(device)

# Generate CAM
cam = gradcam.generate(input_tensor)

# Create heatmap overlay.
# Resize the map to the image's own size instead of a hard-coded 32x32;
# note that cv2.resize takes (width, height).
img_height, img_width = img.shape[1], img.shape[2]
cam_resized = cv2.resize(cam, (img_width, img_height))
heatmap = cv2.applyColorMap(np.uint8(255 * cam_resized), cv2.COLORMAP_JET)  # BGR

# Denormalize image
# Assumes `transform` normalises with mean=0.5 / std=0.5 - TODO confirm.
img_np = img.permute(1, 2, 0).cpu().numpy()
img_np = np.clip(img_np * 0.5 + 0.5, 0.0, 1.0)  # back to [0,1]; clip so the uint8 cast cannot wrap
img_np = np.uint8(255 * img_np)

# OpenCV's colour map and imwrite both use BGR channel order. The image is
# assumed to be RGB (the notebook loads images via PIL) - TODO confirm - so it
# is converted first; otherwise red and blue are swapped in the saved file.
img_bgr = cv2.cvtColor(img_np, cv2.COLOR_RGB2BGR)

# Overlay
overlay = cv2.addWeighted(img_bgr, 0.5, heatmap, 0.5, 0)

# Save
gradcam_dir = os.path.join(CONFIG["results_dir"], "gradcam")
os.makedirs(gradcam_dir, exist_ok=True)
save_path = os.path.join(gradcam_dir, "gradcam_sample.png")
cv2.imwrite(save_path, overlay)

print(f"‚úÖ Grad-CAM saved to: {os.path.abspath(save_path)}")

✅ Grad-CAM saved to: /Users/ayushgourav/projectss/AgenticGAN---Tester-/GTSRBT/AGENT/results/gradcam/gradcam_sample.png



##  Output Files:
- **Models:** `./models/`
  - `classifier.pth` - Base classifier
  - `gan_generator.pth` - Trained generator
  - `gan_discriminator.pth` - Trained discriminator
  - `classifier_hardened.pth` - Hardened classifier

- **Generated Images:** `./generated_images/`
- **Failure Cases:** `./failure_cases/`
- **Results:** `./results/`
  - `accuracy_results.txt`
  - `gradcam/gradcam_sample.png`
