In [None]:
# The model
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.datasets as datasets
import albumentations as A
from albumentations.pytorch import ToTensorV2
from torch.utils.data import Dataset, DataLoader
from tqdm import tqdm
import numpy as np
from sklearn.metrics import precision_recall_fscore_support, classification_report
import matplotlib.pyplot as plt
import gc
import os
import json
from datetime import datetime
import seaborn as sns
from sklearn.metrics import confusion_matrix

# Device configuration: use the CUDA GPU when PyTorch reports one, otherwise the CPU.
# `device` is a module-level global read by train_epoch, evaluate and run_experiment.
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

# Memory management function
def clear_memory():
    """Run Python's garbage collector and, when CUDA is available, empty PyTorch's CUDA cache."""
    gc.collect()
    if not torch.cuda.is_available():
        # CPU-only run: there is no CUDA cache to release.
        return
    torch.cuda.empty_cache()

# Training history tracking class
class TrainingHistory:
    """Accumulate per-epoch training statistics and dump them to a JSON file.

    ``history`` maps each field name to a list; every call to :meth:`update`
    appends one entry to each scalar field. ``test_probs`` and ``test_labels``
    only grow when both ``probs`` and ``labels`` are passed, so they can be
    shorter than the other lists.
    """

    def __init__(self):
        self.history = {
            'epoch': [],
            'learning_rate': [],
            'train_loss': [],
            'train_acc': [],
            'test_loss': [],
            'test_acc': [],
            'precision': [],
            'recall': [],
            'f1_score': [],
            'timestamp': [],
            'test_probs': [],
            'test_labels': [],
            'confusion_matrix_path': []
        }

    def update(self, epoch, lr, train_loss, train_acc, test_loss, test_acc, metrics, probs=None, labels=None):
        """Append the results of one epoch.

        Args:
            epoch: Epoch number to record.
            lr: Learning rate used for the epoch.
            train_loss, train_acc, test_loss, test_acc: Scalar results.
            metrics: Mapping with the keys ``'precision'``, ``'recall'`` and ``'f1'``.
            probs: Optional iterable of per-sample probability vectors.
            labels: Optional iterable of per-sample integer labels.

        Raises:
            KeyError: If ``metrics`` lacks one of the three expected keys.
        """
        self.history['epoch'].append(epoch)
        # Every scalar is normalised to a built-in float so that NumPy or
        # tensor scalars (e.g. float32) cannot break json.dump() in save().
        self.history['learning_rate'].append(float(lr))
        self.history['train_loss'].append(float(train_loss))
        self.history['train_acc'].append(float(train_acc))
        self.history['test_loss'].append(float(test_loss))
        self.history['test_acc'].append(float(test_acc))
        self.history['precision'].append(float(metrics['precision']))
        self.history['recall'].append(float(metrics['recall']))
        self.history['f1_score'].append(float(metrics['f1']))
        self.history['timestamp'].append(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))
        # Only the file name is recorded here; this class does not render the image.
        self.history['confusion_matrix_path'].append(f'confusion_matrix_epoch_{epoch}.png')

        if probs is not None and labels is not None:
            # np.asarray accepts both NumPy rows and plain Python sequences.
            self.history['test_probs'].append([np.asarray(prob).tolist() for prob in probs])
            self.history['test_labels'].append([int(label) for label in labels])

    def save(self, file_path):
        """Write the whole history dict to ``file_path`` as indented JSON (overwrites)."""
        with open(file_path, 'w') as f:
            json.dump(self.history, f, indent=4)

# Data Processing and Augmentation
class SVHNDataset(Dataset):
    """Wrap ``torchvision.datasets.SVHN`` so samples go through a keyword-style transform.

    The transform, when given, is called as ``transform(image=array)`` and must
    return a mapping with an ``'image'`` entry (the calling convention used by
    the albumentations pipelines built elsewhere in this file).
    """

    def __init__(self, split='train', transform=None):
        # download=True is passed on every construction, as in the original.
        self.data = datasets.SVHN(root='./data', split=split, download=True)
        self.transform = transform

    def __len__(self):
        return len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for sample ``idx``; the image is a NumPy array
        unless the transform converts it to something else."""
        sample, target = self.data[idx]
        pixels = np.array(sample)

        if not self.transform:
            # No (or an empty/falsy) transform: hand back the raw array.
            return pixels, target

        return self.transform(image=pixels)['image'], target

# Data augmentation class
class SVHNAugmentation:
    """Hold the albumentations pipelines for training and testing.

    Both pipelines end with the same per-channel normalisation followed by
    ``ToTensorV2``; only the training pipeline adds random augmentation.
    """

    def __init__(self):
        def normalize():
            # A fresh Normalize (with fresh lists) per pipeline, as in the original.
            return A.Normalize(mean=[0.4377, 0.4438, 0.4728], std=[0.1980, 0.2010, 0.1970])

        train_steps = [
            A.RandomResizedCrop(height=32, width=32, scale=(0.8, 1.0)),
            A.HorizontalFlip(),
            A.Rotate(limit=20),
            A.RandomBrightnessContrast(brightness_limit=0.2, contrast_limit=0.2, p=0.5),
            normalize(),
            ToTensorV2(),
        ]
        self.train_transform = A.Compose(train_steps)

        test_steps = [normalize(), ToTensorV2()]
        self.test_transform = A.Compose(test_steps)

    def get_train_transform(self):
        """Return the current training pipeline (callers may have replaced the attribute)."""
        return self.train_transform

    def get_test_transform(self):
        """Return the test pipeline."""
        return self.test_transform

# Define the VGG model
class VGG(nn.Module):
    """Small VGG-style CNN with three convolutional stages and a 10-way classifier head.

    Each stage is Conv-BN-ReLU twice, then a 2x2 max-pool and dropout. The
    first linear layer expects ``128 * 4 * 4`` features, i.e. a 4x4 map after
    the three poolings (32x32 inputs).

    Args:
        dropout_rate: Dropout probability after each conv stage; the classifier
            head uses twice this value.
    """

    def __init__(self, dropout_rate=0.25):
        super().__init__()

        # Layers are appended in the same order as a hand-written Sequential,
        # so module indices (and therefore state_dict keys) are unchanged.
        feature_layers = []
        for in_channels, out_channels in ((3, 32), (32, 64), (64, 128)):
            feature_layers += [
                nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
                nn.Conv2d(out_channels, out_channels, kernel_size=3, padding=1),
                nn.BatchNorm2d(out_channels),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2, stride=2),
                nn.Dropout(dropout_rate),
            ]
        self.conv_layers = nn.Sequential(*feature_layers)

        classifier_layers = [
            nn.Linear(128 * 4 * 4, 256),
            nn.BatchNorm1d(256),
            nn.ReLU(),
            nn.Dropout(dropout_rate * 2),
            nn.Linear(256, 10),
        ]
        self.fc_layers = nn.Sequential(*classifier_layers)

    def forward(self, x):
        """Return the raw class logits for a batch of images."""
        features = self.conv_layers(x)
        # Flatten everything except the batch dimension for the linear head.
        flat = features.view(features.size(0), -1)
        return self.fc_layers(flat)

def plot_confusion_matrix(y_true, y_pred, save_path=None):
    """Draw a confusion-matrix heatmap and either save it or show it.

    Args:
        y_true: True labels.
        y_pred: Predicted labels.
        save_path: If truthy, the figure is written to this path (parent
            directories are created as needed); otherwise it is shown.
    """
    cm = confusion_matrix(y_true, y_pred)
    fig = plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    plt.title('Confusion Matrix')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')

    if save_path:
        # os.makedirs('') raises, so only create a directory when the path has one.
        out_dir = os.path.dirname(save_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        plt.savefig(save_path)
    else:
        plt.show()

    # Close this specific figure. Calling plt.clf() after plt.close() would
    # implicitly create a new empty figure, which leaks and shows up as a stray
    # "<Figure ... with 0 Axes>" in notebook output.
    plt.close(fig)

# Training function for one epoch
def train_epoch(model, train_loader, criterion, optimizer):
    """Train ``model`` for one pass over ``train_loader``.

    Args:
        model: Network to train; put into training mode here.
        train_loader: Iterable of ``(images, labels)`` batches.
        criterion: Loss callable returning a scalar tensor per batch.
        optimizer: Optimizer stepping ``model``'s parameters.

    Returns:
        ``(mean_batch_loss, accuracy_percent)``. The loss is the average of the
        per-batch loss values, not a per-sample average.
    """
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    progress_bar = tqdm(train_loader, desc='Training')
    for batches_seen, (images, labels) in enumerate(progress_bar, start=1):
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = outputs.max(1)
        total += labels.size(0)
        correct += predicted.eq(labels).sum().item()

        # running_loss sums one value per batch, so the running average divides
        # by batches seen. Dividing by the sample count shrank the displayed
        # loss by roughly the batch size and disagreed with the returned value.
        progress_bar.set_postfix({'Loss': running_loss / batches_seen, 'Acc': 100. * correct / total})

    return running_loss / len(train_loader), 100. * correct / total

# Evaluation function
def evaluate(model, test_loader, criterion):
    """Evaluate ``model`` on ``test_loader`` without gradient tracking.

    Returns:
        ``(avg_loss, accuracy, all_probs, all_labels, metrics)`` where
        ``avg_loss`` is the mean of the per-batch losses, ``accuracy`` is a
        percentage, ``all_probs`` is a list with one softmax vector per sample,
        ``all_labels`` is a list with one label per sample, and ``metrics``
        holds weighted ``'precision'``, ``'recall'`` and ``'f1'`` as floats.
    """
    model.eval()
    loss_sum = 0.0
    n_correct = 0
    n_seen = 0
    all_probs, all_labels, all_preds = [], [], []

    with torch.no_grad():
        for images, labels in tqdm(test_loader, desc='Evaluating'):
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            loss_sum += criterion(outputs, labels).item()

            probs = torch.nn.functional.softmax(outputs, dim=1)
            predicted = outputs.max(1)[1]
            n_seen += labels.size(0)
            n_correct += predicted.eq(labels).sum().item()

            # extend() over a 2-D array stores one row (one sample) per list item.
            all_probs.extend(probs.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
            all_preds.extend(predicted.cpu().numpy())

    avg_loss = loss_sum / len(test_loader)
    accuracy = 100. * n_correct / n_seen

    precision, recall, f1, _ = precision_recall_fscore_support(
        all_labels, all_preds, average='weighted'
    )
    metrics = {
        name: float(value)
        for name, value in (('precision', precision), ('recall', recall), ('f1', f1))
    }

    return avg_loss, accuracy, all_probs, all_labels, metrics

def run_experiment(config_id, params):
    """Train and evaluate one VGG model on SVHN for a single hyper-parameter set.

    Outputs are written under ``./models/experiment_{config_id}``:
    ``best_model.pth`` (saved whenever the test accuracy improves) and
    ``training_history.json`` (rewritten after every epoch).

    Note: the "best" checkpoint is chosen by accuracy on the test split, since
    no separate validation split is used here.

    Args:
        config_id: Identifier used to build the output directory name.
        params: Dict with the keys ``num_epochs``, ``batch_size``,
            ``learning_rate``, ``optimizer`` (``'adam'``, ``'sgd'`` or
            ``'rmsprop'``), ``dropout``, ``crop_scale``, ``flip_prob``,
            ``rotate_limit``, ``brightness_limit`` and ``contrast_limit``.

    Returns:
        The highest test accuracy (percent) seen over all epochs; ``0`` when
        ``num_epochs`` is 0.

    Raises:
        KeyError: If ``params`` lacks a required key.
        ValueError: If ``params['optimizer']`` is not a supported name.
        Exception: Any other failure is printed with the experiment id and re-raised.
    """
    try:
        num_epochs = params['num_epochs']
        batch_size = params['batch_size']
        learning_rate = params['learning_rate']
        optimizer_type = params['optimizer']
        dropout_rate = params['dropout']

        # Dispatch table instead of an if/elif chain. An unknown name used to
        # leave `optimizer` unbound and crash later with UnboundLocalError.
        optimizer_factories = {
            'adam': lambda parameters: optim.Adam(parameters, lr=learning_rate),
            'sgd': lambda parameters: optim.SGD(parameters, lr=learning_rate,
                                                momentum=0.9),
            'rmsprop': lambda parameters: optim.RMSprop(parameters, lr=learning_rate),
        }
        # Validate before any dataset loading so a typo fails fast.
        if optimizer_type not in optimizer_factories:
            raise ValueError(
                f"Unsupported optimizer {optimizer_type!r}; "
                f"expected one of {sorted(optimizer_factories)}"
            )

        augmentation = SVHNAugmentation()
        # Replace the default training pipeline with one built from `params`.
        augmentation.train_transform = A.Compose([
            A.RandomResizedCrop(height=32, width=32, scale=params['crop_scale']),
            A.HorizontalFlip(p=params['flip_prob']),
            A.Rotate(limit=params['rotate_limit']),
            A.RandomBrightnessContrast(
                brightness_limit=params['brightness_limit'],
                contrast_limit=params['contrast_limit'],
                p=0.5
            ),
            A.Normalize(mean=[0.4377, 0.4438, 0.4728],
                       std=[0.1980, 0.2010, 0.1970]),
            ToTensorV2()
        ])

        history = TrainingHistory()

        train_dataset = SVHNDataset(split='train',
                                  transform=augmentation.get_train_transform())
        test_dataset = SVHNDataset(split='test',
                                 transform=augmentation.get_test_transform())

        train_loader = DataLoader(train_dataset, batch_size=batch_size,
                                shuffle=True, num_workers=0)
        test_loader = DataLoader(test_dataset, batch_size=batch_size,
                               shuffle=False, num_workers=0)

        model = VGG(dropout_rate=dropout_rate).to(device)
        optimizer = optimizer_factories[optimizer_type](model.parameters())
        criterion = nn.CrossEntropyLoss()

        os.makedirs(f'./models/experiment_{config_id}', exist_ok=True)

        best_acc = 0
        for epoch in range(num_epochs):
            train_loss, train_acc = train_epoch(model, train_loader,
                                              criterion, optimizer)
            test_loss, test_acc, probs, labels, metrics = evaluate(
                model, test_loader, criterion
            )

            # The history is 1-based (epoch + 1) ...
            history.update(epoch + 1, learning_rate, train_loss, train_acc,
                         test_loss, test_acc, metrics, probs=probs,
                         labels=labels)

            if test_acc > best_acc:
                best_acc = test_acc
                # ... while the checkpoint keeps the 0-based loop index, as before.
                torch.save({
                    'epoch': epoch,
                    'model_state_dict': model.state_dict(),
                    'optimizer_state_dict': optimizer.state_dict(),
                    'best_acc': best_acc,
                    'config': params
                }, f'./models/experiment_{config_id}/best_model.pth')

            # Rewritten every epoch so an interrupted run keeps its history.
            history.save(f'./models/experiment_{config_id}/training_history.json')

        return best_acc

    except Exception as e:
        print(f"Error in experiment {config_id}: {e}")
        raise

# Single-entry configuration list (Adam, dropout 0.25).
# Each dict supplies the keys read by run_experiment: num_epochs, batch_size,
# learning_rate, optimizer, dropout, crop_scale, flip_prob, rotate_limit,
# brightness_limit and contrast_limit.
configurations_Low_dropout = [
    {
    'num_epochs': 50,
    'batch_size': 64,
    'learning_rate': 0.001,
    'optimizer': 'adam',  
    'dropout': 0.25,
    'crop_scale': (0.8, 1.0),
    'flip_prob': 0.3,    
    'rotate_limit': 15,  # moderate rotation
    'brightness_limit': 0.2,
    'contrast_limit': 0.2
}
]

# Three configurations, one per supported optimizer (Adam, RMSprop, SGD).
# Each dict supplies the keys read by run_experiment.
configurations_test = [
    {
        # Adam configuration
        'num_epochs': 50,
        'batch_size': 64,
        'learning_rate': 0.001,
        'optimizer': 'adam',
        'dropout': 0.25,
        'crop_scale': (0.8, 1.0),
        'flip_prob': 0.3,
        'rotate_limit': 15,
        'brightness_limit': 0.2,
        'contrast_limit': 0.2
    },
    {
        # RMSprop configuration
        'num_epochs': 55,
        'batch_size': 128,
        'learning_rate': 0.0005,
        'optimizer': 'rmsprop',
        'dropout': 0.3,
        'crop_scale': (0.85, 1.0),
        'flip_prob': 0.25,
        'rotate_limit': 12,
        'brightness_limit': 0.15,
        'contrast_limit': 0.15
    },
    {
        # SGD configuration
        'num_epochs': 60,
        'batch_size': 256,
        'learning_rate': 0.01,
        'optimizer': 'sgd',
        'dropout': 0.35,
        'crop_scale': (0.75, 1.0),
        'flip_prob': 0.35,
        'rotate_limit': 20,
        'brightness_limit': 0.25,
        'contrast_limit': 0.25
    }
]

# Ten hyper-parameter configurations; each dict supplies the keys read by
# run_experiment. In the code visible in this file, main() and
# main_compare_test() do not iterate this list.
configurations = [
    {
        # Base configuration
        'num_epochs': 50,
        'batch_size': 64,
        'learning_rate': 0.001,
        'optimizer': 'adam',
        'dropout': 0.25,
        'crop_scale': (0.8, 1.0),
        'flip_prob': 0.5,
        'rotate_limit': 20,
        'brightness_limit': 0.2,
        'contrast_limit': 0.2
    },
    {
        # High learning rate with SGD
        'num_epochs': 60,
        'batch_size': 128,
        'learning_rate': 0.01,
        'optimizer': 'sgd',
        'dropout': 0.3,
        'crop_scale': (0.7, 1.0),
        'flip_prob': 0.7,
        'rotate_limit': 30,
        'brightness_limit': 0.3,
        'contrast_limit': 0.3
    },
    {
        # Small batch size with RMSprop
        'num_epochs': 40,
        'batch_size': 32,
        'learning_rate': 0.0005,
        'optimizer': 'rmsprop',
        'dropout': 0.2,
        'crop_scale': (0.9, 1.0),
        'flip_prob': 0.3,
        'rotate_limit': 15,
        'brightness_limit': 0.1,
        'contrast_limit': 0.1
    },
    {
        # Strong augmentation
        'num_epochs': 70,
        'batch_size': 64,
        'learning_rate': 0.001,
        'optimizer': 'adam',
        'dropout': 0.35,
        'crop_scale': (0.6, 1.0),
        'flip_prob': 0.8,
        'rotate_limit': 40,
        'brightness_limit': 0.4,
        'contrast_limit': 0.4
    },
    {
        # Long training
        'num_epochs': 100,
        'batch_size': 96,
        'learning_rate': 0.0008,
        'optimizer': 'adam',
        'dropout': 0.28,
        'crop_scale': (0.75, 1.0),
        'flip_prob': 0.6,
        'rotate_limit': 25,
        'brightness_limit': 0.25,
        'contrast_limit': 0.25
    },
    {
        # Minimal batch size
        'num_epochs': 45,
        'batch_size': 16,
        'learning_rate': 0.0003,
        'optimizer': 'adam',
        'dropout': 0.15,
        'crop_scale': (0.85, 1.0),
        'flip_prob': 0.4,
        'rotate_limit': 10,
        'brightness_limit': 0.15,
        'contrast_limit': 0.15
    },
    {
        # Maximum batch size
        'num_epochs': 30,
        'batch_size': 256,
        'learning_rate': 0.002,
        'optimizer': 'sgd',
        'dropout': 0.4,
        'crop_scale': (0.7, 0.9),
        'flip_prob': 0.7,
        'rotate_limit': 35,
        'brightness_limit': 0.35,
        'contrast_limit': 0.35
    },
    {
        # Weak augmentation
        'num_epochs': 55,
        'batch_size': 80,
        'learning_rate': 0.0015,
        'optimizer': 'rmsprop',
        'dropout': 0.22,
        'crop_scale': (0.9, 1.0),
        'flip_prob': 0.2,
        'rotate_limit': 5,
        'brightness_limit': 0.1,
        'contrast_limit': 0.1
    },
    {
        # High dropout
        'num_epochs': 65,
        'batch_size': 48,
        'learning_rate': 0.0012,
        'optimizer': 'adam',
        'dropout': 0.5,
        'crop_scale': (0.75, 0.95),
        'flip_prob': 0.6,
        'rotate_limit': 28,
        'brightness_limit': 0.28,
        'contrast_limit': 0.28
    },
    {
        # Balanced configuration
        'num_epochs': 50,
        'batch_size': 128,
        'learning_rate': 0.001,
        'optimizer': 'adam',
        'dropout': 0.3,
        'crop_scale': (0.8, 1.0),
        'flip_prob': 0.5,
        'rotate_limit': 25,
        'brightness_limit': 0.2,
        'contrast_limit': 0.2
    }
]

def main():
    """Run every configuration in ``configurations_test`` and summarise the results.

    Each experiment gets the 1-based id ``i + 1``. After every run the
    accumulated results are rewritten to ``./models/experiment_results.json``
    so that finished experiments survive a later crash.
    """
    results = {}
    # Derive the total from the list actually iterated; the banner used to
    # claim "/10" regardless of how many configurations were run.
    total = len(configurations_test)

    for i, config in enumerate(configurations_test):
        print(f"\nStarting experiment {i+1}/{total}")
        print("Configuration:", config)

        best_acc = run_experiment(i+1, config)
        results[f"experiment_{i+1}"] = {
            "config": config,
            "best_accuracy": best_acc
        }

        # ./models already exists here: run_experiment creates its output directory.
        with open('./models/experiment_results.json', 'w') as f:
            json.dump(results, f, indent=4)

        clear_memory()

    print("\nExperiment Results:")
    for exp_id, result in results.items():
        print(f"\n{exp_id}:")
        print(f"Best accuracy: {result['best_accuracy']:.2f}%")
        print("Configuration:", result['config'])

def main_compare_test():
    """Run the low-dropout comparison experiment(s) and save a JSON summary.

    Iterates ``configurations_Low_dropout``, which is what the original
    comment, banner text and result key all describe. The loop previously ran
    over ``configurations_test`` while reusing one result key and one
    experiment id, so every run overwrote the previous run's results and
    checkpoint directory.

    With a single configuration the experiment id is ``'Test'`` and the result
    key is ``'experiment_low_dropout'``. If the list ever holds several
    entries, a ``_<n>`` suffix keeps ids and keys unique.
    """
    results = {}
    needs_suffix = len(configurations_Low_dropout) > 1

    # only run configurations_Low_dropout
    for i, config in enumerate(configurations_Low_dropout):
        suffix = f"_{i+1}" if needs_suffix else ""
        print("\nStarting Low Dropout experiment")
        print("Configuration:", config)

        best_acc = run_experiment(f'Test{suffix}', config)
        results[f"experiment_low_dropout{suffix}"] = {
            "config": config,
            "best_accuracy": best_acc
        }

        # Rewritten after every run so finished results survive a later crash.
        with open('./models/Test_experiment_results.json', 'w') as f:
            json.dump(results, f, indent=4)

        clear_memory()

    print("\nExperiment Results:")
    for exp_id, result in results.items():
        print(f"\n{exp_id}:")
        print(f"Best accuracy: {result['best_accuracy']:.2f}%")
        print("Configuration:", result['config'])

# Entry point: start the experiment sweep only when this module is the main program.
if __name__ == "__main__":
    main()

Using device: cuda

Starting experiment 10/13
Configuration: {'num_epochs': 50, 'batch_size': 64, 'learning_rate': 0.001, 'optimizer': 'adam', 'dropout': 0.25, 'crop_scale': (0.8, 1.0), 'flip_prob': 0.3, 'rotate_limit': 15, 'brightness_limit': 0.2, 'contrast_limit': 0.2}
Using downloaded and verified file: ./data\train_32x32.mat
Using downloaded and verified file: ./data\test_32x32.mat


Training: 100%|██████████| 1145/1145 [00:54<00:00, 21.04it/s, Loss=0.0152, Acc=67.6]
Evaluating: 100%|██████████| 407/407 [00:07<00:00, 53.82it/s]
Training: 100%|██████████| 1145/1145 [00:54<00:00, 21.13it/s, Loss=0.00859, Acc=82.7]
Evaluating: 100%|██████████| 407/407 [00:07<00:00, 52.41it/s]
Training: 100%|██████████| 1145/1145 [00:56<00:00, 20.15it/s, Loss=0.00744, Acc=85.2]
Evaluating: 100%|██████████| 407/407 [00:07<00:00, 54.49it/s]
Training: 100%|██████████| 1145/1145 [00:56<00:00, 20.09it/s, Loss=0.0068, Acc=86.6] 
Evaluating: 100%|██████████| 407/407 [00:09<00:00, 43.54it/s]
Training: 100%|██████████| 1145/1145 [00:52<00:00, 21.81it/s, Loss=0.00631, Acc=87.7]
Evaluating: 100%|██████████| 407/407 [00:07<00:00, 56.29it/s]
Training: 100%|██████████| 1145/1145 [00:55<00:00, 20.79it/s, Loss=0.00605, Acc=88.3]
Evaluating: 100%|██████████| 407/407 [00:07<00:00, 52.77it/s]
Training: 100%|██████████| 1145/1145 [00:53<00:00, 21.41it/s, Loss=0.00581, Acc=88.8]
Evaluating: 100%|██████████


Starting experiment 11/13
Configuration: {'num_epochs': 55, 'batch_size': 128, 'learning_rate': 0.0005, 'optimizer': 'rmsprop', 'dropout': 0.3, 'crop_scale': (0.85, 1.0), 'flip_prob': 0.25, 'rotate_limit': 12, 'brightness_limit': 0.15, 'contrast_limit': 0.15}
Using downloaded and verified file: ./data\train_32x32.mat
Using downloaded and verified file: ./data\test_32x32.mat


Training: 100%|██████████| 573/573 [00:49<00:00, 11.63it/s, Loss=0.00909, Acc=61]  
Evaluating: 100%|██████████| 204/204 [00:07<00:00, 28.06it/s]
Training: 100%|██████████| 573/573 [00:50<00:00, 11.27it/s, Loss=0.00462, Acc=81.3]
Evaluating: 100%|██████████| 204/204 [00:06<00:00, 30.94it/s]
Training: 100%|██████████| 573/573 [00:45<00:00, 12.65it/s, Loss=0.00394, Acc=84.2]
Evaluating: 100%|██████████| 204/204 [00:06<00:00, 32.49it/s]
Training: 100%|██████████| 573/573 [00:46<00:00, 12.43it/s, Loss=0.00353, Acc=86.2]
Evaluating: 100%|██████████| 204/204 [00:06<00:00, 31.08it/s]
Training: 100%|██████████| 573/573 [00:46<00:00, 12.42it/s, Loss=0.00331, Acc=86.9]
Evaluating: 100%|██████████| 204/204 [00:06<00:00, 33.44it/s]
Training: 100%|██████████| 573/573 [00:44<00:00, 12.97it/s, Loss=0.00309, Acc=87.9]
Evaluating: 100%|██████████| 204/204 [00:06<00:00, 31.40it/s]
Training: 100%|██████████| 573/573 [00:50<00:00, 11.34it/s, Loss=0.00298, Acc=88.5]
Evaluating: 100%|██████████| 204/204 [00


Starting experiment 12/13
Configuration: {'num_epochs': 60, 'batch_size': 256, 'learning_rate': 0.01, 'optimizer': 'sgd', 'dropout': 0.35, 'crop_scale': (0.75, 1.0), 'flip_prob': 0.35, 'rotate_limit': 20, 'brightness_limit': 0.25, 'contrast_limit': 0.25}
Using downloaded and verified file: ./data\train_32x32.mat
Using downloaded and verified file: ./data\test_32x32.mat


Training: 100%|██████████| 287/287 [00:42<00:00,  6.80it/s, Loss=0.00748, Acc=33]  
Evaluating: 100%|██████████| 102/102 [00:06<00:00, 16.59it/s]
Training: 100%|██████████| 287/287 [00:43<00:00,  6.56it/s, Loss=0.00513, Acc=55.3]
Evaluating: 100%|██████████| 102/102 [00:06<00:00, 16.43it/s]
Training: 100%|██████████| 287/287 [00:39<00:00,  7.18it/s, Loss=0.00377, Acc=68.4]
Evaluating: 100%|██████████| 102/102 [00:05<00:00, 17.75it/s]
Training: 100%|██████████| 287/287 [00:39<00:00,  7.21it/s, Loss=0.00317, Acc=73.8]
Evaluating: 100%|██████████| 102/102 [00:05<00:00, 17.64it/s]
Training: 100%|██████████| 287/287 [00:41<00:00,  6.94it/s, Loss=0.00281, Acc=77]  
Evaluating: 100%|██████████| 102/102 [00:06<00:00, 16.07it/s]
Training: 100%|██████████| 287/287 [00:43<00:00,  6.56it/s, Loss=0.00256, Acc=79.2]
Evaluating: 100%|██████████| 102/102 [00:06<00:00, 15.04it/s]
Training: 100%|██████████| 287/287 [00:42<00:00,  6.74it/s, Loss=0.00243, Acc=80.6]
Evaluating: 100%|██████████| 102/102 [00


Experiment Results:

experiment_10:
Best accuracy: 96.26%
Configuration: {'num_epochs': 50, 'batch_size': 64, 'learning_rate': 0.001, 'optimizer': 'adam', 'dropout': 0.25, 'crop_scale': (0.8, 1.0), 'flip_prob': 0.3, 'rotate_limit': 15, 'brightness_limit': 0.2, 'contrast_limit': 0.2}

experiment_11:
Best accuracy: 96.10%
Configuration: {'num_epochs': 55, 'batch_size': 128, 'learning_rate': 0.0005, 'optimizer': 'rmsprop', 'dropout': 0.3, 'crop_scale': (0.85, 1.0), 'flip_prob': 0.25, 'rotate_limit': 12, 'brightness_limit': 0.15, 'contrast_limit': 0.15}

experiment_12:
Best accuracy: 95.62%
Configuration: {'num_epochs': 60, 'batch_size': 256, 'learning_rate': 0.01, 'optimizer': 'sgd', 'dropout': 0.35, 'crop_scale': (0.75, 1.0), 'flip_prob': 0.35, 'rotate_limit': 20, 'brightness_limit': 0.25, 'contrast_limit': 0.25}


In [None]:
# Build the curves
import matplotlib.pyplot as plt
from sklearn.metrics import roc_curve, auc, confusion_matrix
import seaborn as sns
import numpy as np
import json
import os


def plot_confusion_matrix(y_true, y_pred, save_path=None):
    """Draw a confusion-matrix heatmap and either save it or show it.

    Args:
        y_true: True labels.
        y_pred: Predicted labels.
        save_path: If truthy, the figure is written to this path (parent
            directories are created as needed); otherwise it is shown.
    """
    cm = confusion_matrix(y_true, y_pred)
    fig = plt.figure(figsize=(10, 8))
    sns.heatmap(cm, annot=True, fmt='d', cmap='Blues')
    plt.title('Confusion Matrix')
    plt.ylabel('True Label')
    plt.xlabel('Predicted Label')

    if save_path:
        # os.makedirs('') raises, so only create a directory when the path has one.
        out_dir = os.path.dirname(save_path)
        if out_dir:
            os.makedirs(out_dir, exist_ok=True)
        plt.savefig(save_path)
    else:
        plt.show()

    # Close this specific figure. Calling plt.clf() after plt.close() would
    # implicitly create a new empty figure, which leaks and shows up as a stray
    # "<Figure ... with 0 Axes>" in notebook output.
    plt.close(fig)

def create_training_plots(history, config_id):
    """Save a 2x2 overview figure for one experiment.

    Panels: loss curves, accuracy curves, precision/recall/F1 curves, and
    one-vs-rest ROC curves for the 10 classes computed from the last recorded
    epoch. The image is written to
    ``./models/experiment_{config_id}/training_plots_{config_id}.png``.

    Args:
        history: Dict with the per-epoch lists written by the training run
            (``epoch``, ``train_loss``, ``test_loss``, ``train_acc``,
            ``test_acc``, ``precision``, ``recall``, ``f1_score``,
            ``test_probs``, ``test_labels``).
        config_id: Experiment identifier used in the output path.
    """
    # (subplot position, title, y-axis label, [(history key, line format, legend label), ...])
    line_panels = [
        (1, 'Training and Validation Loss', 'Loss',
         [('train_loss', 'b-', 'Training Loss'),
          ('test_loss', 'r-', 'Validation Loss')]),
        (2, 'Training and Validation Accuracy', 'Accuracy (%)',
         [('train_acc', 'b-', 'Training Accuracy'),
          ('test_acc', 'r-', 'Validation Accuracy')]),
        (3, 'Model Metrics Over Time', 'Score',
         [('precision', 'b-', 'Precision'),
          ('recall', 'r-', 'Recall'),
          ('f1_score', 'g-', 'F1 Score')]),
    ]

    plt.figure(figsize=(20, 15))

    for position, title, y_label, series in line_panels:
        plt.subplot(2, 2, position)
        for key, line_format, legend_label in series:
            plt.plot(history['epoch'], history[key], line_format, label=legend_label)
        plt.title(title, fontsize=12)
        plt.xlabel('Epoch')
        plt.ylabel(y_label)
        plt.grid(True)
        plt.legend()

    # Fourth panel: per-class ROC curves from the final epoch's outputs.
    plt.subplot(2, 2, 4)
    final_probs = np.array(history['test_probs'][-1])
    final_labels = np.array(history['test_labels'][-1])

    palette = plt.cm.tab10(np.linspace(0, 1, 10))
    for class_idx, line_color in enumerate(palette):  # 10 classes for SVHN
        is_class = (final_labels == class_idx).astype(int)
        fpr, tpr, _ = roc_curve(is_class, final_probs[:, class_idx])
        roc_auc = auc(fpr, tpr)
        plt.plot(fpr, tpr, color=line_color, label=f'Class {class_idx} (AUC = {roc_auc:.2f})')

    plt.plot([0, 1], [0, 1], 'k--')  # chance diagonal
    plt.xlim([0.0, 1.0])
    plt.ylim([0.0, 1.05])
    plt.xlabel('False Positive Rate')
    plt.ylabel('True Positive Rate')
    plt.title('ROC Curves (Last Epoch)', fontsize=12)
    plt.grid(True)
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')

    plt.tight_layout()
    output_path = f'./models/experiment_{config_id}/training_plots_{config_id}.png'
    plt.savefig(output_path, bbox_inches='tight', dpi=300)
    plt.close()

def create_class_performance_plot(history, config_id):
    """Save a grouped bar chart of per-class accuracy, precision and recall.

    Metrics come from the last recorded epoch in ``history`` and always cover
    all 10 classes. A class with no true samples gets accuracy and recall 0.0,
    and a class that is never predicted gets precision 0.0. Skipping such
    classes made the bar arrays shorter than the 10 x-positions and broke the
    plot, and the precision division hit 0/0.

    Note: per-class "accuracy" here is the share of a class's true samples
    that were predicted correctly, which equals that class's recall.

    Args:
        history: Dict containing ``test_probs`` and ``test_labels`` lists.
        config_id: Experiment identifier used in the output path
            ``./models/experiment_{config_id}/class_performance_{config_id}.png``.
    """
    # Get the last epoch's probabilities and labels
    last_probs = np.array(history['test_probs'][-1])
    last_labels = np.array(history['test_labels'][-1])
    predictions = np.argmax(last_probs, axis=1)

    num_classes = 10
    class_accuracy = []
    class_precision = []
    class_recall = []

    for i in range(num_classes):
        class_mask = last_labels == i
        predicted_mask = predictions == i
        true_positives = np.sum(predicted_mask & class_mask)
        n_true = np.sum(class_mask)
        n_predicted = np.sum(predicted_mask)

        if n_true > 0:
            class_accuracy.append(np.mean(predictions[class_mask] == i))
            class_recall.append(true_positives / n_true)
        else:
            # Class absent from the labels: keep a placeholder so the bars stay aligned.
            class_accuracy.append(0.0)
            class_recall.append(0.0)

        # Guard the 0/0 case when the model never predicts this class.
        class_precision.append(true_positives / n_predicted if n_predicted > 0 else 0.0)

    # Create bar plot
    fig, ax = plt.subplots(figsize=(12, 6))
    x = np.arange(num_classes)
    width = 0.25

    ax.bar(x - width, class_accuracy, width, label='Accuracy', color='skyblue')
    ax.bar(x, class_precision, width, label='Precision', color='lightgreen')
    ax.bar(x + width, class_recall, width, label='Recall', color='salmon')

    ax.set_ylabel('Score')
    ax.set_title('Class-wise Performance Metrics', fontsize=12)
    ax.set_xticks(x)
    ax.set_xticklabels([f'Class {i}' for i in range(num_classes)])
    ax.legend()
    ax.grid(True, alpha=0.3)

    plt.tight_layout()
    plt.savefig(f'./models/experiment_{config_id}/class_performance_{config_id}.png', dpi=300)
    plt.close(fig)

# Build all figures for experiments 10, 11 and 12 from their saved histories.
# A missing training_history.json is not caught here and stops the loop.
for config_id in range(10, 13):
    # Load the training history
    with open(f'./models/experiment_{config_id}/training_history.json', 'r') as f:
        history = json.load(f)

    # Create the plots
    create_training_plots(history, config_id)
    create_class_performance_plot(history, config_id)

    # Create confusion matrix inputs: predictions are the argmax of the
    # last recorded epoch's class probabilities.
    last_labels = np.array(history['test_labels'][-1])
    last_probs = np.array(history['test_probs'][-1])
    predictions = np.argmax(last_probs, axis=1)
    
    # Save confusion matrix
    confusion_matrix_path = f'./models/experiment_{config_id}/confusion_matrix_{config_id}.png'
    plot_confusion_matrix(last_labels, predictions, confusion_matrix_path)

<Figure size 640x480 with 0 Axes>

In [None]:
# Build the table
import pandas as pd
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

def create_summary_table(all_experiments):
    """Build a one-row-per-experiment summary DataFrame.

    For every experiment the last recorded epoch is scored with accuracy and
    macro-averaged precision, recall and F1; the final train and test losses
    are added. All numeric cells are formatted as strings with 4 decimals.

    Args:
        all_experiments: Mapping of experiment id to a history dict holding
            ``test_probs``, ``test_labels``, ``train_loss`` and ``test_loss``.

    Returns:
        A ``pandas.DataFrame`` with the columns ``Experiment ID``, ``Accuracy``,
        ``Precision``, ``Recall``, ``F1 Score``, ``Final Train Loss`` and
        ``Final Val Loss``.
    """
    rows = []

    for config_id, history in all_experiments.items():
        # Predictions are the argmax of the last epoch's class probabilities.
        final_probs = np.array(history['test_probs'][-1])
        y_true = np.array(history['test_labels'][-1])
        y_pred = np.argmax(final_probs, axis=1)

        # Insertion order of this dict defines the column order after the id.
        scores = {
            'Accuracy': accuracy_score(y_true, y_pred),
            'Precision': precision_score(y_true, y_pred, average='macro'),
            'Recall': recall_score(y_true, y_pred, average='macro'),
            'F1 Score': f1_score(y_true, y_pred, average='macro'),
            'Final Train Loss': history['train_loss'][-1],
            'Final Val Loss': history['test_loss'][-1],
        }

        row = {'Experiment ID': f'Config {config_id}'}
        row.update({column: f'{value:.4f}' for column, value in scores.items()})
        rows.append(row)

    return pd.DataFrame(rows)

# Load the saved training history of experiments 1..10, skipping any whose
# file is missing. NOTE(review): `json` is not imported in this cell; it is
# presumably still in scope from an earlier cell -- confirm when running standalone.
all_experiments = {}
for config_id in range(1, 11):
    try:
        with open(f'./models/experiment_{config_id}/training_history.json', 'r') as f:
            all_experiments[config_id] = json.load(f)
    except FileNotFoundError:
        print(f"No data found for experiment {config_id}")
        continue

# Create and display summary table
summary_table = create_summary_table(all_experiments)
print("\nExperiment Results Summary:")
print(summary_table.to_string(index=False))

# Save to CSV (optional); written to the current working directory
summary_table.to_csv('./experiment_summary.csv', index=False)


Experiment Results Summary:
Experiment ID Accuracy Precision Recall F1 Score Final Train Loss Final Val Loss
     Config 0   0.9491    0.9460 0.9461   0.9459           0.2078         0.1880
