In [17]:
import os
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Subset, ConcatDataset
from torchvision.datasets import ImageFolder
import matplotlib.pyplot as plt
import time
from tqdm.notebook import tqdm
from torchvision.models import efficientnet_b0, EfficientNet_B0_Weights
from torch.optim.lr_scheduler import ReduceLROnPlateau, CosineAnnealingLR
import copy
from PIL import Image, ImageFilter
import torchvision.transforms.functional as TF
from sklearn.metrics import confusion_matrix
import seaborn as sns

# Create a smaller subset of data for quicker experiments
TRAIN_SUBSET_SIZE = 8000  # Reduced from 10000
VALID_SUBSET_SIZE = 8000
TEST_SUBSET_SIZE = 8000


In [18]:

# Set random seeds for reproducibility
random_seed = 42
torch.manual_seed(random_seed)
torch.cuda.manual_seed(random_seed)
np.random.seed(random_seed)
random.seed(random_seed)
torch.backends.cudnn.deterministic = True
torch.backends.cudnn.benchmark = False

In [19]:
# Function for checking device
def get_device():
    if torch.backends.mps.is_available():
        try:
            # Try creating a tensor on MPS
            test_tensor = torch.zeros(1, device="mps")
            print(f"Test tensor created on MPS successfully: {test_tensor.device}")
            print("MPS is working properly")
            return torch.device("mps")
        except Exception as e:
            print(f"Error initializing MPS: {e}")
    
    print("Using CPU")
    return torch.device("cpu")

device = get_device()
print(f"Using device: {device}")


Test tensor created on MPS successfully: mps:0
MPS is working properly
Using device: mps


In [20]:
# Create dataset subset function
def create_subset_dataset(original_dataset, num_samples=10000, balanced=True):
    """
    Create a subset of the original dataset with equal class distribution
    """
    if balanced:
        # Get class labels
        targets = torch.tensor([target for _, target in original_dataset.samples])
        classes = torch.unique(targets)
        num_classes = len(classes)
        samples_per_class = num_samples // num_classes
        
        indices = []
        for cls in classes:
            cls_indices = torch.where(targets == cls)[0]
            # If we have fewer samples than requested, take all of them
            if len(cls_indices) <= samples_per_class:
                indices.extend(cls_indices.tolist())
            else:
                # Otherwise randomly sample
                selected = cls_indices[torch.randperm(len(cls_indices))[:samples_per_class]]
                indices.extend(selected.tolist())
        
        return Subset(original_dataset, indices)
    else:
        # Simple random subset
        return Subset(original_dataset, torch.randperm(len(original_dataset))[:num_samples].tolist())

In [40]:
# Define different data augmentation techniques
def cutout_transform(img, n_holes=1, length=16):
    """Apply cutout augmentation to a tensor image"""
    h = img.size(1)
    w = img.size(2)
    
    mask = torch.ones((h, w))
    
    for n in range(n_holes):
        y = np.random.randint(h)
        x = np.random.randint(w)
        
        y1 = np.clip(y - length // 2, 0, h)
        y2 = np.clip(y + length // 2, 0, h)
        x1 = np.clip(x - length // 2, 0, w)
        x2 = np.clip(x + length // 2, 0, w)
        
        mask[y1: y2, x1: x2] = 0.
    
    mask = mask.expand_as(img)
    return img * mask

class CutoutTransform:
    """Wrapper class for the cutout transform to use in torchvision transforms"""
    def __init__(self, n_holes=1, length=16):
        self.n_holes = n_holes
        self.length = length
    
    def __call__(self, img):
        return cutout_transform(img, self.n_holes, self.length)

In [41]:
# Create dictionaries of transforms for our experiments
standard_transforms = {
    'baseline': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ]),
    
    'horizontal_flip': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(p=0.5),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ]),
    
    'rotation': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomRotation(degrees=15),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ]),
    
    'color_jitter': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.ColorJitter(brightness=0.2, contrast=0.2, saturation=0.2, hue=0.1),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ]),
    
    'combined_standard': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.RandomRotation(15),
        transforms.ColorJitter(brightness=0.1, contrast=0.1, saturation=0.1, hue=0.1),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
    ])
}

# Advanced augmentation with Cutout
advanced_transforms = {
    'cutout': transforms.Compose([
        transforms.Resize((224, 224)),
        transforms.RandomHorizontalFlip(),
        transforms.ToTensor(),
        transforms.Normalize(mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225]),
        CutoutTransform(n_holes=1, length=32)
    ])
}

In [23]:
# Load full datasets first
print("Loading original datasets...")
full_train_dataset = ImageFolder(root='data/train', transform=standard_transforms['baseline'])
full_valid_dataset = ImageFolder(root='data/valid', transform=standard_transforms['baseline'])
full_test_dataset = ImageFolder(root='data/test', transform=standard_transforms['baseline'])

# Create reduced subsets
print("Creating subsets...")
train_dataset = create_subset_dataset(full_train_dataset, num_samples=TRAIN_SUBSET_SIZE)
valid_dataset = create_subset_dataset(full_valid_dataset, num_samples=VALID_SUBSET_SIZE)
test_dataset = create_subset_dataset(full_test_dataset, num_samples=TEST_SUBSET_SIZE)

# Save class names
class_names = full_train_dataset.classes
num_classes = len(class_names)
print(f"Classes: {class_names}")
print(f"Training samples: {len(train_dataset)}")
print(f"Validation samples: {len(valid_dataset)}")
print(f"Test samples: {len(test_dataset)}")

Loading original datasets...
Creating subsets...
Classes: ['airplane', 'automobile', 'bird', 'cat', 'deer', 'dog', 'frog', 'horse', 'ship', 'truck']
Training samples: 8000
Validation samples: 8000
Test samples: 8000


In [24]:
# Define model architectures
def create_efficientnet_model(num_classes=10, pretrained=True):
    """Create EfficientNet B0 model"""
    if pretrained:
        weights = EfficientNet_B0_Weights.DEFAULT
        model = efficientnet_b0(weights=weights)
    else:
        model = efficientnet_b0(weights=None)
    
    # Replace classifier
    in_features = model.classifier[1].in_features
    model.classifier = nn.Sequential(
        nn.Dropout(p=0.2, inplace=True),
        nn.Linear(in_features=in_features, out_features=num_classes),
    )
    
    return model

In [25]:
# Define custom CNN as a third architecture option
class CustomCNN(nn.Module):
    def __init__(self, num_classes=10):
        super(CustomCNN, self).__init__()
        self.features = nn.Sequential(
            # First conv block
            nn.Conv2d(3, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.Conv2d(32, 32, kernel_size=3, padding=1),
            nn.BatchNorm2d(32),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            
            # Second conv block
            nn.Conv2d(32, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.Conv2d(64, 64, kernel_size=3, padding=1),
            nn.BatchNorm2d(64),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
            
            # Third conv block
            nn.Conv2d(64, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.Conv2d(128, 128, kernel_size=3, padding=1),
            nn.BatchNorm2d(128),
            nn.ReLU(inplace=True),
            nn.MaxPool2d(kernel_size=2, stride=2),
        )
        
        # Calculate input size to the classifier
        self.avgpool = nn.AdaptiveAvgPool2d((1, 1))
        
        self.classifier = nn.Sequential(
            nn.Dropout(0.5),
            nn.Linear(128, 256),
            nn.ReLU(inplace=True),
            nn.Dropout(0.5),
            nn.Linear(256, num_classes),
        )
    
    def forward(self, x):
        x = self.features(x)
        x = self.avgpool(x)
        x = torch.flatten(x, 1)
        x = self.classifier(x)
        return x

In [26]:
# Training and validation functions
def train_epoch(model, train_loader, criterion, optimizer, device, epoch):
    """Train for one epoch"""
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0
    
    with tqdm(train_loader, desc=f"Epoch {epoch+1}", leave=False) as t:
        for images, labels in t:
            images, labels = images.to(device), labels.to(device)
            
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            
            running_loss += loss.item() * images.size(0)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            
            t.set_postfix(loss=loss.item(), acc=100.*correct/total)
    
    epoch_loss = running_loss / len(train_loader.dataset)
    epoch_acc = 100 * correct / total
    
    return epoch_loss, epoch_acc

In [27]:
def validate(model, valid_loader, criterion, device):
    """Validate the model"""
    model.eval()
    running_loss = 0.0
    correct = 0
    total = 0
    
    with torch.no_grad():
        for images, labels in valid_loader:
            images, labels = images.to(device), labels.to(device)
            
            outputs = model(images)
            loss = criterion(outputs, labels)
            
            running_loss += loss.item() * images.size(0)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    
    epoch_loss = running_loss / len(valid_loader.dataset)
    epoch_acc = 100 * correct / total
    
    return epoch_loss, epoch_acc

In [28]:
def evaluate(model, test_loader, device):
    """Evaluate model on test set"""
    model.eval()
    correct = 0
    total = 0
    
    all_preds = []
    all_labels = []
    
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            
            all_preds.extend(predicted.cpu().numpy())
            all_labels.extend(labels.cpu().numpy())
    
    accuracy = 100 * correct / total
    return accuracy, all_preds, all_labels

In [43]:
# Function to conduct an experiment
def run_experiment(experiment_name, model, train_dataset, valid_dataset, test_dataset, 
                  hyperparams, batch_size=64, num_epochs=10):
    """
    Run a full training experiment with given hyperparameters
    """
    print(f"\n{'='*20} Running experiment: {experiment_name} {'='*20}")
    for key, value in hyperparams.items():
        print(f"{key}: {value}")
    
    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True, num_workers=0, pin_memory=True)
    valid_loader = DataLoader(valid_dataset, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)
    test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False, num_workers=0, pin_memory=True)
    
    # Initialize optimizer
    if hyperparams['optimizer'] == 'adam':
        optimizer = optim.Adam(model.parameters(), lr=hyperparams['learning_rate'], 
                              weight_decay=hyperparams['weight_decay'])
    elif hyperparams['optimizer'] == 'sgd':
        optimizer = optim.SGD(model.parameters(), lr=hyperparams['learning_rate'], 
                             momentum=0.9, weight_decay=hyperparams['weight_decay'])
    
    # Initialize criterion
    criterion = nn.CrossEntropyLoss()
    
    # Initialize scheduler
    if hyperparams['scheduler'] == 'plateau':
        scheduler = ReduceLROnPlateau(optimizer, mode='min', factor=0.5, patience=3)
    elif hyperparams['scheduler'] == 'cosine':
        scheduler = CosineAnnealingLR(optimizer, T_max=num_epochs)
    
    # Lists to store metrics
    train_losses = []
    valid_losses = []
    train_accuracies = []
    valid_accuracies = []
    
    # Training loop
    start_time = time.time()
    best_val_acc = 0
    best_model_wts = None
    
    for epoch in range(num_epochs):
        # Train and validate
        train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device, epoch)
        valid_loss, valid_acc = validate(model, valid_loader, criterion, device)
        
        # Scheduler step
        if hyperparams['scheduler'] == 'plateau':
            scheduler.step(valid_loss)
        else:
            scheduler.step()
        
        # Store metrics
        train_losses.append(train_loss)
        valid_losses.append(valid_loss)
        train_accuracies.append(train_acc)
        valid_accuracies.append(valid_acc)
        
        # Print statistics
        print(f"Epoch {epoch+1}/{num_epochs} | LR: {optimizer.param_groups[0]['lr']:.6f}")
        print(f"Train Loss: {train_loss:.4f} | Train Acc: {train_acc:.2f}%")
        print(f"Valid Loss: {valid_loss:.4f} | Valid Acc: {valid_acc:.2f}%")
        
        # Save best model
        if valid_acc > best_val_acc:
            best_val_acc = valid_acc
            best_model_wts = copy.deepcopy(model.state_dict())
    
    total_time = time.time() - start_time
    print(f"Training completed in {total_time/60:.2f} minutes")
    
    # Load best model
    model.load_state_dict(best_model_wts)
    
    # Evaluate on test set
    test_acc, all_preds, all_labels = evaluate(model, test_loader, device)
    print(f"Test Accuracy: {test_acc:.2f}%")
    
    # Create confusion matrix
    cm = confusion_matrix(all_labels, all_preds)
    
    # Return results
    results = {
        'name': experiment_name,
        'hyperparams': hyperparams,
        'train_losses': train_losses,
        'valid_losses': valid_losses,
        'train_accuracies': train_accuracies,
        'valid_accuracies': valid_accuracies,
        'test_accuracy': test_acc,
        'confusion_matrix': cm,
        'model_state': best_model_wts,
        'all_preds': all_preds,
        'all_labels': all_labels
    }
    
    return results

In [30]:
# ======= Few-shot learning implementation =======
class PrototypicalNetworks(nn.Module):
    def __init__(self, backbone):
        super(PrototypicalNetworks, self).__init__()
        self.backbone = backbone
    
    def forward(self, support_images, support_labels, query_images):
        """
        Implements the forward pass of Prototypical Networks
        
        Args:
            support_images: support set images [n_classes * n_shots, channels, height, width]
            support_labels: support set labels [n_classes * n_shots]
            query_images: query set images [n_queries, channels, height, width]
        
        Returns:
            query_logits: classification logits for the query images
        """
        # Extract features
        support_features = self.backbone(support_images)  # [n_classes * n_shots, feature_dim]
        query_features = self.backbone(query_images)      # [n_queries, feature_dim]
        
        # Compute class prototypes
        n_classes = len(torch.unique(support_labels))
        prototypes = torch.zeros(n_classes, support_features.shape[1], device=support_features.device)
        
        for c in range(n_classes):
            # Select features of class c
            class_mask = (support_labels == c)
            class_features = support_features[class_mask]
            # Average features to get the prototype
            prototypes[c] = class_features.mean(dim=0)
        
        # Compute distances between query features and prototypes
        # Expand dimensions for broadcasting
        query_features = query_features.unsqueeze(1)  # [n_queries, 1, feature_dim]
        prototypes = prototypes.unsqueeze(0)          # [1, n_classes, feature_dim]
        
        # Compute Euclidean distances
        distances = torch.sum((query_features - prototypes)**2, dim=2)
        
        # Convert distances to logits (negative distances)
        return -distances

In [31]:
def few_shot_evaluation(backbone, test_dataset, device, n_way=5, n_shot=5, n_query=15, n_episodes=100):
    """
    Evaluates the backbone on few-shot classification tasks
    """
    backbone.eval()
    
    # Function to create few-shot tasks
    def create_episode(dataset, n_way, n_shot, n_query):
        # Sample n_way classes
        classes = random.sample(range(len(dataset.classes)), n_way)
        
        # Initialize tensors to store images and labels
        support_images = []
        support_labels = []
        query_images = []
        query_labels = []
        
        # For each class, sample n_shot and n_query examples
        for i, cls in enumerate(classes):
            # Get indices of all examples from this class
            class_indices = [idx for idx, (_, label) in enumerate(dataset) if label == cls]
            # Sample support and query sets
            support_indices = random.sample(class_indices, n_shot)
            remaining_indices = [idx for idx in class_indices if idx not in support_indices]
            query_indices = random.sample(remaining_indices, min(n_query, len(remaining_indices)))
            
            # Add to support and query sets
            for idx in support_indices:
                image, _ = dataset[idx]
                support_images.append(image)
                support_labels.append(i)  # Use an index from 0 to n_way-1 as the label
            
            for idx in query_indices:
                image, _ = dataset[idx]
                query_images.append(image)
                query_labels.append(i)
        
        # Convert to tensors
        support_images = torch.stack(support_images)
        support_labels = torch.tensor(support_labels)
        query_images = torch.stack(query_images)
        query_labels = torch.tensor(query_labels)
        
        return support_images, support_labels, query_images, query_labels
    
    # Create prototypical network
    proto_net = PrototypicalNetworks(backbone).to(device)
    
    # List to store accuracies
    accuracies = []
    
    # Evaluate over multiple episodes
    for episode in tqdm(range(n_episodes), desc="Few-shot evaluation"):
        # Create an episode (task)
        support_images, support_labels, query_images, query_labels = create_episode(
            test_dataset, n_way, n_shot, n_query
        )
        
        # Move to device
        support_images = support_images.to(device)
        support_labels = support_labels.to(device)
        query_images = query_images.to(device)
        query_labels = query_labels.to(device)
        
        # Forward pass
        with torch.no_grad():
            query_logits = proto_net(support_images, support_labels, query_images)
            _, query_preds = torch.max(query_logits, dim=1)
            
            # Calculate accuracy
            accuracy = (query_preds == query_labels).float().mean().item() * 100
            accuracies.append(accuracy)
    
    # Return average accuracy
    avg_accuracy = np.mean(accuracies)
    std_accuracy = np.std(accuracies)
    
    print(f"Few-shot learning ({n_way}-way, {n_shot}-shot): {avg_accuracy:.2f}% ± {std_accuracy:.2f}%")
    
    return avg_accuracy, std_accuracy

In [32]:
# Define feature extractor for few-shot learning
class FeatureExtractor(nn.Module):
    def __init__(self):
        super(FeatureExtractor, self).__init__()
        # Use a pre-trained model but remove the final layer
        self.model = efficientnet_b0(weights=EfficientNet_B0_Weights.DEFAULT)
        # Remove the classifier
        self.features = nn.Sequential(*list(self.model.children())[:-1])
        # Add a flatten layer
        self.flatten = nn.Flatten()
    
    def forward(self, x):
        x = self.features(x)
        x = self.flatten(x)
        return x

In [33]:
# Function to implement ensemble prediction
def ensemble_prediction(models, test_loader, device, method='hard'):
    """
    Implement ensemble prediction using multiple models
    
    Args:
        models: List of trained models
        test_loader: DataLoader for test data
        device: Device to run on
        method: 'hard' for majority voting or 'soft' for probability averaging
    
    Returns:
        accuracy: Ensemble accuracy
    """
    
    if not models:
        return 0.0, [], []
    
    all_predictions = []
    all_probs = []
    
    # Get predictions from each model
    for model in models:
        model.eval()
        predictions = []
        probabilities = []
        
        with torch.no_grad():
            for images, _ in test_loader:
                images = images.to(device)
                outputs = model(images)
                probs = torch.softmax(outputs, dim=1)
                
                _, preds = torch.max(outputs, 1)
                predictions.extend(preds.cpu().numpy())
                probabilities.append(probs.cpu())
        
        all_predictions.append(predictions)
        all_probs.append(torch.cat(probabilities, dim=0).numpy())
    
    # Get true labels
    true_labels = []
    for _, labels in test_loader:
        true_labels.extend(labels.numpy())
    
    # Create ensemble predictions
    if method == 'hard':
        # Majority voting
        ensemble_preds = []
        for i in range(len(true_labels)):
            votes = [all_predictions[j][i] for j in range(len(models))]
            # Count occurrences of each class
            vote_counts = np.bincount(votes, minlength=num_classes)
            # Select class with most votes
            ensemble_preds.append(np.argmax(vote_counts))
    else:  # 'soft' voting
        # Average probabilities
        ensemble_probs = np.mean(all_probs, axis=0)
        ensemble_preds = np.argmax(ensemble_probs, axis=1)
    
    # Calculate accuracy
    accuracy = np.mean(np.array(ensemble_preds) == np.array(true_labels)) * 100
    
    return accuracy, ensemble_preds, true_labels

In [34]:
# EXPERIMENT CONFIGURATIONS
# 1. Architecture Comparison (removed ResNet)
architectures = {
    'efficientnet': create_efficientnet_model(num_classes),
    'custom_cnn': CustomCNN(num_classes)
}

# 2. Hyperparameter Configurations for Training Process
training_hyperparams = [
    {'optimizer': 'adam', 'learning_rate': 0.001, 'scheduler': 'plateau', 'weight_decay': 0.0001},
    {'optimizer': 'sgd', 'learning_rate': 0.01, 'scheduler': 'plateau', 'weight_decay': 0.0001},
    {'optimizer': 'adam', 'learning_rate': 0.001, 'scheduler': 'cosine', 'weight_decay': 0.0001}
]

# 3. Hyperparameter Configurations for Regularization
regularization_hyperparams = [
    {'optimizer': 'adam', 'learning_rate': 0.001, 'scheduler': 'plateau', 'weight_decay': 0.0001},
    {'optimizer': 'adam', 'learning_rate': 0.001, 'scheduler': 'plateau', 'weight_decay': 0.001},
    {'optimizer': 'adam', 'learning_rate': 0.001, 'scheduler': 'plateau', 'weight_decay': 0.0}
]

# 4. Data Augmentation Experiments
# We'll use a modified version of the train_dataset for each transform
augmentation_datasets = {}

# Apply each transform to create new datasets
for name, transform in standard_transforms.items():
    augmentation_datasets[name] = ImageFolder(
        root='data/train', 
        transform=transform
    )
    # Create a subset of the data
    augmentation_datasets[name] = create_subset_dataset(augmentation_datasets[name], num_samples=TRAIN_SUBSET_SIZE)

# Add advanced augmentation
for name, transform in advanced_transforms.items():
    augmentation_datasets[name] = ImageFolder(
        root='data/train', 
        transform=transform
    )
    # Create a subset of the data
    augmentation_datasets[name] = create_subset_dataset(augmentation_datasets[name], num_samples=TRAIN_SUBSET_SIZE)

# Dictionary to store results of all experiments
all_results = {}

In [35]:
# ==== RUN EXPERIMENTS ====
# 1. Architecture Comparison
print("\n" + "="*30 + " ARCHITECTURE COMPARISON " + "="*30)
architecture_results = {}

for arch_name, model in architectures.items():
    print(f"\nTraining {arch_name}...")
    model = model.to(device)
    
    hyperparams = {
        'optimizer': 'adam',
        'learning_rate': 0.001,
        'scheduler': 'plateau',
        'weight_decay': 0.0001
    }
    
    results = run_experiment(
        f"Architecture_{arch_name}",
        model, 
        train_dataset, 
        valid_dataset, 
        test_dataset,
        hyperparams,
        batch_size=128,  # Larger batch size for faster training
        num_epochs=3  # Further reduced epochs for faster comparison
    )
    
    architecture_results[arch_name] = results
    all_results[f"Architecture_{arch_name}"] = results
    
    # Save model state
    torch.save(model.state_dict(), f"{arch_name}_model.pth")



Training efficientnet...

optimizer: adam
learning_rate: 0.001
scheduler: plateau
weight_decay: 0.0001


Epoch 1:   0%|          | 0/63 [00:00<?, ?it/s]

Epoch 1/3 | LR: 0.001000
Train Loss: 0.9279 | Train Acc: 68.56%
Valid Loss: 0.6850 | Valid Acc: 76.35%


Epoch 2:   0%|          | 0/63 [00:00<?, ?it/s]

Epoch 2/3 | LR: 0.001000
Train Loss: 0.4403 | Train Acc: 84.61%
Valid Loss: 0.7002 | Valid Acc: 76.81%


Epoch 3:   0%|          | 0/63 [00:00<?, ?it/s]

Epoch 3/3 | LR: 0.001000
Train Loss: 0.3044 | Train Acc: 89.42%
Valid Loss: 0.7406 | Valid Acc: 77.71%
Training completed in 3.67 minutes
Test Accuracy: 78.67%

Training custom_cnn...

optimizer: adam
learning_rate: 0.001
scheduler: plateau
weight_decay: 0.0001


Epoch 1:   0%|          | 0/63 [00:00<?, ?it/s]

Epoch 1/3 | LR: 0.001000
Train Loss: 2.1121 | Train Acc: 20.19%
Valid Loss: 2.1781 | Valid Acc: 19.24%


Epoch 2:   0%|          | 0/63 [00:00<?, ?it/s]

Epoch 2/3 | LR: 0.001000
Train Loss: 1.9828 | Train Acc: 23.55%
Valid Loss: 1.9764 | Valid Acc: 24.98%


Epoch 3:   0%|          | 0/63 [00:00<?, ?it/s]

Epoch 3/3 | LR: 0.001000
Train Loss: 1.9205 | Train Acc: 26.07%
Valid Loss: 1.8664 | Valid Acc: 28.95%
Training completed in 4.00 minutes
Test Accuracy: 28.48%


In [36]:
# 2. Training Hyperparameters Comparison
print("\n" + "="*30 + " TRAINING HYPERPARAMETERS COMPARISON " + "="*30)
training_results = {}

# Use the best architecture from the previous experiment
best_arch = max(architecture_results, key=lambda k: architecture_results[k]['test_accuracy'])
print(f"Using best architecture: {best_arch}")

for i, hyperparams in enumerate(training_hyperparams):
    # Create a new model with the best architecture
    model = architectures[best_arch]
    model = model.to(device)
    
    experiment_name = f"Training_Hyperparams_{i+1}"
    results = run_experiment(
        experiment_name,
        model, 
        train_dataset, 
        valid_dataset, 
        test_dataset,
        hyperparams,
        batch_size=128,
        num_epochs=3
    )
    
    training_results[experiment_name] = results
    all_results[experiment_name] = results


Using best architecture: efficientnet

optimizer: adam
learning_rate: 0.001
scheduler: plateau
weight_decay: 0.0001


Epoch 1:   0%|          | 0/63 [00:00<?, ?it/s]

Epoch 1/3 | LR: 0.001000
Train Loss: 0.2789 | Train Acc: 90.33%
Valid Loss: 0.7669 | Valid Acc: 77.76%


Epoch 2:   0%|          | 0/63 [00:00<?, ?it/s]

Epoch 2/3 | LR: 0.001000
Train Loss: 0.1761 | Train Acc: 94.04%
Valid Loss: 0.8718 | Valid Acc: 77.96%


Epoch 3:   0%|          | 0/63 [00:00<?, ?it/s]

Epoch 3/3 | LR: 0.001000
Train Loss: 0.1405 | Train Acc: 95.34%
Valid Loss: 1.0005 | Valid Acc: 76.54%
Training completed in 3.64 minutes
Test Accuracy: 77.96%

optimizer: sgd
learning_rate: 0.01
scheduler: plateau
weight_decay: 0.0001


Epoch 1:   0%|          | 0/63 [00:00<?, ?it/s]

Epoch 1/3 | LR: 0.010000
Train Loss: 0.0840 | Train Acc: 97.38%
Valid Loss: 0.7036 | Valid Acc: 81.00%


Epoch 2:   0%|          | 0/63 [00:00<?, ?it/s]

Epoch 2/3 | LR: 0.010000
Train Loss: 0.0436 | Train Acc: 98.76%
Valid Loss: 0.7085 | Valid Acc: 81.81%


Epoch 3:   0%|          | 0/63 [00:00<?, ?it/s]

Epoch 3/3 | LR: 0.010000
Train Loss: 0.0280 | Train Acc: 99.21%
Valid Loss: 0.7164 | Valid Acc: 82.04%
Training completed in 3.64 minutes
Test Accuracy: 81.65%

optimizer: adam
learning_rate: 0.001
scheduler: cosine
weight_decay: 0.0001


Epoch 1:   0%|          | 0/63 [00:00<?, ?it/s]

Epoch 1/3 | LR: 0.000750
Train Loss: 0.1856 | Train Acc: 93.92%
Valid Loss: 0.8499 | Valid Acc: 77.79%


Epoch 2:   0%|          | 0/63 [00:00<?, ?it/s]

Epoch 2/3 | LR: 0.000250
Train Loss: 0.0929 | Train Acc: 97.03%
Valid Loss: 0.8195 | Valid Acc: 80.06%


Epoch 3:   0%|          | 0/63 [00:00<?, ?it/s]

Epoch 3/3 | LR: 0.000000
Train Loss: 0.0386 | Train Acc: 98.89%
Valid Loss: 0.7621 | Valid Acc: 81.89%
Training completed in 3.62 minutes
Test Accuracy: 81.79%


In [37]:
# 3. Regularization Hyperparameters Comparison
print("\n" + "="*30 + " REGULARIZATION HYPERPARAMETERS COMPARISON " + "="*30)
regularization_results = {}

for i, hyperparams in enumerate(regularization_hyperparams):
    # Create a new model with the best architecture
    model = architectures[best_arch]
    model = model.to(device)
    
    experiment_name = f"Regularization_Hyperparams_{i+1}"
    results = run_experiment(
        experiment_name,
        model, 
        train_dataset, 
        valid_dataset, 
        test_dataset,
        hyperparams,
        batch_size=128,
        num_epochs=3
    )
    
    regularization_results[experiment_name] = results
    all_results[experiment_name] = results



optimizer: adam
learning_rate: 0.001
scheduler: plateau
weight_decay: 0.0001


Epoch 1:   0%|          | 0/63 [00:00<?, ?it/s]

Epoch 1/3 | LR: 0.001000
Train Loss: 0.2837 | Train Acc: 89.97%
Valid Loss: 0.7890 | Valid Acc: 77.85%


Epoch 2:   0%|          | 0/63 [00:00<?, ?it/s]

Epoch 2/3 | LR: 0.001000
Train Loss: 0.1899 | Train Acc: 93.56%
Valid Loss: 0.8499 | Valid Acc: 76.26%


Epoch 3:   0%|          | 0/63 [00:00<?, ?it/s]

Epoch 3/3 | LR: 0.001000
Train Loss: 0.1632 | Train Acc: 94.66%
Valid Loss: 0.9644 | Valid Acc: 76.46%
Training completed in 3.04 minutes
Test Accuracy: 77.79%

optimizer: adam
learning_rate: 0.001
scheduler: plateau
weight_decay: 0.001


Epoch 1:   0%|          | 0/63 [00:00<?, ?it/s]

In [38]:
# This code contains the completion of the comprehensive experiments

# 4. Data Augmentation Experiments
print("\n" + "="*30 + " DATA AUGMENTATION COMPARISON " + "="*30)
augmentation_results = {}

# Create a new model with the best architecture
best_arch = 'efficientnet'  # Default to efficientnet since we removed resnet
model = architectures[best_arch]
model = model.to(device)

# Baseline hyperparameters
hyperparams = {
    'optimizer': 'adam',
    'learning_rate': 0.001,
    'scheduler': 'plateau',
    'weight_decay': 0.0001
}

# Standard augmentations
for aug_name in ['baseline', 'horizontal_flip', 'rotation', 'color_jitter']:
    print(f"\nTesting standard augmentation: {aug_name}")
    
    experiment_name = f"Augmentation_{aug_name}"
    results = run_experiment(
        experiment_name,
        model,
        augmentation_datasets[aug_name],
        valid_dataset,
        test_dataset,
        hyperparams,
        batch_size=128,
        num_epochs=3
    )
    
    augmentation_results[experiment_name] = results
    all_results[experiment_name] = results



Testing standard augmentation: baseline

optimizer: adam
learning_rate: 0.001
scheduler: plateau
weight_decay: 0.0001


Epoch 1:   0%|          | 0/63 [00:00<?, ?it/s]

Epoch 1/3 | LR: 0.001000
Train Loss: 0.6271 | Train Acc: 79.83%
Valid Loss: 0.5596 | Valid Acc: 80.09%


Epoch 2:   0%|          | 0/63 [00:00<?, ?it/s]

Epoch 2/3 | LR: 0.001000
Train Loss: 0.2833 | Train Acc: 90.66%
Valid Loss: 0.6234 | Valid Acc: 79.76%


Epoch 3:   0%|          | 0/63 [00:00<?, ?it/s]

Epoch 3/3 | LR: 0.001000
Train Loss: 0.1711 | Train Acc: 94.10%
Valid Loss: 0.7159 | Valid Acc: 79.97%
Training completed in 3.65 minutes
Test Accuracy: 79.97%

Testing standard augmentation: horizontal_flip

optimizer: adam
learning_rate: 0.001
scheduler: plateau
weight_decay: 0.0001


Epoch 1:   0%|          | 0/63 [00:00<?, ?it/s]

Epoch 1/3 | LR: 0.001000
Train Loss: 0.5946 | Train Acc: 79.95%
Valid Loss: 0.5288 | Valid Acc: 81.36%


Epoch 2:   0%|          | 0/63 [00:00<?, ?it/s]

Epoch 2/3 | LR: 0.001000
Train Loss: 0.3255 | Train Acc: 88.34%
Valid Loss: 0.6226 | Valid Acc: 79.64%


Epoch 3:   0%|          | 0/63 [00:00<?, ?it/s]

Epoch 3/3 | LR: 0.001000
Train Loss: 0.2498 | Train Acc: 91.33%
Valid Loss: 0.6889 | Valid Acc: 79.74%
Training completed in 3.61 minutes
Test Accuracy: 81.31%

Testing standard augmentation: rotation

optimizer: adam
learning_rate: 0.001
scheduler: plateau
weight_decay: 0.0001


Epoch 1:   0%|          | 0/63 [00:00<?, ?it/s]

Epoch 1/3 | LR: 0.001000
Train Loss: 0.6151 | Train Acc: 78.47%
Valid Loss: 0.5297 | Valid Acc: 81.31%


Epoch 2:   0%|          | 0/63 [00:00<?, ?it/s]

Epoch 2/3 | LR: 0.001000
Train Loss: 0.3593 | Train Acc: 87.11%
Valid Loss: 0.5764 | Valid Acc: 80.91%


Epoch 3:   0%|          | 0/63 [00:00<?, ?it/s]

Epoch 3/3 | LR: 0.001000
Train Loss: 0.2685 | Train Acc: 90.42%
Valid Loss: 0.6821 | Valid Acc: 79.61%
Training completed in 3.63 minutes
Test Accuracy: 81.64%

Testing standard augmentation: color_jitter

optimizer: adam
learning_rate: 0.001
scheduler: plateau
weight_decay: 0.0001


Epoch 1:   0%|          | 0/63 [00:00<?, ?it/s]

Epoch 1/3 | LR: 0.001000
Train Loss: 0.5929 | Train Acc: 79.61%
Valid Loss: 0.5431 | Valid Acc: 81.21%


Epoch 2:   0%|          | 0/63 [00:00<?, ?it/s]

Epoch 2/3 | LR: 0.001000
Train Loss: 0.3470 | Train Acc: 87.94%
Valid Loss: 0.5692 | Valid Acc: 81.00%


Epoch 3:   0%|          | 0/63 [00:00<?, ?it/s]

Epoch 3/3 | LR: 0.001000
Train Loss: 0.2310 | Train Acc: 92.05%
Valid Loss: 0.7042 | Valid Acc: 79.19%
Training completed in 3.64 minutes
Test Accuracy: 80.99%


In [44]:
# Advanced augmentation (cutout)
print("\nTesting advanced augmentation: cutout")
experiment_name = "Augmentation_cutout"
results = run_experiment(
    experiment_name,
    model,
    augmentation_datasets['cutout'],
    valid_dataset,
    test_dataset,
    hyperparams,
    batch_size=128,
    num_epochs=3
)
augmentation_results[experiment_name] = results
all_results[experiment_name] = results


Testing advanced augmentation: cutout

optimizer: adam
learning_rate: 0.001
scheduler: plateau
weight_decay: 0.0001


Epoch 1:   0%|          | 0/63 [00:00<?, ?it/s]

Epoch 1/3 | LR: 0.001000
Train Loss: 0.5497 | Train Acc: 80.76%
Valid Loss: 0.4944 | Valid Acc: 82.50%


Epoch 2:   0%|          | 0/63 [00:00<?, ?it/s]

Epoch 2/3 | LR: 0.001000
Train Loss: 0.3231 | Train Acc: 88.62%
Valid Loss: 0.5369 | Valid Acc: 82.15%


Epoch 3:   0%|          | 0/63 [00:00<?, ?it/s]

Epoch 3/3 | LR: 0.001000
Train Loss: 0.2323 | Train Acc: 91.91%
Valid Loss: 0.5816 | Valid Acc: 81.72%
Training completed in 2.89 minutes
Test Accuracy: 82.30%


In [None]:
# 5. Few-shot Learning Experiment
print("\n" + "="*30 + " FEW-SHOT LEARNING " + "="*30)

# Create a feature extractor for few-shot learning
feature_extractor = FeatureExtractor().to(device)

print("Evaluating few-shot learning capabilities...")
few_shot_acc, few_shot_std = few_shot_evaluation(
    feature_extractor, 
    test_dataset, 
    device, 
    n_way=5,    # 5-way classification 
    n_shot=5,   # 5-shot learning
    n_query=15, # 15 query samples per class
    n_episodes=20  # 20 episodes for quick evaluation
)

few_shot_results = {
    'name': 'Few-shot Learning (5-way, 5-shot)',
    'accuracy': few_shot_acc,
    'std': few_shot_std
}
all_results['few_shot'] = few_shot_results

In [45]:
# 6. Reduced Training Set Size Experiment
print("\n" + "="*30 + " REDUCED TRAINING SET EXPERIMENT " + "="*30)

# Create even smaller training subsets
smaller_train_sizes = [TRAIN_SUBSET_SIZE // 2, TRAIN_SUBSET_SIZE // 4]
reduction_results = {}

for size in smaller_train_sizes:
    print(f"\nTesting with reduced training set size: {size} samples")
    
    # Create reduced training dataset
    reduced_train_dataset = create_subset_dataset(full_train_dataset, num_samples=size)
    
    # Create a new model with the best architecture
    model = architectures[best_arch]
    model = model.to(device)
    
    experiment_name = f"Reduced_Train_Size_{size}"
    results = run_experiment(
        experiment_name,
        model,
        reduced_train_dataset,
        valid_dataset,
        test_dataset,
        hyperparams,
        batch_size=128,
        num_epochs=3
    )
    
    reduction_results[experiment_name] = results
    all_results[experiment_name] = results



Testing with reduced training set size: 4000 samples

optimizer: adam
learning_rate: 0.001
scheduler: plateau
weight_decay: 0.0001


Epoch 1:   0%|          | 0/32 [00:00<?, ?it/s]

Epoch 1/3 | LR: 0.001000
Train Loss: 0.5170 | Train Acc: 82.25%
Valid Loss: 0.5129 | Valid Acc: 81.71%


Epoch 2:   0%|          | 0/32 [00:00<?, ?it/s]

Epoch 2/3 | LR: 0.001000
Train Loss: 0.2074 | Train Acc: 93.03%
Valid Loss: 0.5711 | Valid Acc: 81.25%


Epoch 3:   0%|          | 0/32 [00:00<?, ?it/s]

Epoch 3/3 | LR: 0.001000
Train Loss: 0.1077 | Train Acc: 96.62%
Valid Loss: 0.6575 | Valid Acc: 81.09%
Training completed in 1.81 minutes
Test Accuracy: 81.36%

Testing with reduced training set size: 2000 samples

optimizer: adam
learning_rate: 0.001
scheduler: plateau
weight_decay: 0.0001


Epoch 1:   0%|          | 0/16 [00:00<?, ?it/s]

Epoch 1/3 | LR: 0.001000
Train Loss: 0.5268 | Train Acc: 81.25%
Valid Loss: 0.5779 | Valid Acc: 80.15%


Epoch 2:   0%|          | 0/16 [00:00<?, ?it/s]

Epoch 2/3 | LR: 0.001000
Train Loss: 0.2129 | Train Acc: 93.55%
Valid Loss: 0.5439 | Valid Acc: 81.56%


Epoch 3:   0%|          | 0/16 [00:00<?, ?it/s]

Epoch 3/3 | LR: 0.001000
Train Loss: 0.0760 | Train Acc: 97.90%
Valid Loss: 0.6482 | Valid Acc: 80.59%
Training completed in 1.29 minutes
Test Accuracy: 81.90%


In [46]:
# 7. Ensemble Methods
print("\n" + "="*30 + " ENSEMBLE METHODS " + "="*30)

# We'll use the models we've already trained
# Collect all saved models from previous experiments
model_states = [
    all_results[k]['model_state'] 
    for k in all_results 
    if 'model_state' in all_results[k]
]

# Create test data loader for ensemble evaluation
test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False, num_workers=0)

# Create ensemble models
ensemble_models = []
for i, state_dict in enumerate(model_states[:3]):  # Use top 3 models
    # Create a new model with the best architecture
    model = architectures[best_arch]
    model.load_state_dict(state_dict)
    model = model.to(device)
    ensemble_models.append(model)

print("Testing ensemble methods...")
# Hard voting
hard_accuracy, hard_preds, hard_labels = ensemble_prediction(
    ensemble_models,
    test_loader,
    device,
    method='hard'
)
print(f"Hard voting ensemble accuracy: {hard_accuracy:.2f}%")

# Soft voting
soft_accuracy, soft_preds, soft_labels = ensemble_prediction(
    ensemble_models,
    test_loader,
    device,
    method='soft'
)
print(f"Soft voting ensemble accuracy: {soft_accuracy:.2f}%")

ensemble_results = {
    'hard_voting': {
        'accuracy': hard_accuracy,
        'predictions': hard_preds,
        'labels': hard_labels
    },
    'soft_voting': {
        'accuracy': soft_accuracy,
        'predictions': soft_preds,
        'labels': soft_labels
    }
}
all_results['ensemble'] = ensemble_results




RuntimeError: Error(s) in loading state_dict for EfficientNet:
	Missing key(s) in state_dict: "features.0.0.weight", "features.0.1.weight", "features.0.1.bias", "features.0.1.running_mean", "features.0.1.running_var", "features.1.0.block.0.0.weight", "features.1.0.block.0.1.weight", "features.1.0.block.0.1.bias", "features.1.0.block.0.1.running_mean", "features.1.0.block.0.1.running_var", "features.1.0.block.1.fc1.weight", "features.1.0.block.1.fc1.bias", "features.1.0.block.1.fc2.weight", "features.1.0.block.1.fc2.bias", "features.1.0.block.2.0.weight", "features.1.0.block.2.1.weight", "features.1.0.block.2.1.bias", "features.1.0.block.2.1.running_mean", "features.1.0.block.2.1.running_var", "features.2.0.block.0.0.weight", "features.2.0.block.0.1.weight", "features.2.0.block.0.1.bias", "features.2.0.block.0.1.running_mean", "features.2.0.block.0.1.running_var", "features.2.0.block.1.0.weight", "features.2.0.block.1.1.weight", "features.2.0.block.1.1.bias", "features.2.0.block.1.1.running_mean", "features.2.0.block.1.1.running_var", "features.2.0.block.2.fc1.weight", "features.2.0.block.2.fc1.bias", "features.2.0.block.2.fc2.weight", "features.2.0.block.2.fc2.bias", "features.2.0.block.3.0.weight", "features.2.0.block.3.1.weight", "features.2.0.block.3.1.bias", "features.2.0.block.3.1.running_mean", "features.2.0.block.3.1.running_var", "features.2.1.block.0.0.weight", "features.2.1.block.0.1.weight", "features.2.1.block.0.1.bias", "features.2.1.block.0.1.running_mean", "features.2.1.block.0.1.running_var", "features.2.1.block.1.0.weight", "features.2.1.block.1.1.weight", "features.2.1.block.1.1.bias", "features.2.1.block.1.1.running_mean", "features.2.1.block.1.1.running_var", "features.2.1.block.2.fc1.weight", "features.2.1.block.2.fc1.bias", "features.2.1.block.2.fc2.weight", "features.2.1.block.2.fc2.bias", "features.2.1.block.3.0.weight", "features.2.1.block.3.1.weight", "features.2.1.block.3.1.bias", "features.2.1.block.3.1.running_mean", "features.2.1.block.3.1.running_var", "features.3.0.block.0.0.weight", "features.3.0.block.0.1.weight", "features.3.0.block.0.1.bias", "features.3.0.block.0.1.running_mean", "features.3.0.block.0.1.running_var", "features.3.0.block.1.0.weight", "features.3.0.block.1.1.weight", "features.3.0.block.1.1.bias", "features.3.0.block.1.1.running_mean", "features.3.0.block.1.1.running_var", "features.3.0.block.2.fc1.weight", "features.3.0.block.2.fc1.bias", "features.3.0.block.2.fc2.weight", "features.3.0.block.2.fc2.bias", "features.3.0.block.3.0.weight", "features.3.0.block.3.1.weight", "features.3.0.block.3.1.bias", "features.3.0.block.3.1.running_mean", "features.3.0.block.3.1.running_var", "features.3.1.block.0.0.weight", "features.3.1.block.0.1.weight", "features.3.1.block.0.1.bias", "features.3.1.block.0.1.running_mean", "features.3.1.block.0.1.running_var", "features.3.1.block.1.0.weight", "features.3.1.block.1.1.weight", "features.3.1.block.1.1.bias", "features.3.1.block.1.1.running_mean", "features.3.1.block.1.1.running_var", "features.3.1.block.2.fc1.weight", "features.3.1.block.2.fc1.bias", "features.3.1.block.2.fc2.weight", "features.3.1.block.2.fc2.bias", "features.3.1.block.3.0.weight", "features.3.1.block.3.1.weight", "features.3.1.block.3.1.bias", "features.3.1.block.3.1.running_mean", "features.3.1.block.3.1.running_var", "features.4.0.block.0.0.weight", "features.4.0.block.0.1.weight", "features.4.0.block.0.1.bias", "features.4.0.block.0.1.running_mean", "features.4.0.block.0.1.running_var", "features.4.0.block.1.0.weight", "features.4.0.block.1.1.weight", "features.4.0.block.1.1.bias", "features.4.0.block.1.1.running_mean", "features.4.0.block.1.1.running_var", "features.4.0.block.2.fc1.weight", "features.4.0.block.2.fc1.bias", "features.4.0.block.2.fc2.weight", "features.4.0.block.2.fc2.bias", "features.4.0.block.3.0.weight", "features.4.0.block.3.1.weight", "features.4.0.block.3.1.bias", "features.4.0.block.3.1.running_mean", "features.4.0.block.3.1.running_var", "features.4.1.block.0.0.weight", "features.4.1.block.0.1.weight", "features.4.1.block.0.1.bias", "features.4.1.block.0.1.running_mean", "features.4.1.block.0.1.running_var", "features.4.1.block.1.0.weight", "features.4.1.block.1.1.weight", "features.4.1.block.1.1.bias", "features.4.1.block.1.1.running_mean", "features.4.1.block.1.1.running_var", "features.4.1.block.2.fc1.weight", "features.4.1.block.2.fc1.bias", "features.4.1.block.2.fc2.weight", "features.4.1.block.2.fc2.bias", "features.4.1.block.3.0.weight", "features.4.1.block.3.1.weight", "features.4.1.block.3.1.bias", "features.4.1.block.3.1.running_mean", "features.4.1.block.3.1.running_var", "features.4.2.block.0.0.weight", "features.4.2.block.0.1.weight", "features.4.2.block.0.1.bias", "features.4.2.block.0.1.running_mean", "features.4.2.block.0.1.running_var", "features.4.2.block.1.0.weight", "features.4.2.block.1.1.weight", "features.4.2.block.1.1.bias", "features.4.2.block.1.1.running_mean", "features.4.2.block.1.1.running_var", "features.4.2.block.2.fc1.weight", "features.4.2.block.2.fc1.bias", "features.4.2.block.2.fc2.weight", "features.4.2.block.2.fc2.bias", "features.4.2.block.3.0.weight", "features.4.2.block.3.1.weight", "features.4.2.block.3.1.bias", "features.4.2.block.3.1.running_mean", "features.4.2.block.3.1.running_var", "features.5.0.block.0.0.weight", "features.5.0.block.0.1.weight", "features.5.0.block.0.1.bias", "features.5.0.block.0.1.running_mean", "features.5.0.block.0.1.running_var", "features.5.0.block.1.0.weight", "features.5.0.block.1.1.weight", "features.5.0.block.1.1.bias", "features.5.0.block.1.1.running_mean", "features.5.0.block.1.1.running_var", "features.5.0.block.2.fc1.weight", "features.5.0.block.2.fc1.bias", "features.5.0.block.2.fc2.weight", "features.5.0.block.2.fc2.bias", "features.5.0.block.3.0.weight", "features.5.0.block.3.1.weight", "features.5.0.block.3.1.bias", "features.5.0.block.3.1.running_mean", "features.5.0.block.3.1.running_var", "features.5.1.block.0.0.weight", "features.5.1.block.0.1.weight", "features.5.1.block.0.1.bias", "features.5.1.block.0.1.running_mean", "features.5.1.block.0.1.running_var", "features.5.1.block.1.0.weight", "features.5.1.block.1.1.weight", "features.5.1.block.1.1.bias", "features.5.1.block.1.1.running_mean", "features.5.1.block.1.1.running_var", "features.5.1.block.2.fc1.weight", "features.5.1.block.2.fc1.bias", "features.5.1.block.2.fc2.weight", "features.5.1.block.2.fc2.bias", "features.5.1.block.3.0.weight", "features.5.1.block.3.1.weight", "features.5.1.block.3.1.bias", "features.5.1.block.3.1.running_mean", "features.5.1.block.3.1.running_var", "features.5.2.block.0.0.weight", "features.5.2.block.0.1.weight", "features.5.2.block.0.1.bias", "features.5.2.block.0.1.running_mean", "features.5.2.block.0.1.running_var", "features.5.2.block.1.0.weight", "features.5.2.block.1.1.weight", "features.5.2.block.1.1.bias", "features.5.2.block.1.1.running_mean", "features.5.2.block.1.1.running_var", "features.5.2.block.2.fc1.weight", "features.5.2.block.2.fc1.bias", "features.5.2.block.2.fc2.weight", "features.5.2.block.2.fc2.bias", "features.5.2.block.3.0.weight", "features.5.2.block.3.1.weight", "features.5.2.block.3.1.bias", "features.5.2.block.3.1.running_mean", "features.5.2.block.3.1.running_var", "features.6.0.block.0.0.weight", "features.6.0.block.0.1.weight", "features.6.0.block.0.1.bias", "features.6.0.block.0.1.running_mean", "features.6.0.block.0.1.running_var", "features.6.0.block.1.0.weight", "features.6.0.block.1.1.weight", "features.6.0.block.1.1.bias", "features.6.0.block.1.1.running_mean", "features.6.0.block.1.1.running_var", "features.6.0.block.2.fc1.weight", "features.6.0.block.2.fc1.bias", "features.6.0.block.2.fc2.weight", "features.6.0.block.2.fc2.bias", "features.6.0.block.3.0.weight", "features.6.0.block.3.1.weight", "features.6.0.block.3.1.bias", "features.6.0.block.3.1.running_mean", "features.6.0.block.3.1.running_var", "features.6.1.block.0.0.weight", "features.6.1.block.0.1.weight", "features.6.1.block.0.1.bias", "features.6.1.block.0.1.running_mean", "features.6.1.block.0.1.running_var", "features.6.1.block.1.0.weight", "features.6.1.block.1.1.weight", "features.6.1.block.1.1.bias", "features.6.1.block.1.1.running_mean", "features.6.1.block.1.1.running_var", "features.6.1.block.2.fc1.weight", "features.6.1.block.2.fc1.bias", "features.6.1.block.2.fc2.weight", "features.6.1.block.2.fc2.bias", "features.6.1.block.3.0.weight", "features.6.1.block.3.1.weight", "features.6.1.block.3.1.bias", "features.6.1.block.3.1.running_mean", "features.6.1.block.3.1.running_var", "features.6.2.block.0.0.weight", "features.6.2.block.0.1.weight", "features.6.2.block.0.1.bias", "features.6.2.block.0.1.running_mean", "features.6.2.block.0.1.running_var", "features.6.2.block.1.0.weight", "features.6.2.block.1.1.weight", "features.6.2.block.1.1.bias", "features.6.2.block.1.1.running_mean", "features.6.2.block.1.1.running_var", "features.6.2.block.2.fc1.weight", "features.6.2.block.2.fc1.bias", "features.6.2.block.2.fc2.weight", "features.6.2.block.2.fc2.bias", "features.6.2.block.3.0.weight", "features.6.2.block.3.1.weight", "features.6.2.block.3.1.bias", "features.6.2.block.3.1.running_mean", "features.6.2.block.3.1.running_var", "features.6.3.block.0.0.weight", "features.6.3.block.0.1.weight", "features.6.3.block.0.1.bias", "features.6.3.block.0.1.running_mean", "features.6.3.block.0.1.running_var", "features.6.3.block.1.0.weight", "features.6.3.block.1.1.weight", "features.6.3.block.1.1.bias", "features.6.3.block.1.1.running_mean", "features.6.3.block.1.1.running_var", "features.6.3.block.2.fc1.weight", "features.6.3.block.2.fc1.bias", "features.6.3.block.2.fc2.weight", "features.6.3.block.2.fc2.bias", "features.6.3.block.3.0.weight", "features.6.3.block.3.1.weight", "features.6.3.block.3.1.bias", "features.6.3.block.3.1.running_mean", "features.6.3.block.3.1.running_var", "features.7.0.block.0.0.weight", "features.7.0.block.0.1.weight", "features.7.0.block.0.1.bias", "features.7.0.block.0.1.running_mean", "features.7.0.block.0.1.running_var", "features.7.0.block.1.0.weight", "features.7.0.block.1.1.weight", "features.7.0.block.1.1.bias", "features.7.0.block.1.1.running_mean", "features.7.0.block.1.1.running_var", "features.7.0.block.2.fc1.weight", "features.7.0.block.2.fc1.bias", "features.7.0.block.2.fc2.weight", "features.7.0.block.2.fc2.bias", "features.7.0.block.3.0.weight", "features.7.0.block.3.1.weight", "features.7.0.block.3.1.bias", "features.7.0.block.3.1.running_mean", "features.7.0.block.3.1.running_var", "features.8.0.weight", "features.8.1.weight", "features.8.1.bias", "features.8.1.running_mean", "features.8.1.running_var". 
	Unexpected key(s) in state_dict: "features.10.weight", "features.10.bias", "features.11.weight", "features.11.bias", "features.11.running_mean", "features.11.running_var", "features.11.num_batches_tracked", "features.14.weight", "features.14.bias", "features.15.weight", "features.15.bias", "features.15.running_mean", "features.15.running_var", "features.15.num_batches_tracked", "features.17.weight", "features.17.bias", "features.18.weight", "features.18.bias", "features.18.running_mean", "features.18.running_var", "features.18.num_batches_tracked", "features.0.weight", "features.0.bias", "features.1.weight", "features.1.bias", "features.1.running_mean", "features.1.running_var", "features.1.num_batches_tracked", "features.3.weight", "features.3.bias", "features.4.weight", "features.4.bias", "features.4.running_mean", "features.4.running_var", "features.4.num_batches_tracked", "features.7.weight", "features.7.bias", "features.8.weight", "features.8.bias", "features.8.running_mean", "features.8.running_var", "features.8.num_batches_tracked", "classifier.4.weight", "classifier.4.bias". 
	size mismatch for classifier.1.weight: copying a param with shape torch.Size([256, 128]) from checkpoint, the shape in current model is torch.Size([10, 1280]).
	size mismatch for classifier.1.bias: copying a param with shape torch.Size([256]) from checkpoint, the shape in current model is torch.Size([10]).

In [None]:
# 8. Summarize and Visualize Results
print("\n" + "="*30 + " RESULTS SUMMARY " + "="*30)

# 1. Architecture Comparison
print("\nArchitecture Comparison:")
for arch_name, results in architecture_results.items():
    print(f"{arch_name}: {results['test_accuracy']:.2f}%")

# 2. Training Hyperparameters
print("\nTraining Hyperparameters Comparison:")
for name, results in training_results.items():
    print(f"{name} - {results['hyperparams']}: {results['test_accuracy']:.2f}%")

# 3. Regularization Hyperparameters
print("\nRegularization Hyperparameters Comparison:")
for name, results in regularization_results.items():
    print(f"{name} - Weight Decay: {results['hyperparams']['weight_decay']}: {results['test_accuracy']:.2f}%")

# 4. Data Augmentation
print("\nData Augmentation Comparison:")
for name, results in augmentation_results.items():
    print(f"{name}: {results['test_accuracy']:.2f}%")

# 5. Few-shot Learning
print(f"\nFew-shot Learning (5-way, 5-shot): {few_shot_results['accuracy']:.2f}% ± {few_shot_results['std']:.2f}%")

# 6. Reduced Training Set Size
print("\nReduced Training Set Size Comparison:")
for name, results in reduction_results.items():
    size = name.split('_')[-1]
    print(f"Size {size}: {results['test_accuracy']:.2f}%")

# 7. Ensemble Methods
print(f"\nEnsemble Hard Voting: {ensemble_results['hard_voting']['accuracy']:.2f}%")
print(f"Ensemble Soft Voting: {ensemble_results['soft_voting']['accuracy']:.2f}%")

In [None]:
# Create visualizations

# 1. Plot test accuracies
plt.figure(figsize=(15, 10))

# Architecture comparison
plt.subplot(2, 2, 1)
arch_names = list(architecture_results.keys())
arch_accs = [architecture_results[n]['test_accuracy'] for n in arch_names]
plt.bar(arch_names, arch_accs)
plt.title('Architecture Comparison')
plt.ylabel('Test Accuracy (%)')
plt.ylim(0, 100)

# Augmentation comparison
plt.subplot(2, 2, 2)
aug_names = [name.split('_')[1] for name in augmentation_results.keys()]
aug_accs = [augmentation_results[name]['test_accuracy'] for name in augmentation_results.keys()]
plt.bar(aug_names, aug_accs)
plt.title('Data Augmentation Comparison')
plt.ylabel('Test Accuracy (%)')
plt.ylim(0, 100)

# Reduced training set size
plt.subplot(2, 2, 3)
sizes = [name.split('_')[-1] for name in reduction_results.keys()]
size_accs = [reduction_results[name]['test_accuracy'] for name in reduction_results.keys()]
# Add the full dataset result
sizes.append(str(TRAIN_SUBSET_SIZE))
size_accs.append(all_results['Architecture_efficientnet']['test_accuracy'])
plt.bar(sizes, size_accs)
plt.title('Reduced Training Set Size')
plt.xlabel('Number of Training Samples')
plt.ylabel('Test Accuracy (%)')
plt.ylim(0, 100)

# Ensemble comparison
plt.subplot(2, 2, 4)
methods = ['Hard Voting', 'Soft Voting', 'Best Single Model']
accuracies = [
    ensemble_results['hard_voting']['accuracy'],
    ensemble_results['soft_voting']['accuracy'],
    max([results['test_accuracy'] for name, results in all_results.items() if 'test_accuracy' in results])
]
plt.bar(methods, accuracies)
plt.title('Ensemble Methods Comparison')
plt.ylabel('Test Accuracy (%)')
plt.ylim(0, 100)

plt.tight_layout()
plt.savefig('experiment_results_summary.png')
plt.show()

In [None]:

# Save all results to file
import json
import pickle

# Convert numpy arrays to lists for JSON serialization
def convert_numpy_to_list(obj):
    if isinstance(obj, np.ndarray):
        return obj.tolist()
    elif isinstance(obj, dict):
        return {k: convert_numpy_to_list(v) for k, v in obj.items()}
    elif isinstance(obj, list):
        return [convert_numpy_to_list(item) for item in obj]
    else:
        return obj

# Convert all results
serializable_results = convert_numpy_to_list(all_results)

# Remove model states which can't be serialized easily
for key in serializable_results:
    if isinstance(serializable_results[key], dict) and 'model_state' in serializable_results[key]:
        del serializable_results[key]['model_state']

# Save as JSON for easy viewing
with open('experiment_results.json', 'w') as f:
    json.dump(serializable_results, f, indent=2)

print("\nExperiment results saved to 'experiment_results.json'")
print("Summary visualizations saved to 'experiment_results_summary.png'")

# Save full results including model states with pickle
try:
    with open('full_experiment_results.pkl', 'wb') as f:
        pickle.dump(all_results, f)
    print("Full results including model states saved to 'full_experiment_results.pkl'")
except:
    print("Warning: Could not save full results with model states")

print("\n" + "="*30 + " EXPERIMENTS COMPLETED " + "="*30)