In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader, random_split, Subset
from torchvision import transforms
from PIL import Image
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import time
import os

# ==========================================
# 1. Settings & Hyperparameters
# ==========================================
# Try to detect GPU (CUDA or MPS)
# `torch.backends.mps` only exists in PyTorch >= 1.12. Look it up defensively:
# on older installs a direct attribute access would raise AttributeError
# before the CPU fallback below could ever be reached.
_mps_backend = getattr(torch.backends, 'mps', None)

if torch.cuda.is_available():
    DEVICE = 'cuda'
    print("CUDA device detected. Using GPU.")
elif _mps_backend is not None and _mps_backend.is_available():
    DEVICE = 'mps'
    print("Apple MPS device detected. Using Apple GPU.")
else:
    DEVICE = 'cpu'
    print("No GPU detected. Using CPU.")

# Central experiment configuration read by the training / evaluation code.
CONFIG = {
    'img_dir': './cifar-10/train/train',        # directory of training PNGs
    'labels_csv': './cifar-10/trainLabels.csv',  # CSV with image id + class name
    'test_dir': './cifar-10/test/test',          # not referenced in the visible code
    'batch_size': 32,
    'learning_rate': 0.001,
    'epochs': 15,
    'val_split': 0.1,          # fraction of the (sub)set held out for validation
    'data_subset_ratio': 0.2,  # Only use 20% of training data
    'device': DEVICE
}

# Class-name -> integer-index lookup. The index of a class is simply its
# position in the tuple below, so the order of the names is significant.
label_map = {
    class_name: class_index
    for class_index, class_name in enumerate((
        'airplane', 'automobile', 'bird', 'cat', 'deer',
        'dog', 'frog', 'horse', 'ship', 'truck',
    ))
}

# ==========================================
# 2. Custom Dataset Class
# ==========================================
class Cifar10Dataset(Dataset):
    """Image dataset backed by a directory of ``<id>.png`` files.

    In training mode (``is_test=False``) the samples are the rows of
    ``csv_file``: column 0 holds the image id (the file stem) and column 1 the
    class name, which is translated to an integer through ``label_map``.
    In test mode (``is_test=True``) ``csv_file`` is ignored, the samples are
    all ``*.png`` files found in ``img_dir`` ordered by their numeric file
    stem, and every label is ``-1``.

    Args:
        csv_file: Path to the labels CSV (only read when ``is_test`` is False).
        img_dir: Directory containing the PNG images.
        transform: Optional callable applied to the loaded PIL image.
        is_test: Selects test mode as described above.
    """

    def __init__(self, csv_file, img_dir, transform=None, is_test=False):
        self.img_dir = img_dir
        self.transform = transform
        self.is_test = is_test

        if not is_test:
            self.data = pd.read_csv(csv_file)
        else:
            # Sort by the integer value of the stem so "10.png" follows "9.png"
            # instead of landing between "1.png" and "2.png".
            self.image_files = sorted(
                (f for f in os.listdir(img_dir) if f.endswith('.png')),
                key=lambda name: int(os.path.splitext(name)[0]),
            )

    def __len__(self):
        """Return the number of samples in the active mode."""
        return len(self.image_files) if self.is_test else len(self.data)

    def __getitem__(self, idx):
        """Return ``(image, label)`` for position ``idx``.

        ``image`` is the RGB image after ``transform`` (if any); ``label`` is
        the class index in training mode and ``-1`` in test mode.
        """
        # Samplers / ``Subset`` built from a tensor of indices hand over 0-dim
        # tensors; pandas ``.iloc`` expects plain Python integers.
        if torch.is_tensor(idx):
            idx = idx.tolist()

        if self.is_test:
            img_path = os.path.join(self.img_dir, self.image_files[idx])
            label = -1
        else:
            img_id = str(self.data.iloc[idx, 0])
            img_path = os.path.join(self.img_dir, img_id + '.png')
            label = label_map[self.data.iloc[idx, 1]]

        try:
            # ``Image.open`` is lazy and keeps the file open; the context
            # manager closes it once ``convert`` has produced an in-memory copy.
            with Image.open(img_path) as img:
                image = img.convert('RGB')
        except (OSError, SyntaxError, ValueError):
            # Best-effort fallback kept from the original behaviour: a missing,
            # unreadable or corrupt file is replaced by a black 32x32 image.
            # OSError covers missing/unidentified files; Pillow decoders may
            # also raise SyntaxError/ValueError on damaged data. Unlike a bare
            # ``except``, this lets KeyboardInterrupt/SystemExit propagate.
            # NOTE: in training mode the placeholder keeps the real label.
            print(f"Error loading image {img_path}")
            image = Image.new('RGB', (32, 32))

        if self.transform:
            image = self.transform(image)

        return image, label

# Preprocessing pipeline applied to every image: force a 32x32 size, convert
# the PIL image to a tensor, then normalise each RGB channel with a fixed
# mean / standard deviation.
# NOTE(review): the constants look like CIFAR-10 channel statistics — confirm.
transform = transforms.Compose([
    transforms.Resize(size=(32, 32)),
    transforms.ToTensor(),
    transforms.Normalize(
        mean=(0.4914, 0.4822, 0.4465),
        std=(0.2023, 0.1994, 0.2010),
    ),
])

# ==========================================
# 3. CNN Model Definition
# ==========================================
class BaseCNN(nn.Module):
    """Two-convolution CNN that maps an image batch to 10 class logits.

    The first linear layer expects ``32 * 5 * 5`` features, which matches
    3x32x32 inputs: 32 -> conv(k=4) -> 29 -> pool -> 14 -> conv(k=4) -> 11
    -> pool -> 5 per spatial side, with 32 channels after the second conv.
    """

    def __init__(self):
        super().__init__()
        # Attribute names and creation order are kept unchanged so that
        # state-dict keys and seeded parameter initialisation stay the same.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=4)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=32, kernel_size=4)
        self.flatten = nn.Flatten()
        self.fc1 = nn.Linear(in_features=32 * 5 * 5, out_features=256)
        self.fc2 = nn.Linear(in_features=256, out_features=10)
        self.relu = nn.ReLU()

    def forward(self, x):
        """Return raw (un-normalised) class scores for the batch ``x``."""
        features = x
        # Both convolution stages share the same conv -> ReLU -> pool shape.
        for conv in (self.conv1, self.conv2):
            features = self.pool(self.relu(conv(features)))
        hidden = self.relu(self.fc1(self.flatten(features)))
        return self.fc2(hidden)

# ==========================================
# 4. Training and Evaluation Logic
# ==========================================
# Seed for the subset / validation split. Training and the final evaluation
# must derive the split from the same seed; otherwise the "validation" images
# drawn at evaluation time are a fresh random sample that overlaps with the
# images the model was trained on.
_SPLIT_SEED = 42


def _split_indices(num_samples, subset_ratio, val_split, seed=_SPLIT_SEED):
    """Deterministically pick disjoint train / validation index lists.

    A seeded permutation of ``range(num_samples)`` is truncated to
    ``int(num_samples * subset_ratio)`` entries when ``subset_ratio < 1.0``;
    the last ``int(len(subset) * val_split)`` entries form the validation set
    and the rest the training set.

    Returns:
        ``(train_indices, val_indices)`` as lists of plain Python ints (not
        tensors), so they can be used for pandas-backed dataset indexing.
    """
    generator = torch.Generator().manual_seed(seed)
    order = torch.randperm(num_samples, generator=generator).tolist()
    if subset_ratio < 1.0:
        order = order[:int(num_samples * subset_ratio)]
    val_size = int(len(order) * val_split)
    train_size = len(order) - val_size
    return order[:train_size], order[train_size:]


def _evaluate(model, loader, criterion, device):
    """Return ``(mean per-batch loss, accuracy in percent)`` of ``model``.

    ``loader`` is any iterable of ``(images, labels)`` batches. When it yields
    no samples both values are NaN instead of raising ZeroDivisionError.
    """
    model.eval()
    loss_sum, correct, total, num_batches = 0.0, 0, 0, 0

    with torch.no_grad():
        for images, labels in loader:
            images = images.to(device)
            labels = labels.to(device)

            outputs = model(images)
            loss_sum += criterion(outputs, labels).item()

            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            num_batches += 1

    if total == 0:
        return float('nan'), float('nan')
    return loss_sum / num_batches, 100 * correct / total


def train_and_evaluate(run_id, model_class, split_seed=_SPLIT_SEED):
    """Train a fresh ``model_class()`` and track validation metrics per epoch.

    Args:
        run_id: Identifier printed in the run banner.
        model_class: Zero-argument callable returning the ``nn.Module`` to train.
        split_seed: Seed of the subset / validation split. Model initialisation
            and batch shuffling still use the global RNG, so repeated runs
            differ even though they see the same split.

    Returns:
        ``(history, elapsed_seconds, model)`` where ``history`` maps
        ``'train_loss'``, ``'val_loss'`` and ``'val_acc'`` to per-epoch lists.
    """
    print(f"\n--- Starting Run {run_id} ---")

    full_dataset = Cifar10Dataset(CONFIG['labels_csv'], CONFIG['img_dir'], transform=transform)

    # Use a subset for faster training, then split it into train/validation
    train_idx, val_idx = _split_indices(
        len(full_dataset), CONFIG['data_subset_ratio'], CONFIG['val_split'], seed=split_seed
    )
    if CONFIG['data_subset_ratio'] < 1.0:
        print(f"Using {len(train_idx) + len(val_idx)} images (Subset Ratio: {CONFIG['data_subset_ratio']*100}%)")

    train_data = Subset(full_dataset, train_idx)
    val_data = Subset(full_dataset, val_idx)

    num_workers = 4 if CONFIG['device'] == 'cuda' else 0
    train_loader = DataLoader(train_data, batch_size=CONFIG['batch_size'], shuffle=True,
                              num_workers=num_workers)
    val_loader = DataLoader(val_data, batch_size=CONFIG['batch_size'], shuffle=False,
                            num_workers=num_workers)

    model = model_class().to(CONFIG['device'])
    criterion = nn.CrossEntropyLoss().to(CONFIG['device'])
    optimizer = optim.Adam(model.parameters(), lr=CONFIG['learning_rate'])

    history = {'train_loss': [], 'val_loss': [], 'val_acc': []}
    start_time = time.time()

    print("Training started...")
    for epoch in range(CONFIG['epochs']):
        model.train()
        train_loss = 0

        for images, labels in train_loader:
            images = images.to(CONFIG['device'])
            labels = labels.to(CONFIG['device'])

            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            train_loss += loss.item()

        # _evaluate switches the model to eval mode; model.train() at the top
        # of the loop switches it back for the next epoch.
        avg_val_loss, val_acc = _evaluate(model, val_loader, criterion, CONFIG['device'])
        avg_train_loss = train_loss / len(train_loader)

        history['train_loss'].append(avg_train_loss)
        history['val_loss'].append(avg_val_loss)
        history['val_acc'].append(val_acc)

        print(f"Epoch {epoch+1}/{CONFIG['epochs']} | Train Loss: {avg_train_loss:.4f} | "
              f"Val Loss: {avg_val_loss:.4f} | Val Acc: {val_acc:.2f}%")

    return history, time.time() - start_time, model


# Final evaluation on validation set
def final_test_evaluation(model, subset_ratio, split_seed=_SPLIT_SEED):
    """Evaluate ``model`` on the held-out validation split and print a summary.

    The split is rebuilt from ``split_seed``. It matches the validation set
    used by ``train_and_evaluate`` only when ``subset_ratio`` equals
    ``CONFIG['data_subset_ratio']`` and the same seed is used there.

    Returns:
        ``(accuracy_percent, mean_per_batch_loss)``.
    """
    full_dataset = Cifar10Dataset(CONFIG['labels_csv'], CONFIG['img_dir'], transform=transform)

    _, val_idx = _split_indices(
        len(full_dataset), subset_ratio, CONFIG['val_split'], seed=split_seed
    )
    val_loader = DataLoader(Subset(full_dataset, val_idx),
                            batch_size=CONFIG['batch_size'], shuffle=False)

    criterion = nn.CrossEntropyLoss().to(CONFIG['device'])
    avg_loss, final_acc = _evaluate(model, val_loader, criterion, CONFIG['device'])

    print("\n--- Final Evaluation ---")
    print(f"Final Accuracy (Validation Subset): {final_acc:.2f}%")
    print(f"Final Loss (Validation Subset): {avg_loss:.4f}")

    return final_acc, avg_loss


# ==========================================
# 5. Main Execution
# ==========================================
# Number of independent training runs. Kept in one place so the loop bound and
# the "Average Runtime" line of the report cannot drift apart.
NUM_RUNS = 2

histories = []
durations = []

for i in range(1, NUM_RUNS + 1):
    hist, dur, trained_model = train_and_evaluate(i, BaseCNN)
    histories.append(hist)
    durations.append(dur)

avg_time = np.mean(durations)
# `trained_model` is the model from the LAST run of the loop above.
final_acc, final_loss = final_test_evaluation(trained_model, CONFIG['data_subset_ratio'])

# Plotting
# NOTE: the curves shown are those of the FIRST run (histories[0]), while the
# final accuracy / loss reported below come from the last run's model.
plt.figure(figsize=(12, 5))
plt.plot(histories[0]['train_loss'], label='Train Loss')
plt.plot(histories[0]['val_loss'], label='Validation Loss')
plt.title('Loss Curve: Base CNN Architecture (Reduced Dataset)')
plt.xlabel('Epoch')
plt.ylabel('Loss')
plt.legend()
plt.grid(True)
plt.show()

print("\n=============================================")
print("Final Report (Section 2 - Part A) - Reduced Dataset")
print("=============================================")
print(f"Average Runtime ({NUM_RUNS} runs): {avg_time:.2f} seconds")
print(f"Final Accuracy: {final_acc:.2f}%")
print(f"Final Loss: {final_loss:.4f}")
print("=============================================")


CUDA device detected. Using GPU.

--- Starting Run 1 ---
Using 10000 images (Subset Ratio: 20.0%)
Training started...
