# Fully Supervised Evaluation

In [None]:
# src/evaluation.py

import os
import random
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader, Subset
from torchvision.models import resnet18
import matplotlib.pyplot as plt
import sys
from sklearn.cluster import KMeans
from sklearn.metrics import pairwise_distances

# Import self-supervised functions and visualization utilities.
from typiclust_alg import SimCLRResNet18, compute_embeddings, typical_clustering_selection, DEVICE
from visualisation import plot_tsne, set_seed

# Seed once, at import time, so repeated runs of this module are reproducible.
# NOTE(review): which RNGs set_seed actually covers is defined in
# visualisation.py and is not visible here — confirm before relying on it.
set_seed(42)

def get_cifar10_datasets():
    """
    Builds the CIFAR-10 training and test datasets (stored under ./data,
    downloaded on demand).

    The training split is augmented with a padded random crop and a random
    horizontal flip (as per Appendix F.2.1); the test split is only converted
    to a tensor. Both splits use the same per-channel normalization.

    Returns:
        A (train_dataset, test_dataset) tuple.
    """
    channel_mean = (0.4914, 0.4822, 0.4465)
    channel_std = (0.247, 0.243, 0.261)

    def tensor_and_normalize():
        # Fresh transform objects per pipeline; nothing is shared between splits.
        return [
            transforms.ToTensor(),
            transforms.Normalize(channel_mean, channel_std),
        ]

    pipeline_for_split = {
        True: transforms.Compose(
            [
                transforms.RandomCrop(32, padding=4),
                transforms.RandomHorizontalFlip(),
                *tensor_and_normalize(),
            ]
        ),
        False: transforms.Compose(tensor_and_normalize()),
    }

    # The training split is constructed first, then the test split.
    train_dataset, test_dataset = (
        torchvision.datasets.CIFAR10(
            root="./data",
            train=is_train,
            download=True,
            transform=pipeline_for_split[is_train],
        )
        for is_train in (True, False)
    )
    return train_dataset, test_dataset

def build_cnn_model():
    """
    Builds a randomly initialized ResNet-18 with a 10-way classification head.

    A new model is created on every call, so each active learning iteration
    can train from scratch.

    Returns:
        The model, moved to DEVICE.
    """
    # `weights=None` is the current torchvision spelling of "no pretrained
    # weights"; the older `pretrained=False` flag is deprecated and warns.
    model = resnet18(weights=None)
    # Replace the default classification head with a 10-output linear layer.
    model.fc = nn.Linear(model.fc.in_features, 10)
    return model.to(DEVICE)

def train_model_supervised(model, dataloader, epochs, device=DEVICE):
    """
    Fits the model with cross-entropy loss for a fixed number of epochs.

    Optimization uses SGD (lr=0.025, momentum=0.9, Nesterov) with a cosine
    annealing learning-rate schedule spanning all epochs, following
    Appendix F.2.1. The mean batch loss is printed after every epoch.

    Args:
        model: Network to train; it is put into training mode and updated in place.
        dataloader: Iterable of (images, labels) batches.
        epochs: Number of passes over the dataloader.
        device: Device each batch is moved to before the forward pass.

    Returns:
        The same model object after training.
    """
    loss_fn = nn.CrossEntropyLoss()
    sgd = optim.SGD(model.parameters(), lr=0.025, momentum=0.9, nesterov=True)
    lr_schedule = optim.lr_scheduler.CosineAnnealingLR(sgd, T_max=epochs)

    model.train()
    for epoch_number in range(1, epochs + 1):
        running_loss = 0.0
        for batch_images, batch_labels in dataloader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)

            sgd.zero_grad()
            batch_loss = loss_fn(model(batch_images), batch_labels)
            batch_loss.backward()
            sgd.step()

            running_loss += batch_loss.item()

        # The schedule advances once per epoch, not once per batch.
        lr_schedule.step()
        mean_loss = running_loss / len(dataloader)
        print(f"[Supervised] Epoch [{epoch_number}/{epochs}] Loss: {mean_loss:.4f}")
    return model

def evaluate_model(model, dataloader, device=DEVICE):
    """
    Measures top-1 accuracy of the model over every batch in the dataloader.

    The model is switched to eval mode and gradients are disabled during the
    pass. The accuracy is printed as a percentage.

    Args:
        model: Network to evaluate.
        dataloader: Iterable of (images, labels) batches.
        device: Device each batch is moved to before the forward pass.

    Returns:
        Accuracy as a fraction in [0, 1].
    """
    model.eval()
    num_correct, num_seen = 0, 0
    with torch.no_grad():
        for batch_images, batch_labels in dataloader:
            batch_images = batch_images.to(device)
            batch_labels = batch_labels.to(device)
            # The predicted class is the index of the largest output per sample.
            predictions = model(batch_images).argmax(dim=1)
            num_correct += predictions.eq(batch_labels).sum().item()
            num_seen += batch_labels.size(0)
    accuracy = num_correct / num_seen
    print(f"Test Accuracy: {accuracy*100:.2f}%")
    return accuracy

def select_samples_typiclust(dataset, budget, encoder):
    """
    Selects 'budget' samples from 'dataset' using TPC-RP (TypiClust).

    Embeddings are computed with the given pre-loaded SimCLR encoder and
    passed to typical_clustering_selection. A t-SNE plot of the embeddings
    with the selected points is shown as a side effect, and the selection
    size is printed.

    Args:
        dataset: Dataset to select from, as accepted by compute_embeddings.
        budget: Number of samples (clusters) requested.
        encoder: Encoder used to embed the dataset.

    Returns:
        The selected indices as returned by typical_clustering_selection;
        the caller treats them as positions within 'dataset'.
    """
    # Ground-truth labels are deliberately not collected here: selection does
    # not use them, and gathering them would cost a full pass over the dataset.
    all_embeddings, _ = compute_embeddings(encoder, dataset, batch_size=128, num_workers=4)
    selected_indices, cluster_labels = typical_clustering_selection(
        all_embeddings, budget=budget, k_nn=20, random_state=42)
    # Visualize the embeddings, colored by cluster, with the selection highlighted.
    plot_tsne(all_embeddings, cluster_labels, selected_indices=selected_indices,
              title="t-SNE of CIFAR-10 Embeddings with Selected Points", n_samples=2000)
    print(f"Number of clusters (budget) = {budget}")
    print(f"Number of typical points selected = {len(selected_indices)}")
    return selected_indices

def run_fully_supervised_experiment(budget=10, epochs=200, n_iterations=5):
    """
    Runs the fully supervised active learning experiment.

    For each iteration:
      1) Select 'budget' new samples from the current unlabeled pool using TPC-RP.
      2) Reinitialize and train a new ResNet-18 from scratch on the entire labeled set.
      3) Evaluate the model on the test set.

    Args:
        budget: Number of samples added to the labeled set per iteration.
        epochs: Training epochs for each freshly initialized model.
        n_iterations: Number of active learning iterations.

    Returns:
        A list of test accuracies (fractions), one per iteration.
    """
    train_dataset, test_dataset = get_cifar10_datasets()

    # Initialize pre-trained SimCLR encoder once for selection.
    simclr_encoder = SimCLRResNet18(feature_dim=128).to(DEVICE)
    checkpoint_path = 'model/simclr_cifar_10.pth.tar'
    if os.path.exists(checkpoint_path):
        # NOTE(security): torch.load unpickles the file; only load checkpoints
        # from a trusted source.
        checkpoint = torch.load(checkpoint_path, map_location=DEVICE)
        # Accept both a wrapped checkpoint ({'state_dict': ...}) and a bare state dict.
        state_dict = checkpoint.get('state_dict', checkpoint)
        load_result = simclr_encoder.load_state_dict(state_dict, strict=False)
        print("Loaded pretrained SimCLR model for sample selection.")
        # strict=False skips mismatched keys silently; surface them so a
        # partially (or not at all) loaded encoder does not go unnoticed.
        if load_result.missing_keys or load_result.unexpected_keys:
            print(f"Warning: checkpoint keys do not fully match the encoder "
                  f"({len(load_result.missing_keys)} missing, "
                  f"{len(load_result.unexpected_keys)} unexpected).")
    else:
        print("Pretrained checkpoint not found; using untrained encoder for selection.")
    simclr_encoder.eval()

    all_indices = set(range(len(train_dataset)))
    labeled_indices = []  # Initially, L₀ is empty.
    train_batch_size = 32
    test_loader = DataLoader(test_dataset, batch_size=128, shuffle=False)
    accuracies = []

    for it in range(n_iterations):
        print(f"\n=== AL Iteration {it+1}/{n_iterations}, Budget = {budget} ===")
        # Define the unlabeled pool. Sorting fixes the local -> global index
        # mapping instead of relying on set iteration order.
        unlabeled_indices = sorted(all_indices - set(labeled_indices))
        # NOTE(review): train_dataset applies random crop/flip, so selection
        # embeddings may be computed on augmented images unless
        # compute_embeddings overrides the transform — confirm.
        unlabeled_subset = Subset(train_dataset, unlabeled_indices)

        # Select new samples using TPC-RP (Typiclust) from the unlabeled subset.
        new_selected_local = select_samples_typiclust(unlabeled_subset, budget, simclr_encoder)
        # Map local indices back to global indices.
        new_selected_global = [unlabeled_indices[i] for i in new_selected_local]
        labeled_indices.extend(new_selected_global)
        print(f"Total labeled so far: {len(labeled_indices)}")

        # Train a new ResNet-18 from scratch on the accumulated labeled set.
        labeled_subset = Subset(train_dataset, labeled_indices)
        n_labeled = len(labeled_indices)
        # A trailing batch with a single sample makes BatchNorm fail in
        # training mode (ResNet-18 reaches 1x1 feature maps on 32x32 inputs),
        # so drop that lone remainder when it would occur.
        drop_single_remainder = (
            n_labeled > train_batch_size and n_labeled % train_batch_size == 1
        )
        train_loader = DataLoader(labeled_subset, batch_size=train_batch_size,
                                  shuffle=True, drop_last=drop_single_remainder)
        model = build_cnn_model()  # New model for each iteration.
        model = train_model_supervised(model, train_loader, epochs, DEVICE)

        # Evaluate on the test set.
        acc = evaluate_model(model, test_loader, DEVICE)
        accuracies.append(acc)

    return accuracies

def plot_accuracy_vs_budget_over_iterations(budget, epochs, n_iterations, n_runs=3):
    """
    Repeats the full active learning experiment 'n_runs' times with a fixed
    per-iteration budget and plots the mean test accuracy per iteration,
    with the standard deviation across runs as error bars.

    Args:
        budget: Samples added to the labeled set per iteration.
        epochs: Training epochs per iteration.
        n_iterations: Active learning iterations per run.
        n_runs: Number of independent experiment runs to aggregate.
    """
    per_run_accuracies = []
    for run_number in range(1, n_runs + 1):
        print(f"\n+++ AL Experiment Run {run_number}/{n_runs} +++")
        per_run_accuracies.append(
            run_fully_supervised_experiment(
                budget=budget, epochs=epochs, n_iterations=n_iterations
            )
        )

    # Rows are runs, columns are iterations; aggregate across runs.
    accuracy_matrix = np.array(per_run_accuracies)
    mean_percent = accuracy_matrix.mean(axis=0) * 100
    std_percent = accuracy_matrix.std(axis=0) * 100
    iteration_axis = np.arange(1, n_iterations + 1)

    plt.figure(figsize=(8, 6))
    plt.errorbar(iteration_axis, mean_percent, yerr=std_percent, fmt='-o', capsize=5)
    plt.xlabel("Active Learning Iteration")
    plt.ylabel("Test Accuracy (%)")
    plt.title(f"Fully Supervised AL (Budget per Iteration = {budget})")
    plt.grid(True)
    plt.show()

def main():
    """
    Entry point: runs the fully supervised active learning experiment with a
    budget of 10 samples per iteration, 200 training epochs per iteration,
    5 iterations, and 3 repeated runs, then plots the averaged accuracy.
    """
    plot_accuracy_vs_budget_over_iterations(
        budget=10,
        epochs=200,
        n_iterations=5,
        n_runs=3,
    )


# Run the experiment only when executed as a script, not on import.
if __name__ == "__main__":
    main()


Random seed set to 42

+++ AL Experiment Run 1/3 +++
Files already downloaded and verified
Files already downloaded and verified


  checkpoint = torch.load(checkpoint_path, map_location=DEVICE)


Loaded pretrained SimCLR model for sample selection.

=== AL Iteration 1/5, Budget = 10 ===


KeyboardInterrupt: 