In [None]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader, Subset
import numpy as np

# CNN Model with Feature Extraction Support
class CNN(nn.Module):
    """Small convolutional classifier that can also expose its feature maps.

    Two conv -> ReLU -> 2x2 max-pool stages feed a two-layer fully connected
    head that emits 10 logits. The head expects ``64 * 7 * 7`` flattened
    features, which is what two halvings of a 28x28 single-channel input
    produce.
    """

    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(in_channels=1, out_channels=32, kernel_size=3, padding=1)
        self.conv2 = nn.Conv2d(in_channels=32, out_channels=64, kernel_size=3, padding=1)
        # A single pooling layer is shared by both convolutional stages.
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        self.fc1 = nn.Linear(in_features=64 * 7 * 7, out_features=128)
        self.fc2 = nn.Linear(in_features=128, out_features=10)

    def forward(self, x, return_features=False):
        """Run the network on ``x``.

        Args:
            x: Input batch fed to ``conv1`` (one input channel).
            return_features: When true, stop after the convolutional stages
                and return the pooled feature maps instead of class logits.

        Returns:
            The feature maps if ``return_features`` is true, otherwise the
            output of ``fc2`` (10 values per sample).
        """
        feature_maps = x
        for conv in (self.conv1, self.conv2):
            feature_maps = self.pool(F.relu(conv(feature_maps)))

        if return_features:
            return feature_maps  # Feature maps consumed by active learning

        flattened = feature_maps.view(-1, 64 * 7 * 7)
        hidden = F.relu(self.fc1(flattened))
        return self.fc2(hidden)

# Prepare Data
def prepare_data():
    """Load the FashionMNIST train and test splits with shared preprocessing.

    Every image is converted with ``ToTensor`` and then normalised with
    mean 0.5 and std 0.5 on its single channel. Both splits are stored under
    ``./data`` and downloaded if necessary.

    Returns:
        A ``(train_dataset, test_dataset)`` tuple.
    """
    preprocessing = transforms.Compose([
        transforms.ToTensor(),
        transforms.Normalize((0.5,), (0.5,)),
    ])
    # Only the `train` flag differs between the two splits.
    shared_options = dict(root='./data', transform=preprocessing, download=True)
    train_split = datasets.FashionMNIST(train=True, **shared_options)
    test_split = datasets.FashionMNIST(train=False, **shared_options)
    return train_split, test_split

# Split Dataset into Labeled and Unlabeled
def split_dataset(dataset, initial_labeled_size):
    """Randomly split ``dataset`` into a labeled seed set and an unlabeled pool.

    Args:
        dataset: Any object supporting ``len()`` and integer indexing.
        initial_labeled_size: Number of samples drawn (without replacement,
            via ``np.random.choice``) for the labeled subset.

    Returns:
        ``(labeled_subset, unlabeled_subset)`` as ``Subset`` views of
        ``dataset``. Their ``indices`` are lists of plain Python ints; the
        unlabeled indices are in ascending order.

    Raises:
        ValueError: Propagated from ``np.random.choice`` when
            ``initial_labeled_size`` exceeds ``len(dataset)``.
    """
    total = len(dataset)
    labeled_indices = np.random.choice(
        total, size=initial_labeled_size, replace=False
    ).tolist()
    # Membership is tested against a set: scanning the index array for every
    # dataset position would cost O(len(dataset) * initial_labeled_size).
    labeled_lookup = set(labeled_indices)
    unlabeled_indices = [i for i in range(total) if i not in labeled_lookup]
    return Subset(dataset, labeled_indices), Subset(dataset, unlabeled_indices)

# Train Model
def train_model(model, data_loader, optimizer, criterion, epochs=10):
    """Train ``model`` in place and print the mean batch loss after each epoch.

    Batches are moved to the device that holds the model's parameters, so the
    function does not depend on any module-level ``device`` variable.

    Args:
        model: ``nn.Module`` to optimise; it is switched to training mode.
        data_loader: Iterable of ``(images, labels)`` batches.
        optimizer: Optimiser already bound to ``model``'s parameters.
        criterion: Loss callable taking ``(outputs, labels)``.
        epochs: Number of full passes over ``data_loader``.
    """
    # Follow the model's own device; fall back to CPU for parameter-free models.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    model.train()
    for epoch in range(epochs):
        running_loss = 0.0
        num_batches = 0
        for images, labels in data_loader:
            images, labels = images.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(images)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
            running_loss += loss.item()
            num_batches += 1
        # An empty loader reports nan instead of dividing by zero.
        mean_loss = running_loss / num_batches if num_batches else float("nan")
        print(f"Epoch {epoch+1}/{epochs}, Loss: {mean_loss:.4f}")

# Evaluate Model
def evaluate_model(model, data_loader):
    """Return the top-1 accuracy of ``model`` over ``data_loader`` in percent.

    Batches are moved to the device that holds the model's parameters, so the
    function does not depend on any module-level ``device`` variable.

    Args:
        model: ``nn.Module`` producing per-class scores; it is switched to
            evaluation mode.
        data_loader: Iterable of ``(images, labels)`` batches.

    Returns:
        Accuracy as a float in ``[0, 100]``; ``0.0`` when the loader yields
        no samples.
    """
    # Follow the model's own device; fall back to CPU for parameter-free models.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in data_loader:
            images, labels = images.to(device), labels.to(device)
            predicted = model(images).argmax(dim=1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    if total == 0:
        # Nothing was evaluated; avoid a ZeroDivisionError.
        return 0.0
    return 100 * correct / total

# Select Uncertain Samples for Active Learning
def get_uncertain_samples(model, data_loader, num_samples, strategy="entropy"):
    """Return the dataset indices of the samples the model is least sure about.

    Args:
        model: ``nn.Module`` producing per-class logits; it is switched to
            evaluation mode.
        data_loader: Iterable of ``(images, indices)`` batches. The second
            element must identify each sample (its dataset index), NOT its
            class label, because those values are what gets returned.
        num_samples: Maximum number of indices to return.
        strategy: ``"entropy"`` (predictive entropy), ``"least_confidence"``
            (lowest top-1 probability) or ``"margin"`` (smallest gap between
            the two most likely classes).

    Returns:
        Up to ``num_samples`` indices, most uncertain first.

    Raises:
        ValueError: If ``strategy`` is not one of the supported names.
    """
    if strategy not in ("entropy", "least_confidence", "margin"):
        raise ValueError(f"Unsupported uncertainty strategy: {strategy!r}")

    # Follow the model's own device; fall back to CPU for parameter-free models.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    uncertainties = []
    with torch.no_grad():
        for images, indices in data_loader:
            probs = F.softmax(model(images.to(device)), dim=1)

            # Every strategy yields a score where larger means more uncertain.
            # The negation is applied to the tensor (a Python list cannot be
            # negated).
            if strategy == "entropy":
                scores = -torch.sum(probs * torch.log(probs + 1e-10), dim=1)
            elif strategy == "least_confidence":
                confidence, _ = torch.max(probs, dim=1)
                scores = -confidence
            else:  # "margin"
                sorted_probs, _ = probs.sort(dim=1, descending=True)
                scores = -(sorted_probs[:, 0] - sorted_probs[:, 1])

            uncertainties.extend(zip(scores.tolist(), indices.tolist()))

    uncertainties.sort(reverse=True, key=lambda pair: pair[0])
    return [index for _, index in uncertainties[:num_samples]]

# Select Diverse Samples for Active Learning
def get_diverse_samples(model, data_loader, num_samples, diversity_metric="cosine_similarity", batch_size=256):
    """Return the dataset indices of the most atypical samples in feature space.

    Features are the model's feature maps (``return_features=True``) averaged
    over their spatial dimensions. Samples are ranked over the whole pool,
    not per chunk, and the top ``num_samples`` distinct indices are returned.

    Args:
        model: ``nn.Module`` whose ``forward`` accepts ``return_features=True``
            and then returns 4-D feature maps; it is switched to evaluation mode.
        data_loader: Iterable of ``(images, indices)`` batches. The second
            element must identify each sample (its dataset index), NOT its
            class label, because those values are what gets returned.
        num_samples: Maximum number of indices to return.
        diversity_metric: ``"cosine_similarity"`` scores a sample by one minus
            its highest cosine similarity to any *other* pooled sample;
            ``"l2_norm"`` scores it by its distance to the mean feature vector.
        batch_size: Number of rows per chunk when computing pairwise
            similarities; it bounds memory and does not affect the result.

    Returns:
        Up to ``num_samples`` distinct indices, most diverse first.

    Raises:
        ValueError: If ``diversity_metric`` is not supported.
    """
    if diversity_metric not in ("cosine_similarity", "l2_norm"):
        raise ValueError("Unsupported diversity metric!")

    # Follow the model's own device; fall back to CPU for parameter-free models.
    first_param = next(model.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cpu")

    model.eval()
    feature_chunks, dataset_indices = [], []
    with torch.no_grad():
        for images, img_indices in data_loader:
            feature_maps = model(images.to(device), return_features=True)
            pooled = F.adaptive_avg_pool2d(feature_maps, (1, 1)).view(feature_maps.size(0), -1)
            feature_chunks.append(pooled.cpu())  # Rank on CPU to spare device memory
            dataset_indices.extend(torch.as_tensor(img_indices).tolist())

    if not feature_chunks or num_samples <= 0:
        return []

    features = torch.cat(feature_chunks, dim=0)

    if diversity_metric == "l2_norm":
        scores = torch.norm(features - features.mean(dim=0), dim=1)
    else:
        # Cosine similarity as a dot product of unit vectors: each chunk costs
        # (chunk, N) memory instead of a broadcast (chunk, N, D) tensor.
        unit = F.normalize(features, dim=1)
        score_chunks = []
        for start in range(0, len(unit), batch_size):
            chunk = unit[start:start + batch_size]
            similarity = chunk @ unit.T
            # Mask each sample's similarity to itself; otherwise the maximum
            # is always 1 and every score collapses to 0.
            rows = torch.arange(len(chunk))
            similarity[rows, rows + start] = float("-inf")
            score_chunks.append(1 - similarity.max(dim=1).values)
        scores = torch.cat(score_chunks)

    selected_indices = []
    seen_indices = set()
    for position in scores.argsort(descending=True).tolist():
        dataset_index = dataset_indices[position]
        if dataset_index in seen_indices:
            continue
        seen_indices.add(dataset_index)
        selected_indices.append(dataset_index)
        if len(selected_indices) >= num_samples:
            break

    return selected_indices

# Main Script
if __name__ == "__main__":
    class _IndexedPool(torch.utils.data.Dataset):
        """Unlabeled pool yielding ``(image, dataset_index)`` pairs.

        The sample selectors read the second element of every batch as the
        sample's index in the full training set. A plain ``Subset`` would hand
        them class labels instead, so the pool exposes the indices explicitly.
        """

        def __init__(self, dataset, indices):
            self.dataset = dataset
            self.indices = list(indices)

        def __len__(self):
            return len(self.indices)

        def __getitem__(self, position):
            dataset_index = int(self.indices[position])
            image, _ = self.dataset[dataset_index]  # Label is deliberately hidden
            return image, dataset_index

    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

    # Load Data
    train_dataset, test_dataset = prepare_data()
    initial_labeled_size = 1000
    labeled_set, unlabeled_set = split_dataset(train_dataset, initial_labeled_size)
    # Track both partitions as plain-int index lists into train_dataset.
    labeled_indices = [int(i) for i in labeled_set.indices]
    unlabeled_indices = [int(i) for i in unlabeled_set.indices]

    labeled_loader = DataLoader(labeled_set, batch_size=64, shuffle=True)
    unlabeled_loader = DataLoader(
        _IndexedPool(train_dataset, unlabeled_indices), batch_size=64, shuffle=False
    )
    test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

    # Initialize Model, Loss, Optimizer
    model = CNN().to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=0.001)

    # Train Initial Model
    print("Training on initial labeled dataset...")
    train_model(model, labeled_loader, optimizer, criterion, epochs=10)

    # Evaluate Model
    accuracy = evaluate_model(model, test_loader)
    print(f"Initial Test Accuracy: {accuracy:.2f}%")

    # Active Learning Iterations
    num_iterations = 5
    num_samples = 100

    for iteration in range(num_iterations):
        print(f"\nActive Learning Iteration {iteration + 1}")

        # Select new samples
        new_indices_uncertainty = get_uncertain_samples(model, unlabeled_loader, num_samples, strategy="entropy")
        new_indices_diversity = get_diverse_samples(model, unlabeled_loader, num_samples, diversity_metric="cosine_similarity")

        # Merge both selections; sorting keeps the labeled order reproducible.
        new_indices = sorted(set(new_indices_uncertainty) | set(new_indices_diversity))
        newly_labeled = set(new_indices)  # O(1) membership while filtering the pool
        labeled_indices = labeled_indices + new_indices
        unlabeled_indices = [i for i in unlabeled_indices if i not in newly_labeled]

        labeled_set = Subset(train_dataset, labeled_indices)
        unlabeled_set = _IndexedPool(train_dataset, unlabeled_indices)

        # Update Data Loaders
        labeled_loader = DataLoader(labeled_set, batch_size=64, shuffle=True)
        unlabeled_loader = DataLoader(unlabeled_set, batch_size=64, shuffle=False)

        # Retrain Model (continues from the current weights and optimizer state)
        train_model(model, labeled_loader, optimizer, criterion, epochs=5)

        # Evaluate Model
        accuracy = evaluate_model(model, test_loader)
        print(f"Iteration {iteration + 1} Test Accuracy: {accuracy:.2f}%")


Training on initial labeled dataset...
Epoch 1/10, Loss: 1.6856
Epoch 2/10, Loss: 0.8666
Epoch 3/10, Loss: 0.6764
Epoch 4/10, Loss: 0.6005
Epoch 5/10, Loss: 0.5527
Epoch 6/10, Loss: 0.5115
Epoch 7/10, Loss: 0.4506
Epoch 8/10, Loss: 0.4088
Epoch 9/10, Loss: 0.3812
Epoch 10/10, Loss: 0.3588
Initial Test Accuracy: 80.67%

Active Learning Iteration 1
Epoch 1/5, Loss: 0.3271
Epoch 2/5, Loss: 0.2723
Epoch 3/5, Loss: 0.2593
Epoch 4/5, Loss: 0.2430
Epoch 5/5, Loss: 0.2082
Iteration 1 Test Accuracy: 79.91%

Active Learning Iteration 2
Epoch 1/5, Loss: 0.2215
Epoch 2/5, Loss: 0.2176
Epoch 3/5, Loss: 0.2012
Epoch 4/5, Loss: 0.1755
Epoch 5/5, Loss: 0.1488
Iteration 2 Test Accuracy: 80.54%

Active Learning Iteration 3
Epoch 1/5, Loss: 0.1326
Epoch 2/5, Loss: 0.1497
Epoch 3/5, Loss: 0.1320
Epoch 4/5, Loss: 0.1241
Epoch 5/5, Loss: 0.1111
Iteration 3 Test Accuracy: 80.72%

Active Learning Iteration 4
Epoch 1/5, Loss: 0.0934
Epoch 2/5, Loss: 0.0704
Epoch 3/5, Loss: 0.0612
Epoch 4/5, Loss: 0.0574
Epoch 