# In [None]:  (notebook cell marker left over from the export)
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, Subset
import numpy as np
import torch.nn.functional as F
from sklearn.metrics.pairwise import cosine_similarity
from scipy.spatial.distance import cdist

# Define Active Learning Strategies
def least_confidence(model, data_loader, device):
    """Rank samples from most to least uncertain by least-confidence score.

    A sample's score is ``1 - max(softmax(outputs))`` taken over dim 1 of the
    model output.

    Args:
        model: Callable module; it is switched to eval mode here.
        data_loader: Iterable of ``(inputs, _)`` batches; the second item of
            each batch is ignored.
        device: Device the inputs are moved to before the forward pass.

    Returns:
        Array of sample positions (in the order ``data_loader`` yields them),
        highest score first.
    """
    model.eval()
    scores = []
    with torch.no_grad():
        for batch, _ in data_loader:
            class_probs = F.softmax(model(batch.to(device)), dim=1)
            top_prob = class_probs.max(dim=1).values.cpu().numpy()
            scores.extend(1 - top_prob)
    ascending = np.argsort(scores)
    # Reverse so the least confident predictions come first.
    return ascending[::-1]

def prediction_entropy(model, data_loader, device):
    """Rank samples from highest to lowest predictive entropy.

    The entropy is computed from the softmax (over dim 1) of the model output.

    Args:
        model: Callable module; it is switched to eval mode here.
        data_loader: Iterable of ``(inputs, _)`` batches; the second item of
            each batch is ignored.
        device: Device the inputs are moved to before the forward pass.

    Returns:
        Array of sample positions (in the order ``data_loader`` yields them),
        highest entropy first.
    """
    model.eval()
    per_sample = []
    with torch.no_grad():
        for batch, _ in data_loader:
            p = F.softmax(model(batch.to(device)), dim=1)
            # The 1e-6 offset keeps log() finite when a probability is 0.
            weighted_log = p * torch.log(p + 1e-6)
            per_sample.extend(-weighted_log.sum(dim=1).cpu().numpy())
    ranking = np.argsort(per_sample)
    return ranking[::-1]

def margin_sampling(model, data_loader, device):
    """Rank samples by the gap between their two most probable classes.

    The margin of a sample is ``p_top1 - p_top2`` of the softmax (over dim 1)
    of the model output. Samples are returned with the SMALLEST margin first:
    a small gap means the model can barely separate its two best guesses, so
    those are the samples margin sampling should query. (Ranking by the
    largest margin would pick the most confident predictions instead.)

    Args:
        model: Callable module; it is switched to eval mode here.
        data_loader: Iterable of ``(inputs, _)`` batches; the second item of
            each batch is ignored.
        device: Device the inputs are moved to before the forward pass.

    Returns:
        Array of sample positions (in the order ``data_loader`` yields them),
        smallest margin first.
    """
    model.eval()
    margins = []
    with torch.no_grad():
        for images, _ in data_loader:
            probs = F.softmax(model(images.to(device)), dim=1)
            # Only the two largest probabilities are needed, so a top-2
            # selection replaces a full sort over the class dimension.
            top_two = torch.topk(probs, k=2, dim=1).values
            margin = top_two[:, 0] - top_two[:, 1]
            margins.extend(margin.cpu().numpy())
    return np.argsort(margins)

def cosine_similarity_selection(model, data_loader, device, labeled_data):
    """Rank pool samples by their highest cosine similarity to labeled samples.

    The raw model outputs are used as the embedding of each sample. For every
    sample from ``data_loader`` the maximum cosine similarity to any sample
    from ``labeled_data`` is taken as its score.

    NOTE(review): the ranking puts the samples MOST similar to the labeled
    set first. Confirm this is intended; a diversity-oriented query would
    usually prefer the least similar samples.

    Args:
        model: Callable module; it is switched to eval mode here.
        data_loader: Iterable of ``(inputs, _)`` batches to be ranked.
        device: Device the inputs are moved to before the forward pass.
        labeled_data: Iterable of ``(inputs, _)`` batches forming the
            reference set.

    Returns:
        Array of sample positions (in the order ``data_loader`` yields them),
        highest similarity first.
    """
    model.eval()

    def forward_numpy(batch):
        # Forward pass on `device`, result brought back as a NumPy array.
        return model(batch.to(device)).cpu().numpy()

    with torch.no_grad():
        reference = np.concatenate(
            [forward_numpy(batch) for batch, _ in labeled_data], axis=0
        )
        best_match = []
        for batch, _ in data_loader:
            pairwise = cosine_similarity(forward_numpy(batch), reference)
            best_match.extend(pairwise.max(axis=1))
    ranking = np.argsort(best_match)
    return ranking[::-1]

def l2_norm_selection(model, data_loader, device, labeled_data):
    """Rank pool samples by their Euclidean distance to the nearest labeled sample.

    The raw model outputs are used as the embedding of each sample. For every
    sample from ``data_loader`` the minimum Euclidean distance to any sample
    from ``labeled_data`` is taken as its score.

    NOTE(review): the ranking puts the samples CLOSEST to the labeled set
    first. Confirm this is intended; a diversity-oriented query would usually
    prefer the farthest samples.

    Args:
        model: Callable module; it is switched to eval mode here.
        data_loader: Iterable of ``(inputs, _)`` batches to be ranked.
        device: Device the inputs are moved to before the forward pass.
        labeled_data: Iterable of ``(inputs, _)`` batches forming the
            reference set.

    Returns:
        Array of sample positions (in the order ``data_loader`` yields them),
        smallest distance first.
    """
    model.eval()

    def embed(batch):
        # Forward pass on `device`, result brought back as a NumPy array.
        return model(batch.to(device)).cpu().numpy()

    with torch.no_grad():
        reference = np.concatenate(
            [embed(batch) for batch, _ in labeled_data], axis=0
        )
        nearest = []
        for batch, _ in data_loader:
            pairwise = cdist(embed(batch), reference, 'euclidean')
            nearest.extend(pairwise.min(axis=1))
    return np.argsort(nearest)

def kl_divergence_selection(model, data_loader, device):
    """Rank samples by a KL-style score of the softmax output, largest first.

    The score is ``sum(p * (log(p + 1e-6) - log_softmax(outputs)))`` over
    dim 1, where ``p`` is the softmax of the same outputs.

    NOTE(review): both terms describe the same distribution, so this is the
    divergence of ``p`` from itself and differs from zero only through the
    1e-6 offset. A different reference distribution was presumably intended;
    confirm before relying on this ranking.

    Args:
        model: Callable module; it is switched to eval mode here.
        data_loader: Iterable of ``(inputs, _)`` batches; the second item of
            each batch is ignored.
        device: Device the inputs are moved to before the forward pass.

    Returns:
        Array of sample positions (in the order ``data_loader`` yields them),
        largest score first.
    """
    model.eval()
    scores = []
    with torch.no_grad():
        for batch, _ in data_loader:
            logits = model(batch.to(device))
            p = F.softmax(logits, dim=1)
            log_p = F.log_softmax(logits, dim=1)
            log_gap = torch.log(p + 1e-6) - log_p
            scores.extend(torch.sum(p * log_gap, dim=1).cpu().numpy())
    ranking = np.argsort(scores)
    return ranking[::-1]


# Pick the compute device, then build the two models that are compared.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")

custom_model = CustomCNN(num_conv_layers=2, filter_size=16)
custom_model = custom_model.to(device)

# ResNet-18 whose final fully connected layer is swapped for a 10-output head.
pretrained_model = models.resnet18(pretrained=True)
pretrained_model.fc = nn.Linear(
    in_features=pretrained_model.fc.in_features,
    out_features=10,
)
pretrained_model = pretrained_model.to(device)

# One shared loss function; each model gets its own Adam optimizer.
criterion = nn.CrossEntropyLoss()
custom_optimizer = optim.Adam(custom_model.parameters(), lr=1e-3)
pretrained_optimizer = optim.Adam(pretrained_model.parameters(), lr=1e-3)

# Active learning settings and per-strategy accuracy history
# (one list of accuracies per strategy name).
num_iterations = 5
samples_per_iteration = 1000
strategy_performance = {
    name: []
    for name in (
        'Least Confidence',
        'Prediction Entropy',
        'Margin Sampling',
        'Cosine Similarity',
        'L2 Norm',
        'KL Divergence',
    )
}

# Function to train a model
def train_model(model, data_loader, criterion, optimizer, device):
    """Train ``model`` for one pass over ``data_loader``.

    Args:
        model: Module to train; it is switched to training mode here.
        data_loader: Iterable of ``(inputs, labels)`` batches. It does not
            need to support ``len()``.
        criterion: Callable ``criterion(outputs, labels)`` returning the loss
            tensor to back-propagate.
        optimizer: Optimizer that updates the parameters of ``model``.
        device: Device the inputs and labels are moved to.

    Returns:
        Mean of the per-batch loss values as a float, or ``0.0`` when
        ``data_loader`` yields no batches (nothing was trained).
    """
    model.train()  # Set model to training mode
    running_loss = 0.0
    # Batches are counted here rather than via len(data_loader) so that
    # unsized iterables work and an empty loader cannot divide by zero.
    num_batches = 0
    for images, labels in data_loader:
        images, labels = images.to(device), labels.to(device)

        # Clear gradients left over from the previous step
        optimizer.zero_grad()

        # Forward pass and loss
        loss = criterion(model(images), labels)

        # Backward pass and parameter update
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        num_batches += 1

    if num_batches == 0:
        return 0.0
    return running_loss / num_batches

# Function to test the model
def test_model(model, test_loader, device):
    model.eval()  # Set model to evaluation mode
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    return accuracy

# Active learning loop
# Each round trains both models on the current labeled pool, evaluates them on
# the test set, and then lets every query strategy move up to
# `samples_per_iteration` samples from the unlabeled pool into the labeled pool.
for iteration in range(num_iterations):
    print(f"\nActive Learning Iteration {iteration + 1}/{num_iterations}")

    # Train custom model
    for epoch in range(5):
        train_loss = train_model(custom_model, labeled_loader, criterion, custom_optimizer, device)
        print(f"Custom Model - Iteration {iteration + 1}, Epoch {epoch + 1}, Loss: {train_loss:.4f}")

    custom_accuracy = test_model(custom_model, test_loader, device)
    print(f"Custom Model Test Accuracy after Iteration {iteration + 1}: {custom_accuracy:.2f}%")

    # Train pretrained model
    for epoch in range(5):
        train_loss = train_model(pretrained_model, labeled_loader, criterion, pretrained_optimizer, device)
        print(f"Pretrained Model - Iteration {iteration + 1}, Epoch {epoch + 1}, Loss: {train_loss:.4f}")

    pretrained_accuracy = test_model(pretrained_model, test_loader, device)
    print(f"Pretrained Model Test Accuracy after Iteration {iteration + 1}: {pretrained_accuracy:.2f}%")

    # Active learning strategy
    if len(unlabeled_dataset) > 0:
        # Track accuracy for each strategy
        strategy_results = {}

        # (display name, query function, whether the function also takes the
        # labeled loader as a fourth argument). The uncertainty-based
        # strategies accept only (model, loader, device); passing a fourth
        # argument to them raises TypeError.
        strategies = [
            ('Least Confidence', least_confidence, False),
            ('Prediction Entropy', prediction_entropy, False),
            ('Margin Sampling', margin_sampling, False),
            ('Cosine Similarity', cosine_similarity_selection, True),
            ('L2 Norm', l2_norm_selection, True),
            ('KL Divergence', kl_divergence_selection, False),
        ]

        for strategy_name, strategy_fn, needs_labeled in strategies:
            if len(unlabeled_indices) == 0:
                print("Unlabeled pool exhausted; skipping remaining strategies.")
                break

            print(f"Applying {strategy_name}...")

            # Rebuild the loader for every strategy: the positions a strategy
            # returns are relative to the loader it scored, so the loader must
            # reflect the current `unlabeled_indices` or the lookup below
            # would pick the wrong samples (or run past the end of the array).
            unlabeled_loader = DataLoader(unlabeled_dataset, batch_size=batch_size, shuffle=False)

            if needs_labeled:
                selected_indices = strategy_fn(custom_model, unlabeled_loader, device, labeled_loader)
            else:
                selected_indices = strategy_fn(custom_model, unlabeled_loader, device)
            selected_indices = selected_indices[:samples_per_iteration]

            labeled_indices = np.append(labeled_indices, unlabeled_indices[selected_indices])
            unlabeled_indices = np.setdiff1d(unlabeled_indices, unlabeled_indices[selected_indices])

            # Update datasets
            labeled_dataset = Subset(train_dataset, labeled_indices)
            unlabeled_dataset = Subset(train_dataset, unlabeled_indices)
            labeled_loader = DataLoader(labeled_dataset, batch_size=batch_size, shuffle=True)

            # Test the model after applying this strategy
            # NOTE(review): custom_model is not retrained between strategies,
            # so every strategy in a round is scored on the same weights and
            # the recorded accuracies do not reflect the samples a strategy
            # just selected. A fair comparison needs a separate model and
            # labeled pool per strategy.
            accuracy = test_model(custom_model, test_loader, device)
            strategy_results[strategy_name] = accuracy
            print(f"{strategy_name} Test Accuracy: {accuracy:.2f}%")

        # Track performance for each strategy over all iterations
        for strategy_name, accuracy in strategy_results.items():
            strategy_performance[strategy_name].append(accuracy)

# Compute average accuracy for each strategy across all iterations.
# A strategy with no recorded accuracy gets NaN explicitly; np.mean([]) would
# give the same value but also emit a RuntimeWarning.
average_accuracy_per_strategy = {
    strategy: np.mean(accuracies) if accuracies else float("nan")
    for strategy, accuracies in strategy_performance.items()
}

# Determine the most effective strategy overall, considering only strategies
# that have data. NaN never compares greater than anything, so max() over
# all-NaN averages would silently report the first strategy.
scored_strategies = {
    strategy: average
    for strategy, average in average_accuracy_per_strategy.items()
    if not np.isnan(average)
}
if scored_strategies:
    most_effective_strategy = max(scored_strategies, key=scored_strategies.get)
    print(f"\nThe Most Effective Active Learning Strategy Overall: {most_effective_strategy}")
else:
    most_effective_strategy = None
    print("\nNo strategy accuracies were recorded; cannot determine the most effective strategy.")
# ChatGPT said:
# ChatGPT
# (The two lines above are chat-export residue, kept as comments so the file parses.)