In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, Subset
import numpy as np
import torch.nn.functional as F
from sklearn.metrics.pairwise import cosine_similarity
from scipy.spatial.distance import cdist

# Define Active Learning Strategies
def least_confidence(model, data_loader, device):
    model.eval()
    uncertainties = []
    with torch.no_grad():
        for images, _ in data_loader:
            images = images.to(device)
            outputs = model(images)
            probs = F.softmax(outputs, dim=1)
            uncertainties.extend(1 - torch.max(probs, dim=1)[0].cpu().numpy())
    return np.argsort(uncertainties)[::-1]  # Sort by least confidence

def prediction_entropy(model, data_loader, device):
    model.eval()
    entropies = []
    with torch.no_grad():
        for images, _ in data_loader:
            images = images.to(device)
            outputs = model(images)
            probs = F.softmax(outputs, dim=1)
            entropy = -torch.sum(probs * torch.log(probs + 1e-6), dim=1).cpu().numpy()
            entropies.extend(entropy)
    return np.argsort(entropies)[::-1]  # Sort by highest entropy

def margin_sampling(model, data_loader, device):
    model.eval()
    margins = []
    with torch.no_grad():
        for images, _ in data_loader:
            images = images.to(device)
            outputs = model(images)
            probs = F.softmax(outputs, dim=1)
            sorted_probs, _ = torch.sort(probs, descending=True)
            margin = sorted_probs[:, 0] - sorted_probs[:, 1]
            margins.extend(margin.cpu().numpy())
    return np.argsort(margins)[::-1]  # Sort by largest margin

def cosine_similarity_selection(model, data_loader, device, labeled_data):
    model.eval()
    labeled_embeddings = []
    with torch.no_grad():
        for images, _ in labeled_data:
            images = images.to(device)
            outputs = model(images)
            labeled_embeddings.append(outputs.cpu().numpy())
    labeled_embeddings = np.concatenate(labeled_embeddings, axis=0)

    similarities = []
    with torch.no_grad():
        for images, _ in data_loader:
            images = images.to(device)
            outputs = model(images)
            embeddings = outputs.cpu().numpy()
            sim = cosine_similarity(embeddings, labeled_embeddings)
            similarities.extend(np.max(sim, axis=1))
    return np.argsort(similarities)[::-1]  # Sort by highest cosine similarity

def l2_norm_selection(model, data_loader, device, labeled_data):
    model.eval()
    labeled_embeddings = []
    with torch.no_grad():
        for images, _ in labeled_data:
            images = images.to(device)
            outputs = model(images)
            labeled_embeddings.append(outputs.cpu().numpy())
    labeled_embeddings = np.concatenate(labeled_embeddings, axis=0)

    distances = []
    with torch.no_grad():
        for images, _ in data_loader:
            images = images.to(device)
            outputs = model(images)
            embeddings = outputs.cpu().numpy()
            dist = cdist(embeddings, labeled_embeddings, 'euclidean')
            distances.extend(np.min(dist, axis=1))
    return np.argsort(distances)  # Sort by smallest L2 distance

def kl_divergence_selection(model, data_loader, device):
    model.eval()
    divergences = []
    with torch.no_grad():
        for images, _ in data_loader:
            images = images.to(device)
            outputs = model(images)
            probs = F.softmax(outputs, dim=1)
            log_probs = F.log_softmax(outputs, dim=1)
            divergence = torch.sum(probs * (torch.log(probs + 1e-6) - log_probs), dim=1).cpu().numpy()
            divergences.extend(divergence)
    return np.argsort(divergences)[::-1]  # Sort by largest KL divergence


# Initialize models, criterion, optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
custom_model = CustomCNN(num_conv_layers=2, filter_size=16).to(device)
pretrained_model = models.resnet18(pretrained=True)
pretrained_model.fc = nn.Linear(pretrained_model.fc.in_features, 10)
pretrained_model = pretrained_model.to(device)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
custom_optimizer = optim.Adam(custom_model.parameters(), lr=1e-3)
pretrained_optimizer = optim.Adam(pretrained_model.parameters(), lr=1e-3)

# Active learning loop with strategy tracking
num_iterations = 5
samples_per_iteration = 1000
strategy_performance = {  # Track the average accuracy per strategy
    'Least Confidence': [],
    'Prediction Entropy': [],
    'Margin Sampling': [],
    'Cosine Similarity': [],
    'L2 Norm': [],
    'KL Divergence': []
}

# Function to train a model
def train_model(model, data_loader, criterion, optimizer, device):
    model.train()  # Set model to training mode
    running_loss = 0.0
    for images, labels in data_loader:
        images, labels = images.to(device), labels.to(device)

        # Zero the gradients
        optimizer.zero_grad()

        # Forward pass
        outputs = model(images)

        # Calculate loss
        loss = criterion(outputs, labels)

        # Backward pass and optimize
        loss.backward()
        optimizer.step()

        # Accumulate loss
        running_loss += loss.item()

    # Return average loss for this epoch
    epoch_loss = running_loss / len(data_loader)
    return epoch_loss

# Function to test the model
def test_model(model, test_loader, device):
    model.eval()  # Set model to evaluation mode
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()

    accuracy = 100 * correct / total
    return accuracy

# Active learning loop
for iteration in range(num_iterations):
    print(f"\nActive Learning Iteration {iteration + 1}/{num_iterations}")

    # Train custom model
    for epoch in range(5):
        train_loss = train_model(custom_model, labeled_loader, criterion, custom_optimizer, device)
        print(f"Custom Model - Iteration {iteration + 1}, Epoch {epoch + 1}, Loss: {train_loss:.4f}")

    custom_accuracy = test_model(custom_model, test_loader, device)
    print(f"Custom Model Test Accuracy after Iteration {iteration + 1}: {custom_accuracy:.2f}%")

    # Train pretrained model
    for epoch in range(5):
        train_loss = train_model(pretrained_model, labeled_loader, criterion, pretrained_optimizer, device)
        print(f"Pretrained Model - Iteration {iteration + 1}, Epoch {epoch + 1}, Loss: {train_loss:.4f}")

    pretrained_accuracy = test_model(pretrained_model, test_loader, device)
    print(f"Pretrained Model Test Accuracy after Iteration {iteration + 1}: {pretrained_accuracy:.2f}%")

    # Active learning strategy
    if len(unlabeled_dataset) > 0:
        unlabeled_loader = DataLoader(unlabeled_dataset, batch_size=batch_size, shuffle=False)

        # Track accuracy for each strategy
        strategy_results = {}

        strategies = [
            ('Least Confidence', least_confidence),
            ('Prediction Entropy', prediction_entropy),
            ('Margin Sampling', margin_sampling),
            ('Cosine Similarity', cosine_similarity_selection),
            ('L2 Norm', l2_norm_selection),
            ('KL Divergence', kl_divergence_selection)
        ]

        for strategy_name, strategy_fn in strategies:
            print(f"Applying {strategy_name}...")

            selected_indices = strategy_fn(custom_model, unlabeled_loader, device, labeled_loader)
            selected_indices = selected_indices[:samples_per_iteration]

            labeled_indices = np.append(labeled_indices, unlabeled_indices[selected_indices])
            unlabeled_indices = np.setdiff1d(unlabeled_indices, unlabeled_indices[selected_indices])

            # Update datasets
            labeled_dataset = Subset(train_dataset, labeled_indices)
            unlabeled_dataset = Subset(train_dataset, unlabeled_indices)
            labeled_loader = DataLoader(labeled_dataset, batch_size=batch_size, shuffle=True)

            # Test the model after applying this strategy
            accuracy = test_model(custom_model, test_loader, device)
            strategy_results[strategy_name] = accuracy
            print(f"{strategy_name} Test Accuracy: {accuracy:.2f}%")

        # Track performance for each strategy over all iterations
        for strategy_name, accuracy in strategy_results.items():
            strategy_performance[strategy_name].append(accuracy)

# Compute average accuracy for each strategy across all iterations
average_accuracy_per_strategy = {strategy: np.mean(accuracies) for strategy, accuracies in strategy_performance.items()}

# Determine the most effective strategy overall
most_effective_strategy = max(average_accuracy_per_strategy, key=average_accuracy_per_strategy.get)
print(f"\nThe Most Effective Active Learning Strategy Overall: {most_effective_strategy}")


NameError: name 'CustomCNN' is not defined

In [2]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, Subset
import numpy as np
import torch.nn.functional as F
from sklearn.metrics.pairwise import cosine_similarity
from scipy.spatial.distance import cdist

# Define the CustomCNN class
class CustomCNN(nn.Module):
    def __init__(self, num_conv_layers=2, filter_size=16, num_classes=10):
        super(CustomCNN, self).__init__()
        layers = []
        in_channels = 3  # Assuming input images are RGB
        for _ in range(num_conv_layers):
            layers.append(nn.Conv2d(in_channels, filter_size, kernel_size=3, padding=1))
            layers.append(nn.ReLU())
            layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
            in_channels = filter_size
            filter_size *= 2  # Increase filter size in each layer
        self.conv = nn.Sequential(*layers)
        self.fc = nn.Linear(in_channels * 8 * 8, num_classes)  # Adjust for input size (32x32 for CIFAR-10)

    def forward(self, x):
        x = self.conv(x)
        x = torch.flatten(x, start_dim=1)
        x = self.fc(x)
        return x

# Define Active Learning Strategies
def least_confidence(model, data_loader, device):
    model.eval()
    uncertainties = []
    with torch.no_grad():
        for images, _ in data_loader:
            images = images.to(device)
            outputs = model(images)
            probs = F.softmax(outputs, dim=1)
            uncertainties.extend(1 - torch.max(probs, dim=1)[0].cpu().numpy())
    return np.argsort(uncertainties)[::-1]

def prediction_entropy(model, data_loader, device):
    model.eval()
    entropies = []
    with torch.no_grad():
        for images, _ in data_loader:
            images = images.to(device)
            outputs = model(images)
            probs = F.softmax(outputs, dim=1)
            entropy = -torch.sum(probs * torch.log(probs + 1e-6), dim=1).cpu().numpy()
            entropies.extend(entropy)
    return np.argsort(entropies)[::-1]

def margin_sampling(model, data_loader, device):
    model.eval()
    margins = []
    with torch.no_grad():
        for images, _ in data_loader:
            images = images.to(device)
            outputs = model(images)
            probs = F.softmax(outputs, dim=1)
            sorted_probs, _ = torch.sort(probs, descending=True)
            margin = sorted_probs[:, 0] - sorted_probs[:, 1]
            margins.extend(margin.cpu().numpy())
    return np.argsort(margins)[::-1]

def cosine_similarity_selection(model, data_loader, device, labeled_data):
    model.eval()
    labeled_embeddings = []
    with torch.no_grad():
        for images, _ in labeled_data:
            images = images.to(device)
            outputs = model(images)
            labeled_embeddings.append(outputs.cpu().numpy())
    labeled_embeddings = np.concatenate(labeled_embeddings, axis=0)

    similarities = []
    with torch.no_grad():
        for images, _ in data_loader:
            images = images.to(device)
            outputs = model(images)
            embeddings = outputs.cpu().numpy()
            sim = cosine_similarity(embeddings, labeled_embeddings)
            similarities.extend(np.max(sim, axis=1))
    return np.argsort(similarities)[::-1]

def l2_norm_selection(model, data_loader, device, labeled_data):
    model.eval()
    labeled_embeddings = []
    with torch.no_grad():
        for images, _ in labeled_data:
            images = images.to(device)
            outputs = model(images)
            labeled_embeddings.append(outputs.cpu().numpy())
    labeled_embeddings = np.concatenate(labeled_embeddings, axis=0)

    distances = []
    with torch.no_grad():
        for images, _ in data_loader:
            images = images.to(device)
            outputs = model(images)
            embeddings = outputs.cpu().numpy()
            dist = cdist(embeddings, labeled_embeddings, 'euclidean')
            distances.extend(np.min(dist, axis=1))
    return np.argsort(distances)

def kl_divergence_selection(model, data_loader, device):
    model.eval()
    divergences = []
    with torch.no_grad():
        for images, _ in data_loader:
            images = images.to(device)
            outputs = model(images)
            probs = F.softmax(outputs, dim=1)
            log_probs = F.log_softmax(outputs, dim=1)
            divergence = torch.sum(probs * (torch.log(probs + 1e-6) - log_probs), dim=1).cpu().numpy()
            divergences.extend(divergence)
    return np.argsort(divergences)[::-1]

# Initialize models, criterion, optimizer
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
custom_model = CustomCNN(num_conv_layers=2, filter_size=16).to(device)
pretrained_model = models.resnet18(pretrained=True)
pretrained_model.fc = nn.Linear(pretrained_model.fc.in_features, 10)
pretrained_model = pretrained_model.to(device)

criterion = nn.CrossEntropyLoss()
custom_optimizer = optim.Adam(custom_model.parameters(), lr=1e-3)
pretrained_optimizer = optim.Adam(pretrained_model.parameters(), lr=1e-3)

# Dummy placeholders for DataLoader and Dataset (replace with your dataset)
labeled_loader = None  # Replace with DataLoader for labeled data
unlabeled_loader = None  # Replace with DataLoader for unlabeled data
test_loader = None  # Replace with DataLoader for test data
train_dataset = None  # Replace with your dataset
labeled_indices = []  # List of indices for labeled data
unlabeled_indices = []  # List of indices for unlabeled data

# Active learning loop with strategy tracking
num_iterations = 5
samples_per_iteration = 1000
strategy_performance = {
    'Least Confidence': [],
    'Prediction Entropy': [],
    'Margin Sampling': [],
    'Cosine Similarity': [],
    'L2 Norm': [],
    'KL Divergence': []
}

def train_model(model, data_loader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    for images, labels in data_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    return running_loss / len(data_loader)

def test_model(model, test_loader, device):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return 100 * correct / total

for iteration in range(num_iterations):
    print(f"\nActive Learning Iteration {iteration + 1}/{num_iterations}")

    # Train custom model
    for epoch in range(5):
        train_loss = train_model(custom_model, labeled_loader, criterion, custom_optimizer, device)
        print(f"Custom Model - Iteration {iteration + 1}, Epoch {epoch + 1}, Loss: {train_loss:.4f}")

    custom_accuracy = test_model(custom_model, test_loader, device)
    print(f"Custom Model Test Accuracy after Iteration {iteration + 1}: {custom_accuracy:.2f}%")

    # Train pretrained model
    for epoch in range(5):
        train_loss = train_model(pretrained_model, labeled_loader, criterion, pretrained_optimizer, device)
        print(f"Pretrained Model - Iteration {iteration + 1}, Epoch {epoch + 1}, Loss: {train_loss:.4f}")

    pretrained_accuracy = test_model(pretrained_model, test_loader, device)
    print(f"Pretrained Model Test Accuracy after Iteration {iteration + 1}: {pretrained_accuracy:.2f}%")

    # Active learning strategy (skipped due to placeholders for datasets)


Downloading: "https://download.pytorch.org/models/resnet18-f37072fd.pth" to /root/.cache/torch/hub/checkpoints/resnet18-f37072fd.pth
100%|██████████| 44.7M/44.7M [00:00<00:00, 161MB/s]



Active Learning Iteration 1/5


TypeError: 'NoneType' object is not iterable

In [3]:
import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms, models
from torch.utils.data import DataLoader, Subset
import numpy as np
import torch.nn.functional as F
from sklearn.metrics.pairwise import cosine_similarity
from scipy.spatial.distance import cdist

# Define Active Learning Strategies
def least_confidence(model, data_loader, device):
    model.eval()
    uncertainties = []
    with torch.no_grad():
        for images, _ in data_loader:
            images = images.to(device)
            outputs = model(images)
            probs = F.softmax(outputs, dim=1)
            uncertainties.extend(1 - torch.max(probs, dim=1)[0].cpu().numpy())
    return np.argsort(uncertainties)[::-1]

def prediction_entropy(model, data_loader, device):
    model.eval()
    entropies = []
    with torch.no_grad():
        for images, _ in data_loader:
            images = images.to(device)
            outputs = model(images)
            probs = F.softmax(outputs, dim=1)
            entropy = -torch.sum(probs * torch.log(probs + 1e-6), dim=1).cpu().numpy()
            entropies.extend(entropy)
    return np.argsort(entropies)[::-1]

def margin_sampling(model, data_loader, device):
    model.eval()
    margins = []
    with torch.no_grad():
        for images, _ in data_loader:
            images = images.to(device)
            outputs = model(images)
            probs = F.softmax(outputs, dim=1)
            sorted_probs, _ = torch.sort(probs, descending=True)
            margin = sorted_probs[:, 0] - sorted_probs[:, 1]
            margins.extend(margin.cpu().numpy())
    return np.argsort(margins)[::-1]

def cosine_similarity_selection(model, data_loader, device, labeled_data):
    model.eval()
    labeled_embeddings = []
    with torch.no_grad():
        for images, _ in labeled_data:
            images = images.to(device)
            outputs = model(images)
            labeled_embeddings.append(outputs.cpu().numpy())
    labeled_embeddings = np.concatenate(labeled_embeddings, axis=0)

    similarities = []
    with torch.no_grad():
        for images, _ in data_loader:
            images = images.to(device)
            outputs = model(images)
            embeddings = outputs.cpu().numpy()
            sim = cosine_similarity(embeddings, labeled_embeddings)
            similarities.extend(np.max(sim, axis=1))
    return np.argsort(similarities)[::-1]

def l2_norm_selection(model, data_loader, device, labeled_data):
    model.eval()
    labeled_embeddings = []
    with torch.no_grad():
        for images, _ in labeled_data:
            images = images.to(device)
            outputs = model(images)
            labeled_embeddings.append(outputs.cpu().numpy())
    labeled_embeddings = np.concatenate(labeled_embeddings, axis=0)

    distances = []
    with torch.no_grad():
        for images, _ in data_loader:
            images = images.to(device)
            outputs = model(images)
            embeddings = outputs.cpu().numpy()
            dist = cdist(embeddings, labeled_embeddings, 'euclidean')
            distances.extend(np.min(dist, axis=1))
    return np.argsort(distances)

def kl_divergence_selection(model, data_loader, device):
    model.eval()
    divergences = []
    with torch.no_grad():
        for images, _ in data_loader:
            images = images.to(device)
            outputs = model(images)
            probs = F.softmax(outputs, dim=1)
            log_probs = F.log_softmax(outputs, dim=1)
            divergence = torch.sum(probs * (torch.log(probs + 1e-6) - log_probs), dim=1).cpu().numpy()
            divergences.extend(divergence)
    return np.argsort(divergences)[::-1]

# Initialize datasets
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform, download=True)

# Initialize labeled and unlabeled datasets
initial_labeled_samples = 100
batch_size = 64

labeled_indices = np.random.choice(len(train_dataset), size=initial_labeled_samples, replace=False)
unlabeled_indices = np.setdiff1d(np.arange(len(train_dataset)), labeled_indices)

labeled_dataset = Subset(train_dataset, labeled_indices)
unlabeled_dataset = Subset(train_dataset, unlabeled_indices)

labeled_loader = DataLoader(labeled_dataset, batch_size=batch_size, shuffle=True)
unlabeled_loader = DataLoader(unlabeled_dataset, batch_size=batch_size, shuffle=False)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)

# Define models
class CustomCNN(nn.Module):
    def __init__(self, num_conv_layers=2, filter_size=16):
        super(CustomCNN, self).__init__()
        self.conv_layers = nn.Sequential(
            nn.Conv2d(1, filter_size, kernel_size=3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )
        for _ in range(num_conv_layers - 1):
            self.conv_layers.add_module(
                f"conv{_ + 2}",
                nn.Sequential(
                    nn.Conv2d(filter_size, filter_size, kernel_size=3, padding=1),
                    nn.ReLU(),
                    nn.MaxPool2d(2)
                )
            )
        self.fc = nn.Linear(filter_size * 7 * 7, 10)

    def forward(self, x):
        x = self.conv_layers(x)
        x = x.view(x.size(0), -1)
        x = self.fc(x)
        return x

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
custom_model = CustomCNN(num_conv_layers=2, filter_size=16).to(device)
pretrained_model = models.resnet18(weights="IMAGENET1K_V1")
pretrained_model.fc = nn.Linear(pretrained_model.fc.in_features, 10)
pretrained_model = pretrained_model.to(device)

# Define loss and optimizers
criterion = nn.CrossEntropyLoss()
custom_optimizer = optim.Adam(custom_model.parameters(), lr=1e-3)
pretrained_optimizer = optim.Adam(pretrained_model.parameters(), lr=1e-3)

# Train and test functions
def train_model(model, data_loader, criterion, optimizer, device):
    model.train()
    running_loss = 0.0
    for images, labels in data_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    return running_loss / len(data_loader)

def test_model(model, data_loader, device):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in data_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return 100 * correct / total

# Active learning loop
num_iterations = 5
samples_per_iteration = 1000
strategy_performance = {
    'Least Confidence': [],
    'Prediction Entropy': [],
    'Margin Sampling': [],
    'Cosine Similarity': [],
    'L2 Norm': [],
    'KL Divergence': []
}

for iteration in range(num_iterations):
    print(f"\nActive Learning Iteration {iteration + 1}/{num_iterations}")

    # Train custom model
    for epoch in range(5):
        train_loss = train_model(custom_model, labeled_loader, criterion, custom_optimizer, device)
        print(f"Custom Model - Iteration {iteration + 1}, Epoch {epoch + 1}, Loss: {train_loss:.4f}")

    custom_accuracy = test_model(custom_model, test_loader, device)
    print(f"Custom Model Test Accuracy after Iteration {iteration + 1}: {custom_accuracy:.2f}%")

    # Train pretrained model
    for epoch in range(5):
        train_loss = train_model(pretrained_model, labeled_loader, criterion, pretrained_optimizer, device)
        print(f"Pretrained Model - Iteration {iteration + 1}, Epoch {epoch + 1}, Loss: {train_loss:.4f}")

    pretrained_accuracy = test_model(pretrained_model, test_loader, device)
    print(f"Pretrained Model Test Accuracy after Iteration {iteration + 1}: {pretrained_accuracy:.2f}%")

    # Active learning strategies
    if len(unlabeled_dataset) > 0:
        strategy_results = {}
        strategies = [
            ('Least Confidence', least_confidence),
            ('Prediction Entropy', prediction_entropy),
            ('Margin Sampling', margin_sampling),
            ('Cosine Similarity', cosine_similarity_selection),
            ('L2 Norm', l2_norm_selection),
            ('KL Divergence', kl_divergence_selection)
        ]

        for strategy_name, strategy_fn in strategies:
            print(f"Applying {strategy_name}...")

            selected_indices = strategy_fn(custom_model, unlabeled_loader, device)
            selected_indices = selected_indices[:samples_per_iteration]

            labeled_indices = np.append(labeled_indices, unlabeled_indices[selected_indices])
            unlabeled_indices = np.setdiff1d(unlabeled_indices, unlabeled_indices[selected_indices])

            labeled_dataset = Subset(train_dataset, labeled_indices)
            unlabeled_dataset = Subset(train_dataset, unlabeled_indices)
            labeled_loader = DataLoader(labeled_dataset, batch_size=batch_size, shuffle=True)

            accuracy = test_model(custom_model, test_loader, device)
            strategy_results[strategy_name] = accuracy
            print(f"{strategy_name} Test Accuracy: {accuracy:.2f}%")

        for strategy_name, accuracy in strategy_results.items():
            strategy_performance[strategy_name].append(accuracy)

average_accuracy_per_strategy


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9.91M/9.91M [00:00<00:00, 11.6MB/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28.9k/28.9k [00:00<00:00, 352kB/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1.65M/1.65M [00:00<00:00, 3.20MB/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4.54k/4.54k [00:00<00:00, 4.22MB/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw


Active Learning Iteration 1/5
Custom Model - Iteration 1, Epoch 1, Loss: 2.3071
Custom Model - Iteration 1, Epoch 2, Loss: 2.2330
Custom Model - Iteration 1, Epoch 3, Loss: 2.1865
Custom Model - Iteration 1, Epoch 4, Loss: 2.1203
Custom Model - Iteration 1, Epoch 5, Loss: 2.0754
Custom Model Test Accuracy after Iteration 1: 16.07%


RuntimeError: Given groups=1, weight of size [64, 3, 7, 7], expected input[64, 1, 28, 28] to have 3 channels, but got 1 channels instead

In [5]:


import torch
import torch.nn as nn
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

# Define CNN model class
def create_model(num_conv_layers, filter_size):
    layers = []
    in_channels = 1  # Fashion-MNIST has 1 channel

    for i in range(num_conv_layers):
        out_channels = filter_size * (2 ** i)
        layers.append(nn.Conv2d(in_channels, out_channels, kernel_size=3, padding=1))
        layers.append(nn.ReLU())
        layers.append(nn.MaxPool2d(kernel_size=2, stride=2))
        in_channels = out_channels

    layers.append(nn.Flatten())

    # Dynamically compute the input size for the first Linear layer
    dummy_input = torch.zeros(1, 1, 28, 28)  # Assuming input size 28x28
    with torch.no_grad():
        for layer in layers:
            dummy_input = layer(dummy_input) if isinstance(layer, (nn.Conv2d, nn.MaxPool2d)) else dummy_input
    flatten_size = dummy_input.numel()

    layers.append(nn.Linear(flatten_size, 128))
    layers.append(nn.ReLU())
    layers.append(nn.Linear(128, 10))

    return nn.Sequential(*layers)

# Train function
def train_model(model, train_loader, criterion, optimizer):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(images)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    return running_loss / len(train_loader)

# Test function
def test_model(model, test_loader):
    model.eval()
    correct = 0
    total = 0
    with torch.no_grad():
        for images, labels in test_loader:
            images, labels = images.to(device), labels.to(device)
            outputs = model(images)
            _, predicted = torch.max(outputs, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
    return 100 * correct / total

# Device configuration
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Load dataset
transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])
train_dataset = datasets.FashionMNIST(root="./data", train=True, transform=transform, download=True)
test_dataset = datasets.FashionMNIST(root="./data", train=False, transform=transform, download=True)

# Hyperparameter combinations
configs = [
    # {"num_conv_layers": 2, "filter_size": 16, "learning_rate": 0.001, "batch_size": 32},
    # {"num_conv_layers": 2, "filter_size": 32, "learning_rate": 0.001, "batch_size": 64},
    # {"num_conv_layers": 3, "filter_size": 16, "learning_rate": 0.001, "batch_size": 32},
    # {"num_conv_layers": 3, "filter_size": 32, "learning_rate": 0.001, "batch_size": 64},
    # {"num_conv_layers": 2, "filter_size": 16, "learning_rate": 0.0005, "batch_size": 32},
    # {"num_conv_layers": 2, "filter_size": 32, "learning_rate": 0.0005, "batch_size": 64},
    # {"num_conv_layers": 3, "filter_size": 16, "learning_rate": 0.0005, "batch_size": 32},
    # {"num_conv_layers": 3, "filter_size": 32, "learning_rate": 0.0005, "batch_size": 64},
    # {"num_conv_layers": 4, "filter_size": 16, "learning_rate": 0.001, "batch_size": 32},
    {"num_conv_layers": 4, "filter_size": 32, "learning_rate": 0.001, "batch_size": 64},
]

# Results storage
results = []

# Experiment with configurations
for idx, config in enumerate(configs):
    print(f"Running configuration {idx + 1}/{len(configs)}: {config}")

    # Create data loaders
    train_loader = DataLoader(train_dataset, batch_size=config["batch_size"], shuffle=True)
    test_loader = DataLoader(test_dataset, batch_size=config["batch_size"], shuffle=False)

    # Create model
    model = create_model(config["num_conv_layers"], config["filter_size"]).to(device)

    # Define loss function and optimizer
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=config["learning_rate"])

    # Train the model
    for epoch in range(5):  # Train for 5 epochs per configuration
        train_loss = train_model(model, train_loader, criterion, optimizer)
        print(f"Epoch [{epoch + 1}/5], Loss: {train_loss:.4f}")

    # Test the model
    accuracy = test_model(model, test_loader)
    print(f"Accuracy: {accuracy:.2f}%\n")

    # Store results
    results.append({"config": config, "accuracy": accuracy})

# Print summary of results
print("Experiment Results:")
for result in results:
    print(f"Config: {result['config']}, Accuracy: {result['accuracy']:.2f}%")


# Config: {'num_conv_layers': 4, 'filter_size': 32, 'learning_rate': 0.001, 'batch_size': 64}
# best accuracy: Accuracy: 91.11%


Running configuration 1/1: {'num_conv_layers': 4, 'filter_size': 32, 'learning_rate': 0.001, 'batch_size': 64}
Epoch [1/5], Loss: 0.5029
Epoch [2/5], Loss: 0.3019
Epoch [3/5], Loss: 0.2496
Epoch [4/5], Loss: 0.2153
Epoch [5/5], Loss: 0.1895
Accuracy: 91.11%

Experiment Results:
Config: {'num_conv_layers': 4, 'filter_size': 32, 'learning_rate': 0.001, 'batch_size': 64}, Accuracy: 91.11%
