In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms
from torch.utils.data import DataLoader

batch_size = 64
learning_rate = 0.001
epochs = 10

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))  
])

train_dataset = datasets.MNIST(root='./data', train=True, transform=transform, download=True)
test_dataset = datasets.MNIST(root='./data', train=False, transform=transform, download=True)

train_loader = DataLoader(train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=batch_size, shuffle=False)


In [2]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.conv1 = nn.Conv2d(1, 32, kernel_size=3) 
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3) 
        self.fc1 = nn.Linear(64 * 5 * 5, 128)  
        self.fc2 = nn.Linear(128, 10) 

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2)    
        x = F.relu(self.conv2(x)) 
        x = F.max_pool2d(x, 2)
        x = x.view(x.size(0), -1)  
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

model = CNN()


In [3]:
criterion = nn.CrossEntropyLoss()  
optimizer = optim.Adam(model.parameters(), lr=learning_rate)  


In [4]:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
model.to(device)

for epoch in range(epochs):
    model.train()
    running_loss = 0.0
    for inputs, labels in train_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        
        running_loss += loss.item()
    print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss/len(train_loader):.4f}")


Epoch 1/10, Loss: 0.1488
Epoch 2/10, Loss: 0.0452
Epoch 3/10, Loss: 0.0310
Epoch 4/10, Loss: 0.0228
Epoch 5/10, Loss: 0.0169
Epoch 6/10, Loss: 0.0137
Epoch 7/10, Loss: 0.0120
Epoch 8/10, Loss: 0.0091
Epoch 9/10, Loss: 0.0070
Epoch 10/10, Loss: 0.0055


In [5]:
 model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in test_loader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

accuracy = 100 * correct / total
print(f"Test Accuracy: {accuracy:.2f}%")


Test Accuracy: 99.26%


In [6]:
from torchvision import models

pretrained_model = models.resnet18(pretrained=True)

pretrained_model.fc = nn.Linear(pretrained_model.fc.in_features, 10)

pretrained_model.to(device)





ResNet(
  (conv1): Conv2d(3, 64, kernel_size=(7, 7), stride=(2, 2), padding=(3, 3), bias=False)
  (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
  (relu): ReLU(inplace=True)
  (maxpool): MaxPool2d(kernel_size=3, stride=2, padding=1, dilation=1, ceil_mode=False)
  (layer1): Sequential(
    (0): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
      (conv2): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn2): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
    )
    (1): BasicBlock(
      (conv1): Conv2d(64, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1), bias=False)
      (bn1): BatchNorm2d(64, eps=1e-05, momentum=0.1, affine=True, track_running_stats=True)
      (relu): ReLU(inplace=True)
  

In [14]:
import numpy as np
from scipy.spatial.distance import cdist
import torch.nn.functional as F

def least_confidence(predictions):
    return 1 - predictions.max(axis=1)
def prediction_entropy(predictions):
    return -np.sum(predictions * np.log(predictions + 1e-10), axis=1)
def margin_sampling(predictions):
    sorted_preds = np.sort(predictions, axis=1)[:, ::-1]
    return sorted_preds[:, 0] - sorted_preds[:, 1]
def cosine_similarity(features, labeled_features):
    similarities = cdist(features, labeled_features, metric='cosine')
    return np.min(similarities, axis=1)
def l2_norm(features, labeled_features):
    distances = cdist(features, labeled_features, metric='euclidean')
    return np.min(distances, axis=1)
def kl_divergence(predictions, labeled_predictions):
    mean_labeled_predictions = np.mean(labeled_predictions, axis=0, keepdims=True)
    predictions = np.clip(predictions, 1e-10, 1.0)
    mean_labeled_predictions = np.clip(mean_labeled_predictions, 1e-10, 1.0)
    kl_values = np.sum(predictions * (np.log(predictions) - np.log(mean_labeled_predictions)), axis=1)
    return kl_values


In [15]:
import numpy as np
from torch.utils.data import Subset

def select_samples(model, unlabeled_loader, labeled_features=None, strategy='least_confidence', n_samples=100):
    model.eval()
    features = []
    predictions = []

    with torch.no_grad():
        for inputs, _ in unlabeled_loader:
            inputs = inputs.to(device)
            outputs = model(inputs)
            softmax_preds = F.softmax(outputs, dim=1).cpu().numpy()
            features.append(outputs.cpu().numpy())
            predictions.append(softmax_preds)

    features = np.concatenate(features)
    predictions = np.concatenate(predictions)

    if labeled_features is None:
        labeled_features = np.empty((0, features.shape[1]))  
    else:
        labeled_features = np.array(labeled_features)
        if labeled_features.ndim == 1:
            labeled_features = labeled_features.reshape(1, -1)

    if strategy == 'least_confidence':
        scores = least_confidence(predictions)
    elif strategy == 'prediction_entropy':
        scores = prediction_entropy(predictions)
    elif strategy == 'margin_sampling':
        scores = margin_sampling(predictions)
    elif strategy == 'cosine_similarity':
        scores = cosine_similarity(features, labeled_features)
    elif strategy == 'l2_norm':
        scores = l2_norm(features, labeled_features)
    elif strategy == 'kl_divergence':
        scores = kl_divergence(predictions, labeled_features)
    else:
        raise ValueError(f"Unknown strategy: {strategy}")

    selected_indices = np.argsort(scores)[-n_samples:]
    return selected_indices


In [17]:
strategies = ["kl_divergence","least_confidence", "prediction_entropy", "margin_sampling","cosine_similarity", "l2_norm"
              ]
n_samples_per_iteration = 100
iterations = 10 
batch_size = 64
def extract_features(model, data_loader):
    model.eval()
    features = []
    with torch.no_grad():
        for inputs, _ in data_loader:
            inputs = inputs.to(device)
            outputs = model(inputs)
            features.append(outputs.cpu().numpy())
    return np.concatenate(features)

for strategy in strategies:
    print(f"Starting Active Learning with strategy: {strategy}")
    
    labeled_indices = np.random.choice(len(train_dataset), size=500, replace=False)
    unlabeled_indices = np.setdiff1d(np.arange(len(train_dataset)), labeled_indices)
    
    labeled_set = Subset(train_dataset, labeled_indices)
    unlabeled_set = Subset(train_dataset, unlabeled_indices)
    
    labeled_loader = DataLoader(labeled_set, batch_size=batch_size, shuffle=True)
    unlabeled_loader = DataLoader(unlabeled_set, batch_size=batch_size, shuffle=False)
  
    for iteration in range(3):
        model.train()
        for inputs, labels in labeled_loader:
            inputs, labels = inputs.to(device), labels.to(device)
            optimizer.zero_grad()
            outputs = model(inputs)
            loss = criterion(outputs, labels)
            loss.backward()
            optimizer.step()
        
        labeled_features = extract_features(model, labeled_loader)
        
        selected_indices = select_samples(model, unlabeled_loader, labeled_features, strategy=strategy, n_samples=n_samples_per_iteration)
        
        new_labeled_indices = unlabeled_indices[selected_indices]
        labeled_indices = np.concatenate([labeled_indices, new_labeled_indices])
        unlabeled_indices = np.setdiff1d(unlabeled_indices, new_labeled_indices)
        
        labeled_set = Subset(train_dataset, labeled_indices)
        unlabeled_set = Subset(train_dataset, unlabeled_indices)
        
        labeled_loader = DataLoader(labeled_set, batch_size=batch_size, shuffle=True)
        unlabeled_loader = DataLoader(unlabeled_set, batch_size=batch_size, shuffle=False)
        
        correct = 0
        total = 0
        model.eval()
        with torch.no_grad():
            for inputs, labels in test_loader:
                inputs, labels = inputs.to(device), labels.to(device)
                outputs = model(inputs)
                _, predicted = torch.max(outputs, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()
        accuracy = 100 * correct / total
        print(f"Strategy: {strategy}, Iteration {iteration+1}/{iterations}, Test Accuracy: {accuracy:.2f}%")


Starting Active Learning with strategy: kl_divergence
Strategy: kl_divergence, Iteration 1/10, Test Accuracy: 98.99%
Strategy: kl_divergence, Iteration 2/10, Test Accuracy: 98.98%
Strategy: kl_divergence, Iteration 3/10, Test Accuracy: 98.95%
Starting Active Learning with strategy: least_confidence
Strategy: least_confidence, Iteration 1/10, Test Accuracy: 98.98%
Strategy: least_confidence, Iteration 2/10, Test Accuracy: 99.04%
Strategy: least_confidence, Iteration 3/10, Test Accuracy: 98.95%
Starting Active Learning with strategy: prediction_entropy
Strategy: prediction_entropy, Iteration 1/10, Test Accuracy: 97.40%
Strategy: prediction_entropy, Iteration 2/10, Test Accuracy: 98.83%
Strategy: prediction_entropy, Iteration 3/10, Test Accuracy: 98.88%
Starting Active Learning with strategy: margin_sampling
Strategy: margin_sampling, Iteration 1/10, Test Accuracy: 98.91%
Strategy: margin_sampling, Iteration 2/10, Test Accuracy: 98.94%
Strategy: margin_sampling, Iteration 3/10, Test Accur