Q.1> An image classification model using active learning stragies to enhance model performance and accuracy

CNN MODEL

In [16]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F

transform = transforms.Compose([transforms.ToTensor()])

trainset = torchvision.datasets.MNIST('mnist_data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4, shuffle=True)

testset = torchvision.datasets.MNIST('mnist_data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4, shuffle=False)

# Define the model with dropout for regularization
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        self.dropout = nn.Dropout(0.5)  # Dropout added with 50% rate

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)  # Dropout applied after the first fully connected layer
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Net().to(device)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Using Adam optimizer
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)  # Learning rate scheduler

# Training loop
for epoch in range(10):  # You can increase epochs here
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in trainloader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    avg_loss = running_loss / len(trainloader)
    accuracy = 100 * correct / total
    print(f"Epoch {epoch+1}, Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%")
    
    # Adjust the learning rate using the scheduler
    scheduler.step()

# Final testing
model.eval()
correct = 0
total = 0
with torch.no_grad():
    for inputs, labels in testloader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

test_accuracy = 100 * correct / total
print(f"The Accuracy Achieved in this CNN model is: {test_accuracy:.2f}%")


Epoch 1, Loss: 0.2093, Accuracy: 93.70%
Epoch 2, Loss: 0.0952, Accuracy: 97.33%
Epoch 3, Loss: 0.0809, Accuracy: 97.77%
Epoch 4, Loss: 0.0712, Accuracy: 98.06%
Epoch 5, Loss: 0.0677, Accuracy: 98.12%
Epoch 6, Loss: 0.0451, Accuracy: 98.79%
Epoch 7, Loss: 0.0410, Accuracy: 98.88%
Epoch 8, Loss: 0.0389, Accuracy: 98.88%
Epoch 9, Loss: 0.0347, Accuracy: 99.04%
Epoch 10, Loss: 0.0365, Accuracy: 99.00%
The Accuracy Achieved in this CNN model is: 98.95%


CNN MODEL WITH ACTIVE LEARNING STRATIGIES APPLIED

In [17]:
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision
import torchvision.transforms as transforms
import torch.nn.functional as F
from sklearn.metrics import pairwise_distances
from scipy.stats import entropy
import numpy as np

transform = transforms.Compose([transforms.ToTensor()])

trainset = torchvision.datasets.MNIST('mnist_data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4, shuffle=True)

testset = torchvision.datasets.MNIST('mnist_data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4, shuffle=False)

# Define the model with dropout for regularization
class Net(nn.Module):
    def __init__(self):
        super(Net, self).__init__()
        self.conv1 = nn.Conv2d(1, 6, 5)
        self.pool = nn.MaxPool2d(2, 2)
        self.conv2 = nn.Conv2d(6, 16, 5)
        self.fc1 = nn.Linear(16 * 4 * 4, 120)
        self.fc2 = nn.Linear(120, 84)
        self.fc3 = nn.Linear(84, 10)
        self.relu = nn.ReLU()
        self.flatten = nn.Flatten()
        self.dropout = nn.Dropout(0.5)  # Dropout added with 50% rate

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = x.view(-1, 16 * 4 * 4)
        x = F.relu(self.fc1(x))
        x = self.dropout(x)  # Dropout applied after the first fully connected layer
        x = F.relu(self.fc2(x))
        x = self.fc3(x)
        return x

# Device setup
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model = Net().to(device)

# Loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)  # Using Adam optimizer
scheduler = optim.lr_scheduler.StepLR(optimizer, step_size=5, gamma=0.5)  # Learning rate scheduler

def calculate_uncertainty_metrics(outputs):
    outputs_tensor = torch.from_numpy(outputs)
    probabilities = F.softmax(outputs_tensor, dim=1)

    least_confidence = 1 - probabilities.max(dim=1).values.cpu().detach().numpy()

    current_probs = probabilities.clone().detach()
    current_probs[current_probs == 0] = 1e-10

    prediction_entropy = -torch.sum(current_probs * torch.log(current_probs), dim=1).cpu().detach().numpy()

    margin_sampling = 1 - (torch.max(probabilities, dim=1).values.cpu().detach().numpy() - \
                           torch.min(probabilities, dim=1).values.cpu().detach().numpy()) 
    
    return least_confidence, prediction_entropy, margin_sampling

def calculate_diversity_metrics(features, m=5):
    feature_distances = pairwise_distances(features.cpu().detach().numpy(), metric='cosine')
    cosine_similarity = 1 - feature_distances[:, 1:m+1].mean(axis=1)

    l2_distances = pairwise_distances(features.cpu().detach().numpy(), metric='euclidean')
    l2_norm = l2_distances[:, 1:m+1].mean(axis=1)

    return cosine_similarity, l2_norm

def calculate_kl_divergence(outputs, feature_distances, m=5): 
    kl_divergence = [] 
    for i in range(len(outputs)):
        current_sample_prob = F.softmax(outputs[i], dim=0)
        neighbor_indices = feature_distances[i, 1:m+1].astype(int)
        neighbors_prob = torch.mean(F.softmax(outputs[neighbor_indices], dim=1), dim=0)
        epsilon = 1e-10
        current_sample_prob += epsilon
        neighbors_prob += epsilon
        kl_divergence.append(F.kl_div(torch.log(current_sample_prob), neighbors_prob, reduction='batchmean')) 
    return kl_divergence

def calculate_metrics(outputs, features, m=5):
    # Move the tensor to CPU before converting to numpy
    outputs_cpu = outputs.cpu()
    
    # Calculate uncertainty metrics
    least_confidence, prediction_entropy, margin_sampling = calculate_uncertainty_metrics(outputs_cpu.detach().numpy())
    
    # Extend lists with uncertainty metrics
    least_confidence_list.extend(torch.from_numpy(least_confidence))
    prediction_entropy_list.extend(torch.from_numpy(prediction_entropy))
    margin_sampling_list.extend(torch.from_numpy(margin_sampling))
    
    # Calculate diversity metrics
    features_normalized = F.normalize(features, p=2, dim=1)
    cosine_similarity, l2_norm = calculate_diversity_metrics(features_normalized)
    
    # Extend lists with diversity metrics
    cosine_similarity_list.extend(torch.from_numpy(cosine_similarity))
    l2_norm_list.extend(torch.from_numpy(l2_norm))
    
    # Calculate pairwise distances and KL divergence
    feature_distances = pairwise_distances(features.cpu().detach().numpy(), metric='cosine')
    
    # Calculate KL divergence scores
    kl_divergence_scores = calculate_kl_divergence(outputs, feature_distances, m=5)
    
    # Extend the list with KL divergence scores
    kl_divergence_list.extend(kl_divergence_scores)


# Training loop
for epoch in range(10):  # You can increase epochs here
    model.train()
    running_loss = 0.0
    correct = 0
    total = 0

    for inputs, labels in trainloader:
        inputs, labels = inputs.to(device), labels.to(device)
        optimizer.zero_grad()
        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

    avg_loss = running_loss / len(trainloader)
    accuracy = 100 * correct / total
    print(f"Epoch {epoch+1}, Loss: {avg_loss:.4f}, Accuracy: {accuracy:.2f}%")
    
    # Adjust the learning rate using the scheduler
    scheduler.step()
    
# Final testing
model.eval()
correct = 0
total = 0

least_confidence_list= [] # Initialize an empty list to store least confidence values
prediction_entropy_list = [] # Initialize an empty list to store prediction entropy values
margin_sampling_list = [] # Initialize an empty list to store margin sampling values
cosine_similarity_list = [] # Initialize an empty list to store cosine similarity values
l2_norm_list = [] # Initialize an empty list to store L2 norm values
kl_divergence_list = [] # Initialize an empty list to store KL Divergence values

with torch.no_grad():
    for inputs, labels in testloader:
        inputs, labels = inputs.to(device), labels.to(device)
        outputs = model(inputs)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

        # Apply convolutional layer 1 to the input images
        conv1_output = model.conv1(inputs)
        # Apply ReLU activation function
        relu_output = model.relu(conv1_output)
        # Apply max pooling
        maxpool_output = model.pool(relu_output)
        # Apply convolutional layer 2 to the max-pooled output
        conv2_output = model.conv2(maxpool_output)
        # Flatten the output
        features = model.flatten(conv2_output)
        # Reshape the features to have a consistent size
        features = features.view(features.size(0), -1)

        # Calculate metrics using the extracted features and model outputs
        calculate_metrics(outputs, features)

# Calculate accuracy
accuracy = correct / total # Calculate accuracy
print(f"Test Accuracy: {accuracy * 100:.2f}%") # Print the accuracy on the test seT

# Print the average values of uncertainty and diversity measures
print(f"Average Least Confidence: {torch.mean(torch.stack(least_confidence_list))}") # Print the average value of least confidence
print(f"Average Prediction Entropy: {torch.mean(torch.stack(prediction_entropy_list))}") # Print the average value of prediction entropy
print(f"Average Margin Sampling: {torch.mean(torch.stack(margin_sampling_list))}") # Print the average value of margin sampling
print(f"Average Cosine Similarity: {torch.mean(torch.stack(cosine_similarity_list))}") # Print the average value of cosine similarity
print(f"Average L2 Norm: {torch.mean(torch.stack(l2_norm_list))}") # Print the average value of L2 norm
print(f"Average KL Divergence: {torch.mean(torch.stack(kl_divergence_list))}") # Print the average value of KL divergence

Epoch 1, Loss: 0.2159, Accuracy: 93.42%
Epoch 2, Loss: 0.0989, Accuracy: 97.31%
Epoch 3, Loss: 0.0819, Accuracy: 97.67%
Epoch 4, Loss: 0.0749, Accuracy: 98.02%
Epoch 5, Loss: 0.0695, Accuracy: 98.13%
Epoch 6, Loss: 0.0465, Accuracy: 98.71%
Epoch 7, Loss: 0.0436, Accuracy: 98.83%
Epoch 8, Loss: 0.0400, Accuracy: 98.85%
Epoch 9, Loss: 0.0374, Accuracy: 98.98%
Epoch 10, Loss: 0.0363, Accuracy: 99.03%
Test Accuracy: 99.04%
Average Least Confidence: 0.005123747978359461
Average Prediction Entropy: 0.014790687710046768
Average Margin Sampling: 0.005130315665155649
Average Cosine Similarity: 0.7565794587135315
Average L2 Norm: 0.5977974534034729
Average KL Divergence: 1.4561538696289062


An increase in the test accuracy can be seen here

Uncertainty Sampling using least confidence would be the most effective strategy for this dataset and model