In [None]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


FINAL ASSIGNMENT OF ACTIVE LEARNING PROJECT

In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision.datasets import ImageFolder
from torchvision import transforms
from sklearn.metrics import pairwise_distances
from pathlib import Path
import torchvision


device=torch.device('cuda')
num_epochs = 10
batch_size = 64
learning_rate = 0.001
transform=transforms.Compose([transforms.ToTensor(),transforms.Normalize((0.5,),(0.5,))])
train_dataset=torchvision.datasets.MNIST(root='./data',train=True,download=True,transform=transform)
test_dataset=torchvision.datasets.MNIST(root='./data',train=False,download=True,transform=transform)
train_loader=torch.utils.data.DataLoader(dataset=train_dataset,batch_size=batch_size,shuffle=True)
test_loader=torch.utils.data.DataLoader(dataset=test_dataset,batch_size=batch_size,shuffle=False)
class CNN(nn.Module):
  def __init__(self):
    super(CNN,self).__init__()
    self.conv1=nn.Conv2d(1,32,kernel_size=3,padding=1)
    self.relu1=nn.ReLU()
    self.pool1=nn.MaxPool2d(kernel_size=2,stride=2)
    self.conv2=nn.Conv2d(32,64,kernel_size=3,padding=1)
    self.relu2=nn.ReLU()
    self.pool2=nn.MaxPool2d(kernel_size=2,stride=2)
    self.fc1=nn.Linear(64*7*7,128)
    self.relu3=nn.ReLU()
    self.fc2=nn.Linear(128,10)
  def forward(self,x):
    x=self.conv1(x)
    x=self.relu1(x)
    x=self.pool1(x)
    x=self.conv2(x)
    x=self.relu2(x)
    x=self.pool2(x)
    x=x.view(x.size(0),-1)
    x=self.fc1(x)
    x=self.relu3(x)
    x=self.fc2(x)
    return x
cnn=CNN().to(device)
num_classes = 10
model = CNN().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)
for epoch in range(num_epochs):
    model.train()
    running_loss = 0.0
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        loss = criterion(outputs, labels)
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
def calculate_metrics(outputs, features, m=5):
    outputs_cpu = outputs.detach().cpu()
    least_confidence, prediction_entropy, margin_sampling = calculate_uncertainty_metrics(outputs_cpu)
    least_confidence_list.extend(torch.from_numpy(least_confidence))
    prediction_entropy_list.extend(torch.from_numpy(prediction_entropy))
    margin_sampling_list.extend(torch.from_numpy(margin_sampling))
    features_normalized = F.normalize(features, p=2, dim=1)
    cosine_similarity, l2_norm = calculate_diversity_metrics(features_normalized)
    cosine_similarity_list.extend(torch.from_numpy(cosine_similarity))
    l2_norm_list.extend(torch.from_numpy(l2_norm))
    feature_distances = pairwise_distances(features.cpu().detach().numpy(), metric='cosine')
    kl_divergence_scores = calculate_kl_divergence(outputs_cpu, feature_distances, m=5)
    kl_divergence_list.extend(kl_divergence_scores)
def calculate_uncertainty_metrics(outputs):
    probabilities = F.softmax(outputs, dim=1)
    least_confidence = 1 - probabilities.max(dim=1).values.cpu().detach().numpy()
    current_probs = probabilities.clone().detach()
    current_probs[current_probs == 0] = 1e-10
    prediction_entropy = -torch.sum(current_probs * torch.log(current_probs), dim=1).cpu().detach().numpy()
    margin_sampling = 1 - (torch.max(probabilities, dim=1).values.cpu().detach().numpy() - \
                           torch.min(probabilities, dim=1).values.cpu().detach().numpy())
    return least_confidence, prediction_entropy, margin_sampling

def calculate_diversity_metrics(features, m=5):
    feature_distances = pairwise_distances(features.cpu().detach().numpy(), metric='cosine')
    cosine_similarity = 1 - feature_distances[:, 1:m+1].mean(axis=1)
    l2_distances = pairwise_distances(features.cpu().detach().numpy(), metric='euclidean')
    l2_norm = l2_distances[:, 1:m+1].mean(axis=1)
    return cosine_similarity, l2_norm
def calculate_kl_divergence(outputs, feature_distances, m=5):
    kl_divergence = []
    for i in range(len(outputs)):
        current_sample_prob = F.softmax(outputs[i], dim=0)
        neighbor_indices = feature_distances[i, 1:m+1].astype(int)
        neighbors_prob = torch.mean(F.softmax(outputs[neighbor_indices], dim=1), dim=0)
        epsilon = 1e-10
        current_sample_prob += epsilon
        neighbors_prob += epsilon
        kl_divergence.append(F.kl_div(torch.log(current_sample_prob), neighbors_prob, reduction='batchmean'))  # KL Divergence
    return kl_divergence
least_confidence_list = []
prediction_entropy_list = []
margin_sampling_list = []
cosine_similarity_list = []
l2_norm_list = []
kl_divergence_list = []
model.eval()
with torch.no_grad():
    correct = 0
    total = 0
    for images, labels in test_loader:
        images, labels = images.to(device), labels.to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        features = cnn.conv1(images)
        features = F.max_pool2d(F.relu(features), 2, 2)
        features = cnn.conv2(features)
        features = F.max_pool2d(F.relu(features), 2, 2)
        features = features.view(features.size(0), -1)
        calculate_metrics(outputs, features)

    print(f"Test Accuracy: {100 * correct / total:.2f}%")
    print(f"Average Least Confidence: {torch.mean(torch.stack(least_confidence_list))}")
    print(f"Average Prediction Entropy: {torch.mean(torch.stack(prediction_entropy_list))}")
    print(f"Average Margin Sampling: {torch.mean(torch.stack(margin_sampling_list))}")
    print(f"Average Cosine Similarity: {torch.mean(torch.stack(cosine_similarity_list))}")
    print(f"Average L2 Norm: {torch.mean(torch.stack(l2_norm_list))}")
    print(f"Average KL Divergence: {torch.mean(torch.stack(kl_divergence_list))}")


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9.91M/9.91M [00:00<00:00, 11.5MB/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28.9k/28.9k [00:00<00:00, 345kB/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1.65M/1.65M [00:00<00:00, 3.22MB/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4.54k/4.54k [00:00<00:00, 4.34MB/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw

Test Accuracy: 99.07%
Average Least Confidence: 0.003541358979418874
Average Prediction Entropy: 0.009954983368515968
Average Margin Sampling: 0.003541397163644433
Average Cosine Similarity: 0.91835618019104
Average L2 Norm: 0.3963404893875122
Average KL Divergence: 1.9064706563949585
