In [1]:
import torch
import numpy as np
from torch.utils.data import DataLoader, Subset
import torch.optim as optim
import torch.nn as nn
from IPython import get_ipython
from IPython.display import display
from sklearn.metrics.pairwise import pairwise_distances
import torch.nn.functional as F
from torchvision import datasets, transforms
import copy



def active_learning_iteration(model, full_dataset, strategy, num_iterations, query_size, epochs_per_iteration, learning_rate=0.001):

    optimizer = optim.Adam(model.parameters(), lr=learning_rate)
    criterion = nn.CrossEntropyLoss()

    initial_labeled_size = 2000
    labeled_indices = list(range(initial_labeled_size))
    unlabeled_indices = list(range(initial_labeled_size, len(full_dataset)))

    labeled_dataset = Subset(full_dataset, labeled_indices)

    unlabeled_dataset = Subset(full_dataset, unlabeled_indices)
    labeled_dataset = Subset(full_dataset, labeled_indices)
    for iteration in range(num_iterations):
        print(f"Iteration {iteration + 1}")

        unlabeled_subset = Subset(full_dataset, unlabeled_indices)
        unlabeled_loader = DataLoader(unlabeled_subset, batch_size=64, shuffle=False)

        model.eval()
        outputs_list = []
        features_list = []
        with torch.no_grad():
            for images, _ in unlabeled_loader:
                outputs, features = model(images)
                outputs_list.append(outputs)
                features_list.append(features)
        outputs = torch.cat(outputs_list, dim=0)
        features = torch.cat(features_list, dim = 0)



        if strategy == margin_sampling:
            result = strategy(outputs)
            result = np.ascontiguousarray(result)
            query_indices = np.flip(np.argsort(result))[:query_size]

        elif strategy == prediction_entropy or strategy == least_confidence:

            result = strategy(outputs)

            result = np.ascontiguousarray(result)
            query_indices = np.flip(np.argsort(result))[:query_size]

        elif strategy == calculate_kl_divergence:
            feature_distances = pairwise_distances(features.cpu().detach().numpy(), metric='cosine')
            result = strategy(outputs, feature_distances, 5)
            query_indices = np.flip(np.argsort(result))[:query_size]


        elif strategy == L2  or strategy ==  cosine_similarity:
            result = strategy(features, 5)
            query_indices = np.argsort(result)[:query_size]



        selected_unlabeled_indices = [unlabeled_indices[i] for i in query_indices]

        labeled_indices.extend(selected_unlabeled_indices)
        unlabeled_indices = [idx for idx in unlabeled_indices if idx not in selected_unlabeled_indices]

        updated_labeled_dataset = Subset(full_dataset, labeled_indices)
        updated_labeled_loader = DataLoader(updated_labeled_dataset, batch_size=64, shuffle=True)

        model.train()
        for epoch in range(epochs_per_iteration):
            running_loss = 0.0
            for images, labels in updated_labeled_loader:
                optimizer.zero_grad()
                outputs, _ = model(images)
                loss = criterion(outputs, labels)
                loss.backward()
                optimizer.step()
                running_loss += loss.item()

            print(f"Epoch {epoch + 1}, Loss: {running_loss / len(updated_labeled_loader):.4f}")

    return len(labeled_indices)


In [2]:
class CNNModel(nn.Module):
    def __init__(self):
        super(CNNModel, self).__init__()
        self.conv1 = nn.Conv2d(3, 32, kernel_size=3)
        self.conv2 = nn.Conv2d(32, 64, kernel_size=3)
        self.fc1 = nn.Linear(64 * 6 * 6, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        out = F.relu(F.max_pool2d(self.conv1(x), 2))
        out  = F.relu(F.max_pool2d(self.conv2(out), 2))
        out = out.view(out.size(0), -1)
        features  = F.relu(self.fc1(out))
        out = self.fc2(features)
        features = features.view(features.size(0), -1)
        return out,features

query_size = 300
epochs_per_iteration = 5
num_iterations = 10
model = CNNModel()

Functions For Uncertainty-based Querying Strategies

In [3]:


def least_confidence(outputs):
    probabilities = torch.softmax(outputs, dim=1)
    max_probabilities, _ = torch.max(probabilities, dim=1)
    least_conf = 1 - max_probabilities
    return least_conf.cpu().numpy()
def prediction_entropy(outputs):
    probabilities = F.softmax(outputs, dim=1)
    probabilities = torch.clamp(probabilities, min=1e-7, max=1.0)
    log_probabilities = torch.log(probabilities)
    entropy = -torch.sum(probabilities * log_probabilities, dim=1)
    return entropy
def margin_sampling(outputs):
    probabilities = F.softmax(outputs, dim=1)
    top_two_probs, _ = torch.topk(probabilities, 2, dim=1)
    margin = (top_two_probs[:,0] - top_two_probs[:,1]).detach().cpu().numpy()
    return margin


Loading CIFAR10 Dataset

In [4]:
import torch
from torchvision import datasets, transforms

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

train_dataset = datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)






Downloading https://www.cs.toronto.edu/~kriz/cifar-10-python.tar.gz to ./data/cifar-10-python.tar.gz


100%|██████████| 170M/170M [00:05<00:00, 29.4MB/s]


Extracting ./data/cifar-10-python.tar.gz to ./data
Files already downloaded and verified


Least Confidence

In [5]:

model = CNNModel()
model.eval()
labeled_indices = active_learning_iteration(
    model, train_dataset, least_confidence, num_iterations, query_size, epochs_per_iteration
)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
total = 0
correct = 0
with torch.no_grad():
    for images, labels in test_loader:
        outputs, _  = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Test Accuracy: {100 * correct / total:.2f}%")
print(f"Total Training Instances Used :" ,labeled_indices)

Iteration 1
Epoch 1, Loss: 2.0833
Epoch 2, Loss: 1.7783
Epoch 3, Loss: 1.6112
Epoch 4, Loss: 1.4485
Epoch 5, Loss: 1.3797
Iteration 2
Epoch 1, Loss: 1.3735
Epoch 2, Loss: 1.2788
Epoch 3, Loss: 1.1869
Epoch 4, Loss: 1.0765
Epoch 5, Loss: 0.9767
Iteration 3
Epoch 1, Loss: 1.0277
Epoch 2, Loss: 0.8846
Epoch 3, Loss: 0.7957
Epoch 4, Loss: 0.7025
Epoch 5, Loss: 0.5844
Iteration 4
Epoch 1, Loss: 0.6738
Epoch 2, Loss: 0.5541
Epoch 3, Loss: 0.4252
Epoch 4, Loss: 0.3564
Epoch 5, Loss: 0.2725
Iteration 5
Epoch 1, Loss: 0.4400
Epoch 2, Loss: 0.3213
Epoch 3, Loss: 0.2307
Epoch 4, Loss: 0.1850
Epoch 5, Loss: 0.1281
Iteration 6
Epoch 1, Loss: 0.3252
Epoch 2, Loss: 0.2297
Epoch 3, Loss: 0.1544
Epoch 4, Loss: 0.0891
Epoch 5, Loss: 0.0569
Iteration 7
Epoch 1, Loss: 0.3826
Epoch 2, Loss: 0.2276
Epoch 3, Loss: 0.1079
Epoch 4, Loss: 0.0632
Epoch 5, Loss: 0.0581
Iteration 8
Epoch 1, Loss: 0.3315
Epoch 2, Loss: 0.1620
Epoch 3, Loss: 0.0777
Epoch 4, Loss: 0.0410
Epoch 5, Loss: 0.0211
Iteration 9
Epoch 1, Los

Prediction Entropy

In [6]:
model = CNNModel()
model.eval()
labeled_indices = active_learning_iteration(
    model, train_dataset, prediction_entropy, num_iterations, query_size, epochs_per_iteration
)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
total = 0
correct = 0
with torch.no_grad():
    for images, labels in test_loader:
        outputs,_  = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print(f"Test Accuracy: {100 * correct / total:.2f}%")
print(f"Total Training Instances Used :" ,labeled_indices)

Iteration 1
Epoch 1, Loss: 2.0829
Epoch 2, Loss: 1.7789
Epoch 3, Loss: 1.5826
Epoch 4, Loss: 1.4435
Epoch 5, Loss: 1.3333
Iteration 2
Epoch 1, Loss: 1.3697
Epoch 2, Loss: 1.2910
Epoch 3, Loss: 1.2296
Epoch 4, Loss: 1.1290
Epoch 5, Loss: 1.0451
Iteration 3
Epoch 1, Loss: 1.1030
Epoch 2, Loss: 1.0152
Epoch 3, Loss: 0.8866
Epoch 4, Loss: 0.8151
Epoch 5, Loss: 0.7134
Iteration 4
Epoch 1, Loss: 0.8006
Epoch 2, Loss: 0.7032
Epoch 3, Loss: 0.5737
Epoch 4, Loss: 0.5087
Epoch 5, Loss: 0.4154
Iteration 5
Epoch 1, Loss: 0.5631
Epoch 2, Loss: 0.4512
Epoch 3, Loss: 0.3582
Epoch 4, Loss: 0.2914
Epoch 5, Loss: 0.2162
Iteration 6
Epoch 1, Loss: 0.4279
Epoch 2, Loss: 0.2989
Epoch 3, Loss: 0.2254
Epoch 4, Loss: 0.1706
Epoch 5, Loss: 0.0990
Iteration 7
Epoch 1, Loss: 0.3372
Epoch 2, Loss: 0.3072
Epoch 3, Loss: 0.1557
Epoch 4, Loss: 0.1803
Epoch 5, Loss: 0.0816
Iteration 8
Epoch 1, Loss: 0.3220
Epoch 2, Loss: 0.2125
Epoch 3, Loss: 0.1229
Epoch 4, Loss: 0.0680
Epoch 5, Loss: 0.0423
Iteration 9
Epoch 1, Los

Margin Sampling

In [7]:
model = CNNModel()
model.eval()
labeled_indices = active_learning_iteration(
    model, train_dataset, margin_sampling, num_iterations, query_size, epochs_per_iteration
)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
total = 0
correct = 0
with torch.no_grad():
    for images, labels in test_loader:
        outputs,_  = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
print(f"Test Accuracy: {100 * correct / total:.2f}%")
print(f"Total Training Instances Used :" ,labeled_indices)

Iteration 1
Epoch 1, Loss: 2.0990
Epoch 2, Loss: 1.7814
Epoch 3, Loss: 1.5891
Epoch 4, Loss: 1.4440
Epoch 5, Loss: 1.3524
Iteration 2
Epoch 1, Loss: 1.2167
Epoch 2, Loss: 1.1153
Epoch 3, Loss: 1.0227
Epoch 4, Loss: 0.9509
Epoch 5, Loss: 0.8550
Iteration 3
Epoch 1, Loss: 0.7817
Epoch 2, Loss: 0.6998
Epoch 3, Loss: 0.5990
Epoch 4, Loss: 0.5319
Epoch 5, Loss: 0.4410
Iteration 4
Epoch 1, Loss: 0.3831
Epoch 2, Loss: 0.3439
Epoch 3, Loss: 0.2741
Epoch 4, Loss: 0.2344
Epoch 5, Loss: 0.1610
Iteration 5
Epoch 1, Loss: 0.1781
Epoch 2, Loss: 0.1299
Epoch 3, Loss: 0.0863
Epoch 4, Loss: 0.0610
Epoch 5, Loss: 0.0370
Iteration 6
Epoch 1, Loss: 0.2149
Epoch 2, Loss: 0.1225
Epoch 3, Loss: 0.0691
Epoch 4, Loss: 0.0378
Epoch 5, Loss: 0.0211
Iteration 7
Epoch 1, Loss: 0.1811
Epoch 2, Loss: 0.1140
Epoch 3, Loss: 0.0527
Epoch 4, Loss: 0.0307
Epoch 5, Loss: 0.0135
Iteration 8
Epoch 1, Loss: 0.1906
Epoch 2, Loss: 0.0705
Epoch 3, Loss: 0.0354
Epoch 4, Loss: 0.0130
Epoch 5, Loss: 0.0078
Iteration 9
Epoch 1, Los

I have implemented the functions for diversity-metrics but it was showing RAM ran out of memory error. I have tried to fix it multiple times but have not succeeded.


In [None]:

# from sklearn.metrics import pairwise_distances

# def L2(features, m=5):
#     # Compute pairwise Euclidean distances
#     l2_distances = pairwise_distances(features.cpu().detach().numpy(), metric='euclidean')
#     l2_norm = l2_distances[:, 1:m+1].mean(axis=1)
#     return l2_norm

# def cosine_similarity(features, m=5):
#     features_normalized = F.normalize(features, p=2, dim=1)
#     feature_distances = pairwise_distances(features_normalized.cpu().detach().numpy(), metric='cosine')
#     return 1 - np.mean(feature_distances[:, 1:m+1], axis=1)

# def calculate_kl_divergence(outputs, features, m=5):
#     features_normalized = F.normalize(features, p=2, dim=1)
#     feature_distances = pairwise_distances(features_normalized.cpu().detach().numpy(), metric='cosine')
#     neighbor_indices = np.argsort(feature_distances, axis=1)[:, 1:m+1]
#     kl_divergence = []

#     for i in range(len(outputs)):
#         current_sample_prob = F.softmax(outputs[i], dim=0)
#         neighbors_probs = F.softmax(outputs[neighbor_indices[i]], dim=1)
#         mean_neighbors_prob = torch.mean(neighbors_probs, dim=0)
#         kl_div = F.kl_div(
#             torch.log(torch.clamp(current_sample_prob, min=1e-10)),
#             torch.clamp(mean_neighbors_prob, min=1e-10),
#             reduction='batchmean'
#         )
#         kl_divergence.append(kl_div)

#     return torch.stack(kl_divergence).cpu().numpy()

# Cosine Similarity

# model = CNNModel()
# model.eval()
# labeled_indices = active_learning_iteration(
#     model, train_dataset, cosine_similarity, num_iterations, query_size, epochs_per_iteration
# )
# test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
# total = 0
# correct = 0
# with torch.no_grad():
#     for images, labels in test_loader:
#         outputs,_ = model(images)
#         _, predicted = torch.max(outputs.data, 1)
#         total += labels.size(0)
#         correct += (predicted == labels).sum().item()

# print(f"Test Accuracy: {100 * correct / total:.2f}%")
# print(f"Total Training Instances Used :" ,labeled_indices)

# KL - Divergence

# model = CNNModel()
# model.eval()
# labeled_indices = active_learning_iteration(
#     model, train_dataset, calculate_kl_divergence, num_iterations, query_size, epochs_per_iteration
# )
# test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
# total = 0
# correct = 0
# with torch.no_grad():
#     for images, labels in test_loader:
#         outputs = model(images)
#         _, predicted = torch.max(outputs.data, 1)
#         total += labels.size(0)
#         correct += (predicted == labels).sum().item()

# print(f"Test Accuracy: {100 * correct / total:.2f}%")
# print(f"Total Training Instances Used :" ,labeled_indices)

# L2 Norm

# model = CNNModel()
# model.eval()
# labeled_indices = active_learning_iteration(
#     model, train_dataset, L2, num_iterations, query_size, epochs_per_iteration
# )
# test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)
# total = 0
# correct = 0
# with torch.no_grad():
#     for images, labels in test_loader:
#         outputs = model(images)
#         _, predicted = torch.max(outputs.data, 1)
#         total += labels.size(0)
#         correct += (predicted == labels).sum().item()

# print(f"Test Accuracy: {100 * correct / total:.2f}%")
# print(f"Total Training Instances Used :" ,labeled_indices)


Training the custom model fully on train dataset

In [10]:
import torchvision
from torchvision import datasets, transforms
trainset = torchvision.datasets.CIFAR10(root='./data', train=True, download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=4, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=4, shuffle=False, num_workers=2)
model = CNNModel()
optimizer = optim.Adam(model.parameters(), lr=0.001)
criterion = nn.CrossEntropyLoss()
epochs = 5


def calculate_diversity_metrics(features, m=5):
    feature_distances = pairwise_distances(features.cpu().detach().numpy(), metric='cosine')
    cosine_similarity = 1 - feature_distances[:, 1:m+1].mean(axis=1)
    l2_distances = pairwise_distances(features.cpu().detach().numpy(), metric='euclidean')
    l2_norm = l2_distances[:, 1:m+1].mean(axis=1)
    return cosine_similarity, l2_norm


def calculate_kl_divergence(outputs, feature_distances, m=5):
    kl_divergence = []
    for i in range(len(outputs)):
        current_sample_prob = F.softmax(outputs[i], dim=0)
        neighbor_indices = feature_distances[i, 1:m+1].astype(int)
        neighbors_prob = torch.mean(F.softmax(outputs[neighbor_indices], dim=1), dim=0)
        kl_divergence.append(F.kl_div(torch.log(current_sample_prob), neighbors_prob, reduction='batchmean'))

    return kl_divergence

def calculate_metrics(outputs, features, m=5):
    features_normalized = F.normalize(features, p=2, dim=1)
    cosine_similarity, l2_norm = calculate_diversity_metrics(features_normalized)
    cosine_similarity_list.extend(torch.from_numpy(cosine_similarity))
    l2_norm_list.extend(torch.from_numpy(l2_norm))
    feature_distances = pairwise_distances(features.cpu().detach().numpy(), metric='cosine')
    kl_divergence_scores = calculate_kl_divergence(outputs, feature_distances, m=5)
    kl_divergence_list.extend(kl_divergence_scores)

for epoch in range(epochs):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data
        optimizer.zero_grad()
        outputs,_ = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()

    print(f"Epoch {epoch+1}/{epochs}, Loss: {running_loss / len(trainloader)}")



Files already downloaded and verified
Files already downloaded and verified
Epoch 1/5, Loss: 1.3137108526310324
Epoch 2/5, Loss: 0.9914887586571276
Epoch 3/5, Loss: 0.8693068722894415
Epoch 4/5, Loss: 0.7853797336323746
Epoch 5/5, Loss: 0.7178750201885822


Calculating Accuracy of the fully trained custom model and its average Diversity Metrics

In [11]:
correct = 0
total = 0
cosine_similarity_list = []
l2_norm_list = []
kl_divergence_list = []

with torch.no_grad():
    for data in testloader:
        images, labels = data
        outputs, features = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()
        calculate_metrics(outputs, features)

accuracy = 100 * correct / total
print(f"Accuracy of fully trained custom model on trained set: {accuracy:.2f}%")
print(f"Average Cosine Similarity: {torch.mean(torch.stack(cosine_similarity_list))}")
print(f"Average L2 Norm: {torch.mean(torch.stack(l2_norm_list))}")
print(f"Average KL Divergence: {torch.mean(torch.stack(kl_divergence_list))}")

Accuracy of fully trained custom model on trained set: 68.14%
Average Cosine Similarity: 0.4673972725868225
Average L2 Norm: 0.8828698992729187
Average KL Divergence: 0.45189204812049866


In [19]:
import torchvision
import torchvision.transforms as transforms
from torchvision.models import resnet50

transform = transforms.Compose([
    transforms.Resize(224),
    transforms.ToTensor(),
    transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))
])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=64, shuffle=True, num_workers=2)

testset = torchvision.datasets.CIFAR10(root='./data', train=False, download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=64, shuffle=False, num_workers=2)

model = resnet50(pretrained=True)

num_ftrs = model.fc.in_features
model.fc = nn.Linear(num_ftrs, 10)

device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")
model = model.to(device)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

num_epochs = 5

for epoch in range(num_epochs):
    running_loss = 0.0
    for i, data in enumerate(trainloader, 0):
        inputs, labels = data[0].to(device), data[1].to(device)

        optimizer.zero_grad()

        outputs = model(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.step()

        running_loss += loss.item()


    print(f"Epoch {epoch + 1},  Loss: {running_loss / 200:.4f}")
    running_loss = 0.0


correct = 0
total = 0
with torch.no_grad():
    for data in testloader:
        images, labels = data[0].to(device), data[1].to(device)
        outputs = model(images)
        _, predicted = torch.max(outputs.data, 1)
        total += labels.size(0)
        correct += (predicted == labels).sum().item()

print('Accuracy of the RESNET50 : %d %%' % (100 * correct / total))

Files already downloaded and verified
Files already downloaded and verified
Epoch 1,  Loss: 3.0638
Epoch 2,  Loss: 1.7790
Epoch 3,  Loss: 1.3519
Epoch 4,  Loss: 1.0774
Epoch 5,  Loss: 0.8784
Accuracy of the RESNET50 : 88 %
