# Assignment 3

In [2]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader
import torchvision
from torchvision import datasets,transforms

import numpy as np
import pandas as pd
from sklearn.metrics import pairwise_distances
import matplotlib.pyplot as plt



#### CNN MODEL

In [3]:
class ConvolutionalNeuralNetwork(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 10, 3, 1)
        self.conv2 = nn.Conv2d(10, 25, 3, 1)
        self.fc1 = nn.Linear(25*6*6, 120)
        self.fc2 = nn.Linear(120, 10)

    def forward(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.relu(F.max_pool2d(x, 2, 2))
        x = F.relu(F.max_pool2d(x, 2, 2))
        x = x.view(-1, 25*6*6)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

    def extract_features(self, x):
        x = F.relu(self.conv1(x))
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2, 2)
        x = F.max_pool2d(x, 2, 2)
        x = x.view(x.size(0), -1)
        return x

model = ConvolutionalNeuralNetwork()
model



ConvolutionalNeuralNetwork(
  (conv1): Conv2d(1, 10, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(10, 25, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=900, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=10, bias=True)
)

In [4]:
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = 0.01)
transform = transforms.ToTensor()

train_data = datasets.MNIST(root = 'cnn_data', train = True, download = True, transform = transform)
test_data = datasets.MNIST(root = 'cnn_data', train = False, download = True, transform = transform)

train_loader = DataLoader(train_data, batch_size = 10, shuffle = True)
test_loader = DataLoader(test_data, batch_size = 10, shuffle = False)



#### Training and testing

In [5]:
test_correct = 0

for epoch in range(10):
    total_loss = 0
    for i, (X_train, y_train) in enumerate(train_loader):
        y_prediction = model(X_train)
        loss = loss_function(y_prediction, y_train)
        total_loss += loss

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'epoch: {epoch + 1}     loss: {total_loss / len(train_loader) :.5f}')

with torch.no_grad():
    for i, (X_test, y_test) in enumerate(test_loader):
        y_value = model(X_test)
        loss = loss_function(y_value, y_test)

        test_correct += (torch.max(y_value, 1)[1] == y_test).sum()
        print(f'Accuracy for Custom CNN: {test_correct/len(test_data)*100 :.2f}%')

epoch: 1     loss: 2.30338
epoch: 2     loss: 2.30357
epoch: 3     loss: 2.30377
epoch: 4     loss: 2.30348
epoch: 5     loss: 2.30346
epoch: 6     loss: 2.30387
epoch: 7     loss: 2.30336
epoch: 8     loss: 2.30374
epoch: 9     loss: 2.30372
epoch: 10     loss: 2.30352
Accuracy for Custom CNN: 0.02%
Accuracy for Custom CNN: 0.03%
Accuracy for Custom CNN: 0.04%
Accuracy for Custom CNN: 0.07%
Accuracy for Custom CNN: 0.09%
Accuracy for Custom CNN: 0.10%
Accuracy for Custom CNN: 0.10%
Accuracy for Custom CNN: 0.11%
Accuracy for Custom CNN: 0.12%
Accuracy for Custom CNN: 0.14%
Accuracy for Custom CNN: 0.15%
Accuracy for Custom CNN: 0.15%
Accuracy for Custom CNN: 0.15%
Accuracy for Custom CNN: 0.17%
Accuracy for Custom CNN: 0.19%
Accuracy for Custom CNN: 0.20%
Accuracy for Custom CNN: 0.21%
Accuracy for Custom CNN: 0.23%
Accuracy for Custom CNN: 0.25%
Accuracy for Custom CNN: 0.28%
Accuracy for Custom CNN: 0.31%
Accuracy for Custom CNN: 0.31%
Accuracy for Custom CNN: 0.33%
Accuracy for Cus

#### Pretrained Model

In [6]:
import torch.optim as optim
from torchvision import models

pretrained_model = models.resnet18(weights=models.ResNet18_Weights.IMAGENET1K_V1)

pretrained_model.conv1 = nn.Conv2d(1, 64, kernel_size=7, stride=2, padding=3, bias=False)

pretrained_model.fc = nn.Linear(pretrained_model.fc.in_features, 10)

loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(pretrained_model.parameters(), lr=0.0001)

for param in pretrained_model.parameters():
    param.requires_grad = False

for param in pretrained_model.fc.parameters():
    param.requires_grad = True

test_correct = 0

for epoch in range(5):
    total_loss = 0
    for i, (X_train, y_train) in enumerate(train_loader):
        y_prediction = pretrained_model(X_train)
        loss = loss_function(y_prediction, y_train)
        total_loss += loss
        
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f'epoch: {epoch + 1}     loss: {total_loss / len(train_loader) :.5f}')

with torch.no_grad():
    for i, (X_test, y_test) in enumerate(test_loader):
        y_value = pretrained_model(X_test)
        loss = loss_function(y_value, y_test)

        test_correct += (torch.max(y_value, 1)[1] == y_test).sum()
        
print(f'Accuracy for Pretrained Model: {test_correct/len(test_data)*100 :.2f}%')

epoch: 1     loss: 1.69756
epoch: 2     loss: 1.44742
epoch: 3     loss: 1.40494
epoch: 4     loss: 1.38315
epoch: 5     loss: 1.37291
Accuracy for Pretrained Model: 55.96%


#### Active Learning Implementation

In [7]:
model = ConvolutionalNeuralNetwork()
model

ConvolutionalNeuralNetwork(
  (conv1): Conv2d(1, 10, kernel_size=(3, 3), stride=(1, 1))
  (conv2): Conv2d(10, 25, kernel_size=(3, 3), stride=(1, 1))
  (fc1): Linear(in_features=900, out_features=120, bias=True)
  (fc2): Linear(in_features=120, out_features=10, bias=True)
)

In [8]:
loss_function = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr = 0.01)

def calculate_least_confidence(outputs):
    probabilities = F.softmax(outputs, dim=1)
    least_confidence = 1 - probabilities.max(dim=1).values.detach().cpu().numpy()
    return least_confidence

def calculate_prediction_entropy(outputs):
    probabilities = F.softmax(outputs, dim=1)
    current_probs = probabilities.clone().detach()
    current_probs[current_probs == 0] = 1e-10
    prediction_entropy = -torch.sum(current_probs * torch.log(current_probs), dim=1).detach().cpu().numpy()
    return prediction_entropy
    
def calculate_margin_sampling(outputs):
    probabilities = F.softmax(outputs, dim=1)
    sorted_probs, _ = probabilities.sort(dim=1, descending=True)
    margin_sampling = (sorted_probs[:, 0] - sorted_probs[:, 1]).detach().cpu().numpy()
    return margin_sampling

def calculate_cosine_similarity(features, m=5):
    feature_distances = pairwise_distances(features.cpu().detach().numpy(), metric='cosine')
    cosine_similarity = 1 - feature_distances[:, 1:m+1].mean(axis=1)
    return cosine_similarity

def calculate_l2_norm(features, m=5):
    l2_distances = pairwise_distances(features.cpu().detach().numpy(), metric='euclidean')
    l2_norm = l2_distances[:, 1:m+1].mean(axis=1)
    return l2_norm

def calculate_kl_divergence(outputs, feature_distances, m=5):
    kl_divergence = []
    for i in range(len(outputs)):
        current_sample_prob = F.softmax(outputs[i], dim=0)
        neighbor_indices = feature_distances[i, 1:m+1].astype(int)
        neighbors_prob = F.softmax(torch.mean(F.softmax(outputs[neighbor_indices], dim=1), dim=0), dim=0)
        kl_divergence.append(F.kl_div(torch.log(current_sample_prob), neighbors_prob, reduction='batchmean').item())
    return kl_divergence

methods = ['least confidence', 'prediction entropy', 'margin sampling','cosine similarity', 'l2 norm', 'kl divergence']

results = {method: [] for method in methods}

for method in methods:
    
    labeled_set = list(range(1000))
    unlabeled_set = list(range(1000, len(train_data)))

    for iteration in range(3):
        
        labeled_loader = torch.utils.data.DataLoader(torch.utils.data.Subset(train_data, labeled_set), batch_size=10, shuffle=True)
        unlabeled_loader = torch.utils.data.DataLoader(torch.utils.data.Subset(train_data, unlabeled_set), batch_size=10, shuffle=False)

        for epochs in range(5):
            for data in labeled_loader:
                X_train, y_train = data
                y_prediction = model(X_train)
                loss = loss_function(y_prediction, y_train)
        
                optimizer.zero_grad()
                loss.backward()
                optimizer.step()

        test_correct = 0
        with torch.no_grad():
            for i, (X_test, y_test) in enumerate(test_loader):
                y_value = model(X_test)
                loss = loss_function(y_value, y_test)

                test_correct += (torch.max(y_value, 1)[1] == y_test).sum()

        print(f'method: {method}     iteration: {iteration + 1}     accuracy: {test_correct/len(test_data)*100 :.2f}%')
        accuracy = test_correct / len(test_data) * 100
        results[method].append(round(accuracy.item(), 2))

        unlabeled_dataset = unlabeled_loader.dataset
        unlabeled_subset = torch.utils.data.Subset(unlabeled_dataset, range(1000))
        unlabeled_subset_loader = torch.utils.data.DataLoader(unlabeled_subset, batch_size=unlabeled_loader.batch_size, shuffle=False)
        
        outputs_list = []
        features_list = []
        for data in unlabeled_subset:
            X, y = data
            outputs = model(X)

            features = model.extract_features(X)

            outputs_list.append(outputs)
            features_list.append(features)

        outputs = torch.cat(outputs_list, dim=0)
        features = torch.cat(features_list, dim=0)

        if (method == 'least confidence'):
            least_confidence = calculate_least_confidence(outputs)
            scores = np.array(least_confidence)
        elif (method == 'prediction entropy'):
            prediction_entropy = calculate_prediction_entropy(outputs)
            scores = -np.array(prediction_entropy)
        elif (method == 'margin sampling'):
            margin_sampling = calculate_margin_sampling(outputs)
            scores = np.array(margin_sampling)
        elif (method == 'cosine similarity'):
            features_normalized = F.normalize(features, p=2, dim=1)
            cosine_similarity = calculate_cosine_similarity(features_normalized)
            scores = -np.array(cosine_similarity)
        elif (method == 'l2 norm'):
            features_normalized = F.normalize(features, p=2, dim=1)
            l2_norm = calculate_l2_norm(features_normalized)
            scores = -np.array(l2_norm)
        elif (method == 'kl divergence'):
            feature_distances = pairwise_distances(features.cpu().detach().numpy(), metric='cosine')
            scores = calculate_kl_divergence(outputs, feature_distances)

        top_indices = np.argsort(scores)[-200:]
        new_samples = np.array(unlabeled_set)[top_indices].tolist()

        labeled_set.extend(new_samples)
        unlabeled_set = list(set(unlabeled_set) - set(new_samples))



method: least confidence     iteration: 1     accuracy: 90.35%
method: least confidence     iteration: 2     accuracy: 92.00%
method: least confidence     iteration: 3     accuracy: 94.63%
method: prediction entropy     iteration: 1     accuracy: 89.19%
method: prediction entropy     iteration: 2     accuracy: 95.35%
method: prediction entropy     iteration: 3     accuracy: 95.46%
method: margin sampling     iteration: 1     accuracy: 95.50%
method: margin sampling     iteration: 2     accuracy: 95.58%
method: margin sampling     iteration: 3     accuracy: 95.60%
method: cosine similarity     iteration: 1     accuracy: 95.68%
method: cosine similarity     iteration: 2     accuracy: 94.87%
method: cosine similarity     iteration: 3     accuracy: 94.78%
method: l2 norm     iteration: 1     accuracy: 95.10%
method: l2 norm     iteration: 2     accuracy: 95.11%
method: l2 norm     iteration: 3     accuracy: 94.01%
method: kl divergence     iteration: 1     accuracy: 94.34%
method: kl diver