In [1]:
# Import required libraries
import torch
import torch.nn as nn
import torch.optim as optim
import torchvision.transforms as transforms
import torchvision.datasets as datasets
from torch.utils.data import DataLoader
import torch.nn.functional as F
from sklearn.metrics import pairwise_distances
import numpy as np

# Define the LeNet-5 architecture with feature extraction
class LeNet5FeatureExtractor(nn.Module):
    def __init__(self, num_classes=10):
        super(LeNet5FeatureExtractor, self).__init__()
        self.conv_layer1 = nn.Conv2d(1, 6, kernel_size=5)  # Convolutional Layer 1
        self.pooling_layer1 = nn.AvgPool2d(kernel_size=2, stride=2)  # Pooling Layer 1
        self.conv_layer2 = nn.Conv2d(6, 16, kernel_size=5)  # Convolutional Layer 2
        self.pooling_layer2 = nn.AvgPool2d(kernel_size=2, stride=2)  # Pooling Layer 2
        self.fc_layer1 = nn.Linear(16 * 4 * 4, 120)  # Fully Connected Layer 1
        self.fc_layer2 = nn.Linear(120, 84)  # Fully Connected Layer 2
        self.output_layer = nn.Linear(84, num_classes)  # Output Layer

    def forward(self, x):
        x = self.pooling_layer1(F.relu(self.conv_layer1(x)))  # Conv1 -> ReLU -> Pool1
        x = self.pooling_layer2(F.relu(self.conv_layer2(x)))  # Conv2 -> ReLU -> Pool2
        extracted_features = x.view(-1, 16 * 4 * 4)  # Flatten
        x = F.relu(self.fc_layer1(extracted_features))  # FC1 -> ReLU
        x = F.relu(self.fc_layer2(x))  # FC2 -> ReLU
        logits = self.output_layer(x)  # Output
        return logits, extracted_features  # Return logits and features

# Set device (GPU if available, else CPU)
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

# Hyperparameters
batch_size = 64  # Increased batch size
learning_rate = 0.0005  # Adjusted learning rate
epochs = 15  # Increased number of epochs

# Data preprocessing
preprocess = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))  # Normalize grayscale images
])

# Load MNIST dataset
train_data = datasets.MNIST(root='./dataset', train=True, download=True, transform=preprocess)
test_data = datasets.MNIST(root='./dataset', train=False, download=True, transform=preprocess)

train_loader = DataLoader(train_data, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_data, batch_size=batch_size, shuffle=False)

# Initialize model, loss function, and optimizer
model = LeNet5FeatureExtractor().to(device)
loss_function = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

# Training phase
for epoch in range(epochs):
    model.train()
    total_loss = 0.0
    for inputs, targets in train_loader:
        inputs, targets = inputs.to(device), targets.to(device)

        # Reset gradients
        optimizer.zero_grad()

        # Forward pass
        predictions, _ = model(inputs)
        loss = loss_function(predictions, targets)

        # Backward pass
        loss.backward()
        optimizer.step()

        total_loss += loss.item()

    print(f"Epoch {epoch + 1}/{epochs}, Average Loss: {total_loss / len(train_loader):.4f}")

# Define uncertainty metrics
def compute_uncertainty_metrics(logits):
    probabilities = F.softmax(logits, dim=1)

    # Least Confidence
    confidence_scores = 1 - probabilities.max(dim=1).values.cpu().numpy()

    # Prediction Entropy
    probabilities[probabilities == 0] = 1e-10  # Avoid log(0)
    entropy = -torch.sum(probabilities * torch.log(probabilities), dim=1).cpu().numpy()

    # Margin Sampling
    sorted_probs, _ = torch.sort(probabilities, descending=True)
    margins = 1 - (sorted_probs[:, 0] - sorted_probs[:, 1]).cpu().numpy()

    return confidence_scores, entropy, margins

# Define diversity metrics
def compute_diversity_metrics(feature_vectors, top_k=5):
    cosine_distances = pairwise_distances(feature_vectors.cpu().numpy(), metric='cosine')
    cosine_similarity = 1 - np.mean(cosine_distances[:, 1:top_k + 1], axis=1)

    l2_distances = pairwise_distances(feature_vectors.cpu().numpy(), metric='euclidean')
    avg_l2_distances = np.mean(l2_distances[:, 1:top_k + 1], axis=1)

    return cosine_similarity, avg_l2_distances

# Define KL divergence
def compute_kl_divergence(logits, pairwise_similarities, top_k=5):
    epsilon = 1e-10
    kl_scores = []

    for i in range(len(logits)):
        current_prob = F.softmax(logits[i], dim=0)
        current_log_prob = torch.log(current_prob + epsilon)

        neighbor_indices = pairwise_similarities[i, 1:top_k + 1].astype(int)
        neighbor_probs = torch.mean(F.softmax(logits[neighbor_indices], dim=1), dim=0) + epsilon

        kl_score = F.kl_div(current_log_prob, neighbor_probs, reduction='batchmean').item()
        kl_scores.append(kl_score)

    return kl_scores

# Evaluation phase
model.eval()
total_correct = 0
num_samples = 0

# Metrics storage
confidence_scores_list = []
entropy_list = []
margins_list = []
cosine_similarity_list = []
l2_distances_list = []
kl_divergence_list = []

with torch.no_grad():
    for inputs, targets in test_loader:
        inputs, targets = inputs.to(device), targets.to(device)
        logits, features = model(inputs)

        _, predictions = torch.max(logits, dim=1)
        total_correct += (predictions == targets).sum().item()
        num_samples += targets.size(0)

        # Compute metrics
        conf_scores, entropy, margins = compute_uncertainty_metrics(logits)
        confidence_scores_list.extend(conf_scores)
        entropy_list.extend(entropy)
        margins_list.extend(margins)

        features_normalized = F.normalize(features, p=2, dim=1)
        cosine_sim, l2_dist = compute_diversity_metrics(features_normalized)
        cosine_similarity_list.extend(cosine_sim)
        l2_distances_list.extend(l2_dist)

        feature_dists = pairwise_distances(features.cpu().numpy(), metric='cosine')
        kl_scores = compute_kl_divergence(logits, feature_dists)
        kl_divergence_list.extend(kl_scores)

# Compute accuracy
accuracy = total_correct / num_samples * 100
print(f"Test Accuracy: {accuracy:.2f}%")

# Report metrics
print(f"Average Confidence Score: {np.mean(confidence_scores_list):.4f}")
print(f"Average Prediction Entropy: {np.mean(entropy_list):.4f}")
print(f"Average Margin Sampling: {np.mean(margins_list):.4f}")
print(f"Average Cosine Similarity: {np.mean(cosine_similarity_list):.4f}")
print(f"Average L2 Distance: {np.mean(l2_distances_list):.4f}")
print(f"Average KL Divergence: {np.mean(kl_divergence_list):.4f}")


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-images-idx3-ubyte.gz to ./dataset\MNIST\raw\train-images-idx3-ubyte.gz


100.0%


Extracting ./dataset\MNIST\raw\train-images-idx3-ubyte.gz to ./dataset\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/train-labels-idx1-ubyte.gz to ./dataset\MNIST\raw\train-labels-idx1-ubyte.gz


100.0%


Extracting ./dataset\MNIST\raw\train-labels-idx1-ubyte.gz to ./dataset\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-images-idx3-ubyte.gz to ./dataset\MNIST\raw\t10k-images-idx3-ubyte.gz


100.0%


Extracting ./dataset\MNIST\raw\t10k-images-idx3-ubyte.gz to ./dataset\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Failed to download (trying next):
HTTP Error 403: Forbidden

Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz
Downloading https://ossci-datasets.s3.amazonaws.com/mnist/t10k-labels-idx1-ubyte.gz to ./dataset\MNIST\raw\t10k-labels-idx1-ubyte.gz


100.0%


Extracting ./dataset\MNIST\raw\t10k-labels-idx1-ubyte.gz to ./dataset\MNIST\raw

Epoch 1/15, Average Loss: 0.3738
Epoch 2/15, Average Loss: 0.1055
Epoch 3/15, Average Loss: 0.0754
Epoch 4/15, Average Loss: 0.0605
Epoch 5/15, Average Loss: 0.0514
Epoch 6/15, Average Loss: 0.0450
Epoch 7/15, Average Loss: 0.0384
Epoch 8/15, Average Loss: 0.0331
Epoch 9/15, Average Loss: 0.0295
Epoch 10/15, Average Loss: 0.0273
Epoch 11/15, Average Loss: 0.0243
Epoch 12/15, Average Loss: 0.0214
Epoch 13/15, Average Loss: 0.0201
Epoch 14/15, Average Loss: 0.0184
Epoch 15/15, Average Loss: 0.0165
Test Accuracy: 99.10%
Average Confidence Score: 0.0061
Average Prediction Entropy: 0.0184
Average Margin Sampling: 0.0114
Average Cosine Similarity: 0.4843
Average L2 Distance: 0.9930
Average KL Divergence: 1.6027
