In [3]:
"""
Libraries

"""

import math

import matplotlib.pyplot as plt

import numpy as np

from sklearn.metrics import adjusted_rand_score, normalized_mutual_info_score, silhouette_score

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.nn.init as init
import torch.optim as optim
from torch.utils.data import DataLoader

import torchvision


In [4]:
"""
Setting the hyperparameters

"""

num_epochs: int = 10
batch_size: int = 32    # Should be set to a power of 2.
# Learning rate
lr:         float = 0.001
# Trade-off parameter for mutual information and smooth regularization
lam:        float = 0.1


In [5]:
"""
Data Preprocessing

"""
#TODO Preprocess the AILARON dataset to a suitable format.

# #TODO Implement custome dataset for AILARON data. Should inherit from torch.utils.data.Dataset
# class AILARONDataset(torchvision.Dataset):

#     def __init__(self):
#         # Load data
#         pass

#     def __getitem__(self, index):
#         # TODO
#         pass
#     def __len__(self):
#         # TODO
#         pass 

# ailaron_train = AILARONDataset()
# dataloader = DataLoader(dataset=ailaron_train, batch_size=batch_size, shuffle=True)

# Load MNIST dataset, normalizes data and transform to tensor.
mnist_train = torchvision.datasets.MNIST(root='./data', train=True, download=True, transform=torchvision.transforms.ToTensor())

# # Get a random image from the dataset
# image, label = mnist_train[np.random.randint(0, len(mnist_train))]

# # Plot the image
# plt.imshow(image[0], cmap='gray')
# plt.title(f'Label: {label}')
# plt.show()

# Create DataLoader
train_loader = DataLoader(mnist_train, batch_size=batch_size, shuffle=True)

In [57]:
"""
Conditional probability modeled as a Deep Neural Network

"""

class NeuralNet(nn.Module):
    def __init__(self):
        super(NeuralNet, self).__init__()
        
        # Add first fully connected layer with 28 * 28 = 784 input neurons and 1200 output neurons
        self.fc1 = nn.Linear(28 * 28, 1200)
        # Initialize the weights of the first fully connected layer using the He normal initialization
        init.kaiming_normal_(self.fc1.weight, nonlinearity='relu')
        # Add first batch normalization layer with 1200 neurons and epsilon = 2e-5
        self.bn1   = nn.BatchNorm1d(1200, eps=2e-5)
        self.bn1_F = nn.BatchNorm1d(1200, eps=2e-5, affine=False)
        # Add first ReLU activation function
        self.relu1 = nn.ReLU()
        
        self.fc2 = nn.Linear(1200, 1200)
        init.kaiming_normal_(self.fc2.weight, nonlinearity='relu')
        self.bn2   = nn.BatchNorm1d(1200, eps=2e-5)
        self.bn2_F = nn.BatchNorm1d(1200, eps=2e-5, affine=False)

        self.relu2 = nn.ReLU()
        
        # Add output layer of size 10 
        self.fc3 = nn.Linear(1200, 10)
        init.kaiming_normal_(self.fc3.weight, nonlinearity='linear')
        
    # Define the forward pass through the network
    def forward(self, x):
        # Pass the input through the first fully connected layer
        x = self.fc1(x)
        # Pass the output of the first fully connected layer through the first batch normalization layer
        x = self.bn1(x)
        # Pass the output of the first batch normalization layer through the first ReLU activation function
        x = self.relu1(x)
        
        x = self.fc2(x)
        x = self.bn2(x)
        x = self.relu2(x)
        
        x = self.fc3(x)
        
        return x
    
net = NeuralNet()

"""
Approximation of the Marginal Distribution

"""

def mariginal_distribution(conditionals: torch.Tensor) -> torch.Tensor:
    """
    Approximates the mariginal probability according to Eq (15)
    
    Args:
    - conditionals: conditional probabilities

    Returns
    - An approximation of mariginal probabilities
    """

    return torch.sum(conditionals) / conditionals.size()[0]

In [48]:
"""
Mutual Information

"""

def shannon_entropy(probabilities: torch.Tensor) -> float:
    """
    Computes the Shannon entropy of a tensor of probabilities. According to EEq. (9)
    
    Args:
    - probabilities (torch.Tensor):
    
    Returns:
    - float: the Shannon entropy
    """

    return -torch.sum(probabilities * torch.log(probabilities))


def mutual_information(probabilities: torch.Tensor, conditionals: torch.Tensor) -> float:
    """
    Calculate the mutual information between two discrete random variables. According to Eq. (7)
    
    Args:
    - probabilities (torch.Tensor): The joint probabilities of the two random variables.
    - conditionals (torch.Tensor): The conditional probabilities of each outcome of the second random variable given the first random variable.
    
    Returns:
    - float: The mutual information between the two random variables.
    """
    marg_entropy = shannon_entropy(probabilities)
    cond_entropy = shannon_entropy(conditionals)
    
    return marg_entropy - cond_entropy

In [49]:
"""
Self-Augmented Training (SAT)

"""

# def KL_divergence(p: torch.Tensor, q: torch.Tensor) -> float:
#     """
#     Calculate the Kullback-Leibler divergence

#     Args:
#     - p (torch.Tensor): probability distribution
#     - q (torch.Tensor): probability distribution

#     Retruns:
#     - the Kullback-Leibler divergence as a float
#     """

#     return torch.sum(p * torch.log(p / q))

def virtual_adversarial_perturbation(model: NeuralNet, x: torch.Tensor, eps: float = 1.0, ksi: float = 1e1, num_iters: int = 1) -> torch.Tensor:
    """
    Calculate the virtual adversarial perturbation for a batch of input samples x.

    Args:
        model: neural network model (unnormalized log probabilities of the model for the input samples)
        x: input samples (batch_size x input_dim)
        eps: perturbation size (float)
        ksi: small constant used for computing the finite difference approximation of the KL divergence (float)
        num_iters: number of iterations to use for computing the perturbation (int)

    Returns:
        d: virtual adversarial perturbation for the input samples (batch_size x input_dim)
    """

    # Compute discrete representation of input samples.
    y = F.softmax(model(x), dim=1)

    # Initial perturbation with the same shape as input samples.
    r = torch.randn_like(x)

    for i in range(num_iters):
        # Compute the discrete representation of the perturbed datapoints.
        y_p = F.softmax(model(x + r * ksi), dim=1)
     
        # Compute the KL divergence between the probabilities
        kl_div = F.kl_div(y.log(), y_p, reduction='batchmean')
     
        # Compute the gradient of current tensor w.r.t. graph leaves.
        grad_r = torch.autograd.grad(kl_div, r, create_graph=True)[0]
     
        # Set the perturbation as the gradient of the KL divergence w.r.t. r
        r = grad_r.detach()

    return r * eps


def virtual_adversarial_training(model: NeuralNet, x: torch.Tensor, eps: float = 1.0, xi: float = 1e-6, num_iters: int = 1):
    """Apply virtual adversarial training to a batch of input samples.

    Args:
        model: A neural network model.
        x: Input samples of shape `(batch_size x input_dim)`.
        eps: Perturbation size.
        xi: A small constant used for computing the finite difference approximation of the KL divergence.
        num_iters: The number of iterations to use for computing the perturbation.

    Returns:
        The total loss (sum of cross-entropy loss on original input and perturbed input) for the batch.

    """

    # Compute discrete representation of input samples.
    y = F.softmax(model(x), dim=1)

    # Compute the virtual adversarial perturbation for the input
    vad = virtual_adversarial_perturbation(model, x, eps, xi, num_iters)

    loss = F.kl_div(y.log(), model(x + vad), reduction='batchmean')

    return loss


In [58]:
"""
Training the model

"""

# Define the training function
def train(model, train_loader, criterion, optimizer, num_epochs):
    """
    Trains a given model using the provided training data, optimizer and loss criterion for a given number of epochs.

    Args:
        model: Neural network model to train.
        train_loader: PyTorch data loader containing the training data.
        criterion: Loss criterion used for training the model.
        optimizer: Optimizer used to update the model's parameters.
        num_epochs: Number of epochs to train the model.

    Returns:
        None
    """
    # Loop over the epochs
    for epoch in range(num_epochs):
        # Initialize running loss for the epoch
        running_loss = 0.0
        # Loop over the mini-batches in the data loader
        for i, data in enumerate(train_loader):
            # Get the inputs and labels for the mini-batch
            inputs, labels = data
            # Zero the parameter gradients
            optimizer.zero_grad()
            # Forward pass through the model
            outputs = model(inputs)
            # Compute the loss
            loss = criterion(outputs, labels)
            # Backward pass through the model and compute gradients
            loss.backward()
            # Update the weights
            optimizer.step()
            # Accumulate the loss for the mini-batch
            running_loss += loss.item()
        # Compute the average loss for the epoch and print
        print(f"Epoch {epoch+1} loss: {running_loss/len(train_loader)}")

# Initialize the model, loss function, and optimizer
model = NeuralNet()
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=lr)

# Train the model
train(model, train_loader, criterion, optimizer, num_epochs)




SyntaxError: invalid syntax (1363284035.py, line 22)

In [50]:
"""

"""

# # For running net on single training example
# net.eval()

# cond_pr = F.softmax(net(test_image), dim=1)
# marg_pr = cond_pr # TODO Implement according to Eq. 15

# mutual_information(marg_pr, cond_pr)
# # shannon_entropy(marg_pr)



'\n\n'

In [None]:
"""
Evaluation Metric

"""
# TODO Consider implementing the unsupervised clustering accuracy (ACC), see to Eq. (16).
