# Boilerplate

Package installation, loading, and dataloaders. There's also a simple model defined. You can change it to your favourite architecture if you want.

In [68]:
# !pip install tensorboardX

import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
import numpy as np
import time
import matplotlib.pyplot as plt

from torchvision import datasets, transforms
# from tensorboardX import SummaryWriter

# Fix both RNG seeds up front so weight initialisation and batch shuffling
# are repeatable from a fresh kernel.
torch.manual_seed(42)
np.random.seed(42)

# Mini-batch size shared by the training and test dataloaders.
batch_size = 64

# Everything in this notebook is pinned to the CPU. To use a GPU when one is
# available, swap in:
#   torch.device("cuda" if torch.cuda.is_available() else "cpu")
device = torch.device("cpu")
print(device)


## Dataloaders
# Both MNIST splits are fetched into 'mnist_data/' if not already present and
# use the same preprocessing: each image is converted to a tensor.
train_dataset = datasets.MNIST(
    root='mnist_data/',
    train=True,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)
test_dataset = datasets.MNIST(
    root='mnist_data/',
    train=False,
    download=True,
    transform=transforms.Compose([transforms.ToTensor()]),
)

# Training batches are reshuffled every epoch; the test order stays fixed.
train_loader = torch.utils.data.DataLoader(
    dataset=train_dataset, batch_size=batch_size, shuffle=True
)
test_loader = torch.utils.data.DataLoader(
    dataset=test_dataset, batch_size=batch_size, shuffle=False
)



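## NN defined as specified in the assignment: 3 fully connected hidden layers of size 50,
## each followed by a ReLU activation, and a final 10-way linear output layer.
## (The interval analysis functions are defined further down, in the L-infinity evaluation section.)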
class IntervalNet(nn.Module):
    """Fully connected MNIST classifier: 784 -> 50 -> 50 -> 50 -> 10.

    Each of the three hidden linear layers is followed by its own ReLU
    module. Submodules are registered in the same order in which ``forward``
    applies them, so iterating ``children()`` walks the network front to back.
    """

    def __init__(self):
        super().__init__()

        n_pixels, width, n_classes = 28 * 28, 50, 10

        # Hidden stack: Linear -> ReLU, three times.
        self.fc1 = nn.Linear(n_pixels, width)
        self.relu1 = nn.ReLU()
        self.fc2 = nn.Linear(width, width)
        self.relu2 = nn.ReLU()
        self.fc3 = nn.Linear(width, width)
        self.relu3 = nn.ReLU()

        # Final projection to the class scores; no activation follows it.
        self.output_layer = nn.Linear(width, n_classes)

    def forward(self, x):
        """Flatten each image to 784 values and return the raw class scores."""
        hidden = x.view((-1, 28 * 28))

        hidden = self.relu1(self.fc1(hidden))
        hidden = self.relu2(self.fc2(hidden))
        hidden = self.relu3(self.fc3(hidden))

        return self.output_layer(hidden)

# Build the classifier on the selected device and put it in training mode.
model = IntervalNet().to(device)
model.train()

# Cross-entropy on the raw network outputs; used by both the training and
# the evaluation loops below.
loss_func = nn.CrossEntropyLoss()

cpu


# Model Training

In [69]:
def train_model(model, num_epochs, lr=1e-3, momentum=0.0):
    """Train ``model`` in place on the training set with SGD.

    Runs ``num_epochs`` full passes over the module-level ``train_loader``,
    minimising the module-level ``loss_func``. Batches are moved to the
    module-level ``device`` before the forward pass.

    Args:
        model: network to optimise; its parameters are updated in place and
            it is left in training mode.
        num_epochs: number of full passes over ``train_loader``.
        lr: SGD step size. Stated explicitly instead of relying on the
            optimizer's implicit default, so the training recipe is visible
            and tunable from the call site.
        momentum: SGD momentum factor (0.0 means plain SGD).

    Returns:
        None. Prints "Training complete" when finished.
    """
    optimizer = optim.SGD(model.parameters(), lr=lr, momentum=momentum)

    model.train()

    for _ in range(num_epochs):
        # Loop adapted from the official PyTorch tutorial:
        # https://pytorch.org/tutorials/beginner/blitz/cifar10_tutorial.html
        for inputs, labels in train_loader:
            inputs = inputs.to(device)
            labels = labels.to(device)

            # Gradients accumulate across backward() calls, so clear them first.
            optimizer.zero_grad()

            # forward + backward + optimize
            outputs = model(inputs)
            loss = loss_func(outputs, labels)
            loss.backward()
            optimizer.step()

    print("Training complete")


In [70]:
def test_model(model):
    model.eval()  # Set the model to evaluation mode
    test_loss = 0
    correct = 0
    for test_inputs, true_labels in test_loader:
        test_inputs = test_inputs.to(device)
        true_labels = true_labels.to(device)

        # Forward pass
        output = model(test_inputs)
        test_loss += loss_func(output, true_labels).item()  # Sum up batch loss

        pred = output.argmax(dim=1, keepdim=True)  # Get the index of the max log-probability

        correct += pred.eq(true_labels.view_as(pred)).sum().item()

    test_loss /= len(test_loader.dataset)
    accuracy = 100. * correct / len(test_loader.dataset)

    print(f"Test set: Average loss: {test_loss:.4f}, Accuracy: {correct}/{len(test_loader.dataset)} ({accuracy:.2f}%)")
    return accuracy

In [71]:
# Train for 20 epochs, then write the learned parameters to disk so the
# evaluation cells can reload them.
train_model(model, num_epochs=20)
torch.save(model.state_dict(), 'weights.pt')

Training complete


# Brief model evaluation

In [72]:
# Rebuild the architecture and restore the parameters saved to 'weights.pt'.
model = IntervalNet().to(device)
# weights_only=True restricts unpickling to tensors and plain containers,
# which is all a state_dict needs; it avoids executing arbitrary pickled code
# and silences the torch.load FutureWarning. map_location places the tensors
# on the notebook's device regardless of where the checkpoint was saved.
model.load_state_dict(torch.load('weights.pt', map_location=device, weights_only=True))

test_model(model)

  model.load_state_dict(torch.load('weights.pt'))


Test set: Average loss: 0.0096, Accuracy: 8148/10000 (81.48%)


81.48

# Evaluate on L-infinity neighborhoods

In [102]:
## Define interval analysis functions
def interval_propagation(model, x_min, x_max):
    """Propagate an elementwise interval [x_min, x_max] through ``model``.

    Walks the direct children of ``model`` in registration order and applies
    the matching interval transformer to the running bounds. This is only
    meaningful when the children are registered in the same order in which
    the model's ``forward`` applies them.

    Args:
        model: an ``nn.Module`` whose children are ``nn.Linear``, ``nn.ReLU``
            or ``nn.Sequential`` containers of those.
        x_min: lower bounds of the input box.
        x_max: upper bounds of the input box (same shape as ``x_min``).

    Returns:
        ``(y_min, y_max)``: lower and upper bounds after the last layer.

    Raises:
        NotImplementedError: if a child layer has no interval transformer.
            Skipping such a layer would silently produce bounds that do not
            enclose the real network outputs.
    """
    for layer in model.children():
        if isinstance(layer, nn.Linear):
            x_min, x_max = propagate_linear(layer, x_min, x_max)
        elif isinstance(layer, nn.ReLU):
            x_min, x_max = propagate_relu(x_min, x_max)
        elif isinstance(layer, nn.Sequential):
            # children() only yields direct children, so descend explicitly.
            x_min, x_max = interval_propagation(layer, x_min, x_max)
        else:
            raise NotImplementedError(
                f"interval propagation is not defined for layer type {type(layer).__name__}"
            )
    return x_min, x_max

def propagate_linear(layer, x_min, x_max):
    """Interval transformer for an affine layer ``y = x @ W.T + b``.

    Args:
        layer: an ``nn.Linear`` (with or without bias).
        x_min: lower bounds; the last dimension must match the layer's input
            size. Any number of leading batch dimensions (including none) is
            accepted.
        x_max: upper bounds, same shape as ``x_min``.

    Returns:
        ``(y_min, y_max)``: elementwise lower and upper bounds of the layer
        output over the input box.
    """
    W = layer.weight
    b = layer.bias  # None when the layer was built with bias=False

    # Split W by sign instead of looping over the weights: a non-negative
    # weight attains its smallest contribution at x_min and its largest at
    # x_max; a non-positive weight does the opposite.
    W_positive = torch.clamp(W, min=0)  # positive part of W
    W_negative = torch.clamp(W, max=0)  # negative part of W

    # F.linear contracts over the last dimension (x @ W.T), so no manual
    # transposes are needed, and it accepts bias=None. The bias is added
    # exactly once to each bound.
    y_min = F.linear(x_min, W_positive) + F.linear(x_max, W_negative, b)
    y_max = F.linear(x_max, W_positive) + F.linear(x_min, W_negative, b)

    return y_min, y_max

def propagate_relu(x_min, x_max):
    """Interval transformer for ReLU.

    ReLU is non-decreasing, so applying it to each bound separately gives
    the bounds of the output interval.
    """
    lower = F.relu(x_min)
    upper = F.relu(x_max)
    return lower, upper

In [115]:
# Evaluate interval-analysis robustness on the test set for each L-infinity neighborhood size (the sizes themselves are defined in the next cell)
def evaluate_robustness(epss):
    """Print the interval-certified robustness of ``model`` for each radius.

    For every ``eps`` in ``epss``, each test image is widened to the box
    ``[x - eps, x + eps]`` and pushed through ``interval_propagation``. A
    sample counts as robust when the lower bound of its true class score is
    strictly larger than the upper bound of every other class score.

    Uses the module-level ``model``, ``test_loader`` and ``device``.

    Args:
        epss: iterable of L-infinity radii (floats or 0-dim tensors).

    Returns:
        None. One line is printed per radius.
    """
    model.eval()  # Set the model to evaluation mode

    # Bounds are only inspected, never differentiated.
    with torch.no_grad():
        for eps in epss:
            robust = 0
            total = 0

            for samples, t_labels in test_loader:
                samples = samples.to(device).view((-1, 28*28))  # Flatten MNIST image
                t_labels = t_labels.to(device)

                # Input box for the L-infinity perturbation.
                # NOTE(review): the box is not clipped to the valid pixel
                # range - confirm whether the neighborhood should be
                # intersected with it (that would only tighten the bounds).
                x_min = samples - eps
                x_max = samples + eps

                output_min, output_max = interval_propagation(model, x_min, x_max)

                true_idx = t_labels.view(-1, 1)
                true_class_min = output_min.gather(1, true_idx).squeeze(1)

                # Exclude the true class from the "best rival" search with
                # -inf. A finite sentinel such as -1 can exceed every real
                # upper bound and wrongly reject a robust sample. scatter()
                # is out of place, so output_max itself is left untouched.
                rival_max = output_max.scatter(1, true_idx, float("-inf")).max(dim=1).values

                robust += (rival_max < true_class_min).sum().item()
                total += t_labels.size(0)

            # An empty loader has no defined robustness; report NaN instead
            # of raising ZeroDivisionError.
            robustness = robust / total if total else float("nan")
            print(f"Robustness for epsilon {eps:.3f}: {robustness * 100:.2f}%")

In [116]:
# Ten L-infinity neighborhood sizes, evenly spaced from 0.01 to 0.1 inclusive.
epsilons = torch.linspace(start=0.01, end=0.1, steps=10)

evaluate_robustness(epss=epsilons)

Robustness for epsilon 0.010: 0.00%
Robustness for epsilon 0.020: 0.00%
Robustness for epsilon 0.030: 0.00%
Robustness for epsilon 0.040: 0.00%
Robustness for epsilon 0.050: 0.00%
Robustness for epsilon 0.060: 0.00%
Robustness for epsilon 0.070: 0.00%
Robustness for epsilon 0.080: 0.00%
Robustness for epsilon 0.090: 0.00%
Robustness for epsilon 0.100: 0.00%
