**SPML**

**HW6**

**Name: Hamidreza Amirzadeh**

**Student No.: 401206999**

1. Import libraries

In [1]:
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import time

import torch
import torch.nn as nn
import torch.optim as optim

import torchvision.utils
from torchvision import models
import torchvision.datasets as dsets
import torchvision.transforms as transforms

2. Define model

In [2]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()

        self.layer = nn.Sequential(
            nn.Conv2d(1,16,5), # 16*24*24
            nn.ReLU(),
            nn.Conv2d(16,32,5), # 32*20*20
            nn.ReLU(),
            nn.MaxPool2d(2,2), # 32*10*10
            nn.Conv2d(32,64,5), # 64*6*6
            nn.ReLU(),
            nn.MaxPool2d(2,2) #64*3*3
        )

        self.fc_layer = nn.Sequential(
            nn.Linear(64*3*3,100),
            nn.ReLU(),
            nn.Linear(100,10)
        )

    def forward(self,x):
        out = self.layer(x)
        out = out.view(-1,64*3*3)
        out = self.fc_layer(out)

        return out


3. Load original dataset

In [None]:
# todo: Define Batch size & Load MNIST dataset #
device = torch.device(torch.cuda.current_device()) if torch.cuda.is_available() else torch.device('cpu')
print('device:', device)
np.random.seed(0)

batch_size = 32
mnist_train = dsets.MNIST(root='./Mnistdata', train=True, transform=transforms.ToTensor(), download=True)
mnist_test = dsets.MNIST(root='./Mnistdata', train=False, transform=transforms.ToTensor(), download=True)

## Training the substitute model

In [4]:
oracle = CNN().cuda()

# todo: load oracle's checkpoint
oracle.load_state_dict(torch.load("checkpoint.pth", map_location=device))
oracle = oracle.to(device)
oracle.eval()

CNN(
  (layer): Sequential(
    (0): Conv2d(1, 16, kernel_size=(5, 5), stride=(1, 1))
    (1): ReLU()
    (2): Conv2d(16, 32, kernel_size=(5, 5), stride=(1, 1))
    (3): ReLU()
    (4): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (5): Conv2d(32, 64, kernel_size=(5, 5), stride=(1, 1))
    (6): ReLU()
    (7): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc_layer): Sequential(
    (0): Linear(in_features=576, out_features=100, bias=True)
    (1): ReLU()
    (2): Linear(in_features=100, out_features=10, bias=True)
  )
)

Defining the model

In [5]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        ### TO DO ###
        self.layer = nn.Sequential(
            nn.Conv2d(1,32,5),
            nn.ReLU(),
            nn.MaxPool2d(2,2),
            nn.Conv2d(32,64,3),
            nn.ReLU(),
            nn.MaxPool2d(2,2),
        )
        self.fc_layer = nn.Sequential(
            nn.Linear(64*5*5,100),
            nn.ReLU(),
            nn.Linear(100,10)
        )
        #############

    def forward(self, x):
        ### TO DO ###
        x = self.layer(x)
        x = x.view(-1,64*5*5)
        x = self.fc_layer(x)
        #############
        return x

Implement Jacobian-based Data Augmentation Algorithm

In [10]:
def train_JDA(net, X, oracle, epochs=45, lmbd=0.1, max_rho=6):
    ### TO DO ###
    ### Note that you can only use the prediction labels in the training
    ### You can get the prediction labels using net(X).max(1).indices
    def train_from_scratch(net, X, y, epochs):
        net.to(device)
        net.train()
        for module in net.modules():
            if hasattr(module, 'reset_parameters'):
                module.reset_parameters()

        criterion = nn.CrossEntropyLoss()
        optimizer = optim.SGD(net.parameters(), lr=0.01)
        batch_size = 16
        batch_count = len(X)//batch_size if len(X) % batch_size == 0 else (len(X)//batch_size + 1)

        for epoch in range(epochs):
            for i in range(batch_count):
                j = (i + 1) * batch_size if ((i + 1) * batch_size) <= len(X) else len(X)
                input_data = torch.stack(X[i*batch_size:j]).to(device)
                label_data = y[i*batch_size:j].to(device)
                optimizer.zero_grad()
                outputs = net(input_data)
                loss = criterion(outputs, label_data)
                loss.backward()
                optimizer.step()

        return net

    for a in range(max_rho):
        print(f"rho: {a + 1} / {max_rho}, dataset size = {len(X)}")
        # train net from scratch on new data and oracle labels
        oracle_labels = oracle(torch.stack(X).to(device)).max(1).indices
        net = train_from_scratch(net, X, oracle_labels, epochs)
        net.to(device)
        net.eval()

        # augment dataset
        oracle_labels = oracle_labels.to(device)
        current_dataset_size = len(X)
        for i in range(current_dataset_size):
            current_data = X[i].unsqueeze(0).to(device)
            current_data_copy = current_data.clone().requires_grad_(True)
            net_output = net(current_data_copy)
            net.zero_grad()
            gradient = torch.autograd.grad(net_output[0][oracle_labels[i]], current_data_copy)[0]
            new_data = current_data.squeeze(0) +  lmbd * torch.sign(gradient).squeeze(0)
            X.append(new_data.detach().cpu())

    return net
    #############

def compute_accuracy(net, X, y):
    ### TO DO ###
    X = X.to(device)
    output = net(X)
    label = output.max(1).indices
    original_correct = (label == y.to(device)).sum().item()
    oracle_labels = oracle(X).max(1).indices
    oracle_correct = (label == oracle_labels.to(device)).sum().item()
    total = X.shape[0]
    print(f"Original accuracy: {round(original_correct/total, 3)}\nOracle accuracy: {round(oracle_correct/total, 3)}")
    #############

In [8]:
### TO DO ###
### Select 100 images with their labels from the dataset, randomly
import random

JDA_train_X = []
JDA_train_y = []

random_indices = random.sample(range(len(mnist_train)), 100)
for i in random_indices:
    image, label = mnist_train[i]
    JDA_train_X.append(image)
    JDA_train_y.append(label)
#############

net = Net()

In [11]:
train_JDA(net, JDA_train_X, oracle)

rho: 1 / 6, dataset size = 400
rho: 2 / 6, dataset size = 800
rho: 3 / 6, dataset size = 1600
rho: 4 / 6, dataset size = 3200
rho: 5 / 6, dataset size = 6400
rho: 6 / 6, dataset size = 12800


Net(
  (layer): Sequential(
    (0): Conv2d(1, 32, kernel_size=(5, 5), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc_layer): Sequential(
    (0): Linear(in_features=1600, out_features=100, bias=True)
    (1): ReLU()
    (2): Linear(in_features=100, out_features=10, bias=True)
  )
)

In [12]:
### TO DO ###
### load the whole test and train dataset in one full batch
train_dataloader = torch.utils.data.DataLoader(mnist_train, len(mnist_train), shuffle=True)
train_X, train_y = next(iter(train_dataloader))

test_dataloader = torch.utils.data.DataLoader(mnist_test, len(mnist_test), shuffle=True)
test_X, test_y = next(iter(test_dataloader))
#############

In [15]:
### TO DO ###
### compute accuracy of net on train and test
torch.cuda.empty_cache()

print("Test Accuracy of substantial model:")
compute_accuracy(net, test_X, test_y)
print('-'*40)
print("Test Accuracy of oracle model:")
compute_accuracy(oracle, test_X, test_y)
# #############

Test Accuracy of substantial model:
Original accuracy: 0.436
Oracle accuracy: 0.723
----------------------------------------
Test Accuracy of oracle model:
Original accuracy: 0.413
Oracle accuracy: 1.0
