**SPML**

**HW6**


1. Import libraries

In [1]:
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import time

import torch
import torch.nn as nn
import torch.optim as optim

import torchvision.utils
from torchvision import models
import torchvision.datasets as dsets
import torchvision.transforms as transforms

2. Define model

In [64]:
class CNN(nn.Module):
    """Convolutional classifier used as the oracle model.

    Three 5x5 convolutions (with two 2x2 max-pools) feed a two-layer fully
    connected head that emits 10 logits. The per-layer shape comments assume
    a 1x28x28 input image.
    """

    def __init__(self):
        super().__init__()

        # Feature extractor; the module order fixes the state_dict indices.
        conv_stack = [
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5),   # 16 x 24 x 24
            nn.ReLU(),
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5),  # 32 x 20 x 20
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),                      # 32 x 10 x 10
            nn.Conv2d(in_channels=32, out_channels=64, kernel_size=5),  # 64 x 6 x 6
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),                      # 64 x 3 x 3
        ]
        self.layer = nn.Sequential(*conv_stack)

        # Classifier head mapping the flattened features to 10 logits.
        head = [
            nn.Linear(in_features=64 * 3 * 3, out_features=100),
            nn.ReLU(),
            nn.Linear(in_features=100, out_features=10),
        ]
        self.fc_layer = nn.Sequential(*head)

    def forward(self, x):
        """Return the logits for `x`, one row of 10 values per sample."""
        features = self.layer(x)
        # Collapse everything into rows of 64*3*3 values for the linear head.
        flattened = features.view(-1, 64 * 3 * 3)
        return self.fc_layer(flattened)


3. Load original dataset

In [3]:
# todo: Define Batch size & Load MNIST dataset #

# Use the current CUDA device when one is available, otherwise fall back to CPU.
device = torch.device(torch.cuda.current_device()) if torch.cuda.is_available() else torch.device('cpu')

# Seed both RNGs: NumPy drives the random index selection below, while the
# torch RNG drives model weight initialisation and DataLoader shuffling.
np.random.seed(0)
torch.manual_seed(0)

batch_size = 128
# ToTensor converts each image to a float tensor scaled to [0, 1].
transform = transforms.Compose([
    transforms.ToTensor()
])
mnist_train = dsets.MNIST(root='./data', train=True, transform=transform, download=True)
mnist_test = dsets.MNIST(root='./data', train=False, transform=transform)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 92682315.75it/s]


Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 33398316.47it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 28763122.22it/s]


Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 19031497.27it/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



## Training the substitute model

In [65]:
# Instantiate the oracle on the device selected in the data-loading cell
# instead of hard-coding .cuda(), so the cell also runs on CPU-only machines.
oracle = CNN().to(device)

# todo: load oracle's checkpoint

# NOTE(review): absolute Google Drive path; adjust when running outside the original Colab setup.
oracle_checkpoint_path = '/content/drive/MyDrive/MSC1401_1/SPML/HW6/checkpoint.pth'
# map_location remaps the stored tensors onto `device`, so a checkpoint saved
# from a GPU session can still be restored when CUDA is unavailable.
oracle.load_state_dict(torch.load(oracle_checkpoint_path, map_location=device))


<All keys matched successfully>

Defining the substitute model

In [44]:
class Net(nn.Module):
    """Substitute classifier: two 5x5 convolutions, one max-pool, two linear layers.

    The first linear layer expects 32*10*10 flattened features, which is what
    the convolutional part produces for a 1x28x28 input.
    """

    def __init__(self):
        super().__init__()

        # Convolutional feature extractor.
        conv_modules = [
            nn.Conv2d(in_channels=1, out_channels=16, kernel_size=5),
            nn.ReLU(),
            nn.Conv2d(in_channels=16, out_channels=32, kernel_size=5),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2, stride=2),
        ]
        self.conv_layer = nn.Sequential(*conv_modules)

        # Fully connected head producing 10 logits.
        fc_modules = [
            nn.Linear(in_features=32 * 10 * 10, out_features=100),
            nn.ReLU(),
            nn.Linear(in_features=100, out_features=10),
        ]
        self.fc_layer = nn.Sequential(*fc_modules)

    def forward(self, x):
        """Return the logits for the batch `x`."""
        conv_out = self.conv_layer(x)
        # Keep the batch dimension and flatten everything else.
        flattened = conv_out.view(len(conv_out), -1)
        return self.fc_layer(flattened)


Implement Jacobian-based Data Augmentation Algorithm

In [66]:
def train_JDA(net, X, oracle, epochs=45, lmbd=0.1, max_rho=6, lr=0.001):
    """Train a substitute model with Jacobian-based dataset augmentation.

    Each of the `max_rho` rounds (1) labels the not-yet-labelled points with
    the oracle's predicted class, (2) trains `net` on every labelled point
    with per-sample SGD for `epochs` epochs, and (3) creates one new point per
    labelled point by stepping `lmbd` along the sign of the gradient of the
    substitute's output for the oracle-assigned class. The substitute dataset
    therefore doubles after every round except the last.

    Only the oracle's predicted labels (`oracle(x).max(1).indices`) are used;
    its scores are never read.

    Args:
        net: substitute model to train; updated in place. Its parameters'
            device is used for all computation.
        X: non-empty sequence of unbatched input tensors; a batch dimension
            is added with `unsqueeze(0)` before every forward pass. The
            caller's sequence is not modified.
        oracle: callable mapping a batch of inputs to per-class scores.
        epochs: training epochs per augmentation round.
        lmbd: step size of the augmentation.
        max_rho: number of augmentation rounds.
        lr: SGD learning rate.

    Returns:
        The trained `net` (the same object that was passed in).

    Raises:
        ValueError: if `X` is empty.
    """
    if len(X) == 0:
        raise ValueError("X must contain at least one seed sample")

    # Run on whatever device the substitute lives on instead of assuming CUDA.
    device = next(net.parameters()).device
    criterion = nn.CrossEntropyLoss()

    samples = []   # every point labelled so far, in insertion order
    labels = []    # oracle label (int) for each entry of `samples`
    targets = []   # the same labels as 1-element tensors, built once per point
    pending = [x.to(device) for x in X]  # points still waiting for an oracle label

    for rho in range(max_rho):
        # Query the oracle only for the new points: earlier labels are reused,
        # which keeps the number of black-box queries to one per point.
        with torch.no_grad():
            for x in pending:
                label = oracle(x.unsqueeze(0)).max(1).indices.item()
                samples.append(x)
                labels.append(label)
                targets.append(torch.tensor([label], device=device))

        # Train the substitute on the current labelled set (per-sample SGD).
        optimizer = torch.optim.SGD(net.parameters(), lr=lr)
        for epoch in range(epochs):
            total_loss = 0.0
            for x, target in zip(samples, targets):
                optimizer.zero_grad()
                loss = criterion(net(x.unsqueeze(0)), target)
                loss.backward()
                optimizer.step()
                total_loss += loss.item()
            print(f"Epoch [{epoch+1}/{epochs}], Loss: {total_loss/len(samples):.4f}")

        # Points generated after the final round would never be trained on.
        if rho == max_rho - 1:
            break

        # Jacobian-based augmentation. Only the Jacobian row of the oracle's
        # label is needed, so take a single gradient of that output instead of
        # materialising the full Jacobian.
        pending = []
        for x, label in zip(samples, labels):
            probe = x.detach().unsqueeze(0).clone().requires_grad_(True)
            class_score = net(probe)[0, label]
            (grad,) = torch.autograd.grad(class_score, probe)
            pending.append((x + lmbd * torch.sign(grad[0])).detach())

    return net

def compute_accuracy(net, X, y, batch_size=1024):
    """Return the top-1 accuracy of `net` on `(X, y)` as a percentage.

    The data is evaluated in chunks of `batch_size` on the device that holds
    the model's parameters, so a full dataset split can be passed without
    pushing all of it through a single forward pass.

    Args:
        net: model to evaluate; it is switched to eval mode and left there.
        X: input tensor whose first dimension indexes samples.
        y: tensor of integer class labels, one per sample in `X`.
        batch_size: number of samples per forward pass.

    Returns:
        Accuracy as a float in [0, 100].

    Raises:
        ValueError: if `X` is empty, if `X` and `y` differ in length, or if
            `batch_size` is smaller than 1.
    """
    num_samples = len(X)
    if num_samples == 0:
        raise ValueError("cannot compute accuracy on an empty dataset")
    if num_samples != len(y):
        raise ValueError("X and y must contain the same number of samples")
    if batch_size < 1:
        raise ValueError("batch_size must be at least 1")

    # Evaluate on the model's own device rather than assuming CUDA.
    device = next(net.parameters()).device
    net.eval()
    correct = 0

    with torch.no_grad():
        for start in range(0, num_samples, batch_size):
            inputs = X[start:start + batch_size].to(device)
            labels = y[start:start + batch_size].to(device)
            predicted = net(inputs).max(1).indices
            correct += (predicted == labels).sum().item()

    return 100.0 * correct / num_samples

In [72]:
# Draw 100 distinct random positions in the training set.
indices = np.random.choice(len(mnist_train), size=100, replace=False)

# Gather the sampled images and labels as plain Python lists, in sampled order.
JDA_train_X = list(map(lambda idx: mnist_train[idx][0], indices))
JDA_train_y = list(map(lambda idx: mnist_train[idx][1], indices))

# Newly initialised substitute model, placed on the GPU.
net = Net().cuda()

In [74]:
# Train the substitute on the sampled images, with labels supplied by the oracle.
net = train_JDA(net=net, X=JDA_train_X, oracle=oracle)

Epoch [1/45], Loss: 2.2400
Epoch [2/45], Loss: 2.1754
Epoch [3/45], Loss: 2.0856
Epoch [4/45], Loss: 1.9336
Epoch [5/45], Loss: 1.7287
Epoch [6/45], Loss: 1.5984
Epoch [7/45], Loss: 1.5262
Epoch [8/45], Loss: 1.4683
Epoch [9/45], Loss: 1.4134
Epoch [10/45], Loss: 1.3591
Epoch [11/45], Loss: 1.3008
Epoch [12/45], Loss: 1.2350
Epoch [13/45], Loss: 1.1601
Epoch [14/45], Loss: 1.0805
Epoch [15/45], Loss: 1.0002
Epoch [16/45], Loss: 0.9241
Epoch [17/45], Loss: 0.8549
Epoch [18/45], Loss: 0.7952
Epoch [19/45], Loss: 0.7423
Epoch [20/45], Loss: 0.6968
Epoch [21/45], Loss: 0.6571
Epoch [22/45], Loss: 0.6221
Epoch [23/45], Loss: 0.5904
Epoch [24/45], Loss: 0.5606
Epoch [25/45], Loss: 0.5337
Epoch [26/45], Loss: 0.5060
Epoch [27/45], Loss: 0.4799
Epoch [28/45], Loss: 0.4562
Epoch [29/45], Loss: 0.4310
Epoch [30/45], Loss: 0.4069
Epoch [31/45], Loss: 0.3841
Epoch [32/45], Loss: 0.3622
Epoch [33/45], Loss: 0.3411
Epoch [34/45], Loss: 0.3213
Epoch [35/45], Loss: 0.3033
Epoch [36/45], Loss: 0.2850
E

In [75]:
# Fetch each split as one tensor pair by requesting a single batch that
# spans the whole dataset.
batch_size = len(mnist_train)

train_loader = torch.utils.data.DataLoader(
    dataset=mnist_train,
    batch_size=batch_size,
    shuffle=True,
)
train_X, train_y = next(iter(train_loader))

test_loader = torch.utils.data.DataLoader(
    dataset=mnist_test,
    batch_size=len(mnist_test),
    shuffle=True,
)
test_X, test_y = next(iter(test_loader))

In [76]:
# Accuracy of the substitute model on the full training split.
train_accuracy = compute_accuracy(net=net, X=train_X, y=train_y)
print('Train Accuracy: {:.2f}%'.format(train_accuracy))

# Accuracy of the substitute model on the full test split.
test_accuracy = compute_accuracy(net=net, X=test_X, y=test_y)
print('Test Accuracy: {:.2f}%'.format(test_accuracy))

Train Accuracy: 37.93%
Test Accuracy: 38.70%
