**SPML**

**HW6**

**Name: Javad Hezareh**

**Student No.: 98101074**

1. Import libraries

In [1]:
import os
import sys
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import time

import torch
import torch.nn as nn
import torch.optim as optim

import torchvision.utils
from torchvision import models
import torchvision.datasets as datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

2. Define model

In [2]:
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()

        self.layer = nn.Sequential(
            nn.Conv2d(1,16,5), # 16*24*24
            nn.ReLU(),
            nn.Conv2d(16,32,5), # 32*20*20
            nn.ReLU(),
            nn.MaxPool2d(2,2), # 32*10*10
            nn.Conv2d(32,64,5), # 64*6*6
            nn.ReLU(),
            nn.MaxPool2d(2,2) #64*3*3
        )

        self.fc_layer = nn.Sequential(
            nn.Linear(64*3*3,100),
            nn.ReLU(),
            nn.Linear(100,10)
        )

    def forward(self,x):
        out = self.layer(x)
        out = out.view(-1,64*3*3)
        out = self.fc_layer(out)

        return out

3. Load original dataset

In [3]:
# todo: Define Batch size & Load MNIST dataset #
device = torch.device(torch.cuda.current_device()) if torch.cuda.is_available() else torch.device('cpu')
np.random.seed(0)


batch_size = 128

mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.1307,), (0.3081,))
])

mnist_train = datasets.MNIST('./data', train=True, download=True, transform=mnist_transform)
mnist_test = datasets.MNIST('./data', train=False, download=True, transform=mnist_transform)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data/MNIST/raw/train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:00<00:00, 216595439.69it/s]

Extracting ./data/MNIST/raw/train-images-idx3-ubyte.gz to ./data/MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data/MNIST/raw/train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<00:00, 39393721.57it/s]


Extracting ./data/MNIST/raw/train-labels-idx1-ubyte.gz to ./data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw/t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 67842098.83it/s]

Extracting ./data/MNIST/raw/t10k-images-idx3-ubyte.gz to ./data/MNIST/raw






Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<00:00, 26025312.52it/s]


Extracting ./data/MNIST/raw/t10k-labels-idx1-ubyte.gz to ./data/MNIST/raw



## Training the substitute model

In [4]:
oracle = CNN().to(device)

# todo: load oracle's checkpoint
oracle.load_state_dict(torch.load('./checkpoint.pth'))

<All keys matched successfully>

Defining the model

In [5]:
class Net(nn.Module):
    def __init__(self):
        super().__init__()
        ### TO DO ###
        self.conv_layers = nn.Sequential(
            nn.Conv2d(1,16,5),          # B 16 24 24
            nn.ReLU(),
            nn.MaxPool2d(2, 2),         # B 16 12 12
            nn.Conv2d(16, 32, 5),       # B 32 8 8
            nn.ReLU()
        )
        self.fc = nn.Sequential(
            nn.Flatten(),
            nn.Linear(32*8*8, 256),     # B 256
            nn.ReLU(),
            nn.Linear(256, 10)          # B 10
        )
        #############

    def forward(self, x):
        ### TO DO ###
        features = self.conv_layers(x)
        out = self.fc(features)
        #############
        return out

Implement Jacobian-based Data Augmentation Algorithm

In [6]:
from torch.autograd.functional import jacobian

In [7]:
def train_JDA(net, X, oracle, epochs=45, lmbd=0.1, max_rho=6):
    ### TO DO ###
    ### Note that you can only use the prediction labels in the training
    ### You can get the prediction labels using net(X).max(1).indices
    def get_labeling(X, oracle):
        print('\t-labeling phase ...')
        labels = []
        loader = DataLoader(X, batch_size=batch_size, shuffle=False)
        oracle.eval()
        for x in loader:
            x = x.to(device)
            pred_labels = torch.argmax(oracle(x), dim=1)
            labels.append(pred_labels)
        print('\t-labeling phase done!')
        return torch.cat(labels, dim=0).squeeze()


    def train_model(net, X, y, epochs):
        print('\t-training phase ...')
        y = y.to(device)
        X = X.to(device)
        dataset = [(img, label) for img, label in zip(X, y)]
        loader = DataLoader(dataset, batch_size=batch_size, shuffle=True)
        optimizer = optim.Adam(net.parameters(), lr=1e-3)
        criterion = nn.CrossEntropyLoss()

        net.train()
        for epoch in range(epochs):
            for imgs, labels in loader:
                optimizer.zero_grad()
                logits = net(imgs)
                loss = criterion(logits, labels)
                loss.backward()
                optimizer.step()
        print('\t-training phase done.')
        return


    def update_dataset(net, oracle, X, lmbd):
        print('\t-update phase ...')
        loader = DataLoader(X, batch_size=batch_size, shuffle=False)
        X = X.to(device)
        oracle.eval()
        net.eval()

        new_X = []
        for x in loader:
            x = x.to(device)
            oracle_label = torch.argmax(oracle(x), dim=1)
            j_F = jacobian(net, x)
            new_x = x + lmbd * j_F[range(len(x)), oracle_label, range(len(x))]
            new_X.append(new_x)
        print('\t-update phase done.')
        new_X = torch.cat(new_X, dim=0)
        return torch.cat([X, new_X], dim=0)


    for i in range(max_rho):
        print(f'alg iteration {i} begins ...')
        y = get_labeling(X, oracle)
        train_model(net, X, y, epochs)
        X = update_dataset(net, oracle, X, lmbd)
    #############

def compute_accuracy(net, X, y):
    ### TO DO ###
    predicted = torch.argmax(net(X), dim=1)
    return (predicted == y).sum() / len(X)
    #############

In [16]:
### TO DO ###
### Select 100 images with their labels from the dataset, randomly
indices = torch.randperm(len(mnist_train))[:100]
JDA_train_X = (mnist_train.data[indices] / 255).unsqueeze(1)
JDA_train_y = mnist_train.targets[indices]
#############

net = Net().to(device)

In [17]:
train_JDA(net, JDA_train_X, oracle, epochs=100, lmbd=0.01)

alg iteration 0 begins ...
	-labeling phase ...
	-labeling phase done!
	-training phase ...
	-training phase done.
	-update phase ...
	-update phase done.
alg iteration 1 begins ...
	-labeling phase ...
	-labeling phase done!
	-training phase ...
	-training phase done.
	-update phase ...
	-update phase done.
alg iteration 2 begins ...
	-labeling phase ...
	-labeling phase done!
	-training phase ...
	-training phase done.
	-update phase ...
	-update phase done.
alg iteration 3 begins ...
	-labeling phase ...
	-labeling phase done!
	-training phase ...
	-training phase done.
	-update phase ...
	-update phase done.
alg iteration 4 begins ...
	-labeling phase ...
	-labeling phase done!
	-training phase ...
	-training phase done.
	-update phase ...
	-update phase done.
alg iteration 5 begins ...
	-labeling phase ...
	-labeling phase done!
	-training phase ...
	-training phase done.
	-update phase ...
	-update phase done.


In [18]:
### TO DO ###
### load the whole test and train dataset in one full batch

# _ = torch.utils.data.DataLoader(<?>, <?>, shuffle=True)
train_X, train_y = mnist_train.data.unsqueeze(1) / 255, mnist_train.targets

# _ = torch.utils.data.DataLoader(<?>, <?>, shuffle=True)
test_X, test_y = mnist_test.data.unsqueeze(1) / 255, mnist_test.targets
#############

In [19]:
### TO DO ###
### compute accuracy of net on train and test
train_X = train_X.to(device)
test_X = test_X.to(device)
train_y = train_y.to(device)
test_y = test_y.to(device)

print(f'accuracy on train: {compute_accuracy(net, train_X, train_y)*100:0.2f} %')
print(f'accuracy on test: {compute_accuracy(net, test_X, test_y)*100:0.2f} %')
#############

accuracy on train: 40.33 %
accuracy on test: 41.27 %
