In [2]:
# torch and torchvision imports
import torch
import torchvision
import torch.nn as nn
import torchvision.transforms as transforms
import torch.optim as optim
import matplotlib.pyplot as plt

device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')     

### (a) Plot the training and validation losses and errors as a function of the number of epochs


 The model currently does not achieve less than 12% validation error, you have to tweak the parameters to get it.

In [3]:
# Reading in the dataset
transform = transforms.Compose(
    [transforms.ToTensor(),
     transforms.Normalize((0.5, 0.5, 0.5), (0.5, 0.5, 0.5))])

trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                        download=True, transform=transform)
trainloader = torch.utils.data.DataLoader(trainset, batch_size=16,
                                          shuffle=True)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                       download=True, transform=transform)
testloader = torch.utils.data.DataLoader(testset, batch_size=16,
                                         shuffle=False)

classes = ('plane', 'car', 'bird', 'cat',
           'deer', 'dog', 'frog', 'horse', 'ship', 'truck')


# Defining the model
class View(nn.Module):
    def __init__(self,o):
        super().__init__()
        self.o = o

    def forward(self,x):
        return x.view(-1, self.o)
    
class allcnn_t(nn.Module):
    def __init__(self, c1=96, c2= 192):
        super().__init__()
        d = 0.5

        def convbn(ci,co,ksz,s=1,pz=0):
            return nn.Sequential(
                nn.Conv2d(ci,co,ksz,stride=s,padding=pz),
                nn.ReLU(True),
                nn.BatchNorm2d(co))

        self.m = nn.Sequential(
            nn.Dropout(0.2),
            convbn(3,c1,3,1,1),
            convbn(c1,c1,3,1,1),
            convbn(c1,c1,3,2,1),
            nn.Dropout(d),
            convbn(c1,c2,3,1,1),
            convbn(c2,c2,3,1,1),
            convbn(c2,c2,3,2,1),
            nn.Dropout(d),
            convbn(c2,c2,3,1,1),
            convbn(c2,c2,3,1,1),
            convbn(c2,10,1,1),
            nn.AvgPool2d(8),
            View(10))

        print('Num parameters: ', sum([p.numel() for p in self.m.parameters()]))

    def forward(self, x):
        return self.m(x)

Files already downloaded and verified
Files already downloaded and verified


In [3]:
# The training loop

def train(net, optimizer, criterion, train_loader, test_loader, epochs, model_name, plot):
    model = net.to(device)
    total_step = len(train_loader)
    overall_step = 0
    train_loss_values = []
    train_error = []
    val_loss_values = []
    val_error = []
    for epoch in range(epochs):
        correct = 0
        total = 0
        flag = 0
        running_loss = 0.0
        if epoch == 25 and flag == 0:
          for op_params in optimizer.param_groups:
            op_params['lr'] = 0.001
          flag = 1
        for i, (images, labels) in enumerate(train_loader):
            # Move tensors to configured device
            images = images.to(device)
            labels = labels.to(device)
            #Forward Pass
            outputs = model(images)
            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            optimizer.step()
            if (i+1) % 1000 == 0:
              print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch+1, epochs, i+1, total_step, loss.item()))
            if plot:
              info = { ('loss_' + model_name): loss.item() }

              # for tag, value in info.items():
              #   logger.scalar_summary(tag, value, overall_step+1)
        train_loss_values.append(running_loss)
        train_error.append(100-100*correct/total)

        model.eval()
        with torch.no_grad():
            correct = 0
            total = 0
            for i, (images, labels) in enumerate(test_loader):
                images = images.to(device)
                labels = labels.to(device)
                outputs = model(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        print('Accuracy of the network on the test images: {} %'.format(100 * correct / total))
        val_error.append(100-100*correct/total)
        val_loss_values.append(running_loss)
    return val_error,val_loss_values,train_error,train_loss_values


In [4]:
model = allcnn_t().to(device)
epochs = 50
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=0.00001, nesterov=True)
val_error,val_loss_values,train_error,train_loss_values= train(model, optimizer, criterion, trainloader, testloader, epochs, 'cnn_curve', True)

Num parameters:  1667166
Epoch [1/50], Step [1000/3125], Loss: 1.6618
Epoch [1/50], Step [2000/3125], Loss: 1.8104
Epoch [1/50], Step [3000/3125], Loss: 1.0856
Accuracy of the network on the test images: 53.01 %
Epoch [2/50], Step [1000/3125], Loss: 1.0043
Epoch [2/50], Step [2000/3125], Loss: 1.0001
Epoch [2/50], Step [3000/3125], Loss: 0.6464
Accuracy of the network on the test images: 71.14 %
Epoch [3/50], Step [1000/3125], Loss: 0.6538
Epoch [3/50], Step [2000/3125], Loss: 0.3558
Epoch [3/50], Step [3000/3125], Loss: 0.3802
Accuracy of the network on the test images: 78.98 %
Epoch [4/50], Step [1000/3125], Loss: 0.5748
Epoch [4/50], Step [2000/3125], Loss: 0.2281
Epoch [4/50], Step [3000/3125], Loss: 0.7438
Accuracy of the network on the test images: 79.24 %
Epoch [5/50], Step [1000/3125], Loss: 0.3864
Epoch [5/50], Step [2000/3125], Loss: 0.3971
Epoch [5/50], Step [3000/3125], Loss: 0.1077
Accuracy of the network on the test images: 80.28 %
Epoch [6/50], Step [1000/3125], Loss: 0.

In [7]:
torch.save(model, './Intital_hw2_p3_model/model1.pt')

### Increasing the number of epochs

In [8]:
# The training loop

def train(net, optimizer, criterion, train_loader, test_loader, epochs, model_name, plot):
    model = net.to(device)
    total_step = len(train_loader)
    overall_step = 0
    train_loss_values = []
    train_error = []
    val_loss_values = []
    val_error = []
    for epoch in range(epochs):
        correct = 0
        total = 0
        flag = 0
        running_loss = 0.0
        if epoch == 40 and flag == 0:
          for op_params in optimizer.param_groups:
            op_params['lr'] = 0.001
          flag = 1
        for i, (images, labels) in enumerate(train_loader):
            # Move tensors to configured device
            images = images.to(device)
            labels = labels.to(device)
            #Forward Pass
            outputs = model(images)
            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            optimizer.step()
            if (i+1) % 1000 == 0:
              print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch+1, epochs, i+1, total_step, loss.item()))
            if plot:
              info = { ('loss_' + model_name): loss.item() }

              # for tag, value in info.items():
              #   logger.scalar_summary(tag, value, overall_step+1)
        train_loss_values.append(running_loss)
        train_error.append(100-100*correct/total)

        model.eval()
        with torch.no_grad():
            correct = 0
            total = 0
            for i, (images, labels) in enumerate(test_loader):
                images = images.to(device)
                labels = labels.to(device)
                outputs = model(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        print('Accuracy of the network on the test images: {} %'.format(100 * correct / total))
        val_error.append(100-100*correct/total)
        val_loss_values.append(running_loss)
    return val_error,val_loss_values,train_error,train_loss_values

In [9]:
model = allcnn_t().to(device)
epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.01, momentum=0.9, weight_decay=0.00001, nesterov=True)
val_error,val_loss_values,train_error,train_loss_values= train(model, optimizer, criterion, trainloader, testloader, epochs, 'cnn_curve', True)

Num parameters:  1667166
Epoch [1/100], Step [1000/3125], Loss: 1.9664
Epoch [1/100], Step [2000/3125], Loss: 1.1284
Epoch [1/100], Step [3000/3125], Loss: 1.0749
Accuracy of the network on the test images: 53.77 %
Epoch [2/100], Step [1000/3125], Loss: 1.2159
Epoch [2/100], Step [2000/3125], Loss: 0.6642
Epoch [2/100], Step [3000/3125], Loss: 0.8375
Accuracy of the network on the test images: 71.08 %
Epoch [3/100], Step [1000/3125], Loss: 1.0900
Epoch [3/100], Step [2000/3125], Loss: 0.5668
Epoch [3/100], Step [3000/3125], Loss: 0.6083
Accuracy of the network on the test images: 72.67 %
Epoch [4/100], Step [1000/3125], Loss: 0.5771
Epoch [4/100], Step [2000/3125], Loss: 0.3854
Epoch [4/100], Step [3000/3125], Loss: 0.6999
Accuracy of the network on the test images: 78.71 %
Epoch [5/100], Step [1000/3125], Loss: 0.5968
Epoch [5/100], Step [2000/3125], Loss: 0.7876
Epoch [5/100], Step [3000/3125], Loss: 0.3321
Accuracy of the network on the test images: 81.44 %
Epoch [6/100], Step [1000

In [10]:
torch.save(model, './Intital_hw2_p3_model/model1_100epoch_40lr.pt')

In [None]:
plt.plot(train_error)

### Changing the learning rate in train code

In [13]:
# The training loop - changing the learning rate as per suggestion given in the document

def train(net, optimizer, criterion, train_loader, test_loader, epochs, model_name, plot):
    model = net.to(device)
    total_step = len(train_loader)
    overall_step = 0
    train_loss_values = []
    train_error = []
    val_loss_values = []
    val_error = []
    schedule_lr = 0.1
    for epoch in range(epochs):
        correct = 0
        total = 0
        # flag = 0
        running_loss = 0.0
        # if epoch == 25 and flag == 0:
        #   for op_params in optimizer.param_groups:
        #     op_params['lr'] = 0.001
        #   flag = 1
        if epoch % 40 == 0 and epoch!=0:
           schedule_lr = schedule_lr * 0.1
           for op_params in optimizer.param_groups:
                op_params['lr'] = schedule_lr
        for i, (images, labels) in enumerate(train_loader):
            # Move tensors to configured device
            images = images.to(device)
            labels = labels.to(device)
            #Forward Pass
            outputs = model(images)
            loss = criterion(outputs, labels)
            optimizer.zero_grad()
            loss.backward()
            running_loss += loss.item()
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
            optimizer.step()
            if (i+1) % 1000 == 0:
              print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'.format(epoch+1, epochs, i+1, total_step, loss.item()))
            if plot:
              info = { ('loss_' + model_name): loss.item() }

              # for tag, value in info.items():
              #   logger.scalar_summary(tag, value, overall_step+1)
        train_loss_values.append(running_loss)
        train_error.append(100-100*correct/total)

        model.eval()
        with torch.no_grad():
            correct = 0
            total = 0
            for i, (images, labels) in enumerate(test_loader):
                images = images.to(device)
                labels = labels.to(device)
                outputs = model(images)
                _, predicted = torch.max(outputs.data, 1)
                total += labels.size(0)
                correct += (predicted == labels).sum().item()

        print('Accuracy of the network on the test images: {} %'.format(100 * correct / total))
        val_error.append(100-100*correct/total)
        val_loss_values.append(running_loss)
    return val_error,val_loss_values,train_error,train_loss_values

In [14]:
model = allcnn_t().to(device)
epochs = 100
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)
val_error,val_loss_values,train_error,train_loss_values= train(model, optimizer, criterion, trainloader, testloader, epochs, 'cnn_curve', True)

Num parameters:  1667166
Epoch [1/100], Step [1000/3125], Loss: 1.9747
Epoch [1/100], Step [2000/3125], Loss: 1.5987
Epoch [1/100], Step [3000/3125], Loss: 1.4638
Accuracy of the network on the test images: 52.81 %
Epoch [2/100], Step [1000/3125], Loss: 1.1166
Epoch [2/100], Step [2000/3125], Loss: 1.0751
Epoch [2/100], Step [3000/3125], Loss: 1.0468
Accuracy of the network on the test images: 65.77 %
Epoch [3/100], Step [1000/3125], Loss: 1.1766
Epoch [3/100], Step [2000/3125], Loss: 0.7306
Epoch [3/100], Step [3000/3125], Loss: 0.7910
Accuracy of the network on the test images: 70.54 %
Epoch [4/100], Step [1000/3125], Loss: 0.8822
Epoch [4/100], Step [2000/3125], Loss: 0.8566
Epoch [4/100], Step [3000/3125], Loss: 0.8056
Accuracy of the network on the test images: 76.02 %
Epoch [5/100], Step [1000/3125], Loss: 0.4265
Epoch [5/100], Step [2000/3125], Loss: 0.9794
Epoch [5/100], Step [3000/3125], Loss: 0.8934
Accuracy of the network on the test images: 74.12 %
Epoch [6/100], Step [1000

KeyboardInterrupt: 

In [15]:
torch.save(model, './Intital_hw2_p3_model/model3_100epoch_40lr_1e4wt.pt')

In [None]:
## saving these for plotting val_error,val_loss_values,train_error,train_loss_values
import numpy as np
np.save('./Intital_hw2_p3_model/val_error.npy', np.array(val_error))
np.save('./Intital_hw2_p3_model/val_loss_values.npy', np.array(val_loss_values))
np.save('./Intital_hw2_p3_model/train_error.npy', np.array(train_error))
np.save('./Intital_hw2_p3_model/train_loss_values.npy', np.array(train_loss_values))

In [4]:
final_model = torch.load('./Intital_hw2_p3_model/model3_100epoch_40lr_1e4wt.pt')
for parameter in final_model.parameters():
    print(parameter)

Parameter containing:
tensor([[[[-9.1049e-02, -1.0985e-01, -9.4856e-02],
          [-1.2309e-01, -1.6736e-01, -1.3894e-01],
          [-1.4311e-01, -1.9005e-01, -1.3626e-01]],

         [[-2.5469e-02,  4.4896e-03, -1.8049e-03],
          [-4.8044e-03,  1.4829e-02,  1.1795e-02],
          [-1.6073e-02, -1.1228e-02,  9.2308e-03]],

         [[ 1.1282e-01,  1.9159e-01,  1.5776e-01],
          [ 1.5346e-01,  2.2881e-01,  1.9260e-01],
          [ 1.1169e-01,  1.5747e-01,  1.4522e-01]]],


        [[[-1.4626e-01, -1.4458e-01, -9.0030e-02],
          [-1.0939e-01, -8.8121e-02, -4.2372e-02],
          [-6.9329e-02, -5.6354e-02,  5.3985e-04]],

         [[-1.0835e-01, -8.2343e-02, -6.0253e-02],
          [-4.6809e-02,  5.9892e-03,  1.5381e-02],
          [-1.7302e-02,  1.8963e-02,  4.3907e-02]],

         [[ 2.1354e-02,  8.1501e-02,  6.7217e-02],
          [ 9.7608e-02,  1.8514e-01,  1.6378e-01],
          [ 1.0483e-01,  1.7058e-01,  1.7006e-01]]],


        [[[ 1.1807e-03,  8.2510e-04,  6.9907

In [5]:
## saving these for plotting val_error,val_loss_values,train_error,train_loss_values
import numpy as np
val_error = np.load('./Intital_hw2_p3_model/val_error.npy')
val_loss_values = np.load('./Intital_hw2_p3_model/val_loss_values.npy')
train_error = np.load('./Intital_hw2_p3_model/train_error.npy')
train_loss_values = np.load('./Intital_hw2_p3_model/train_loss_values.npy')
val_error.min()

11.069999999999993

In [43]:
valloader_perturb = torch.utils.data.DataLoader(testset, batch_size=100,
                                          shuffle=True)

In [44]:
valloader_perturb

<torch.utils.data.dataloader.DataLoader at 0x1fd8074a940>

In [45]:
images, labels = next(iter(valloader_perturb))
print(len(labels))

100


In [46]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(final_model.parameters(), lr=0.1, momentum=0.9, weight_decay=0.0001, nesterov=True)

In [47]:
# Move tensors to configured device
images = images.to(device)
labels = labels.to(device)
#Forward Pass
images.requires_grad_()
outputs = final_model(images)
loss = criterion(outputs, labels)
optimizer.zero_grad()
loss.backward()
dx = images.grad.data.clone()

In [48]:
images[0].size()

torch.Size([3, 32, 32])

In [49]:
dx.size()

torch.Size([100, 3, 32, 32])

In [50]:
_, predicted = torch.max(outputs.data, 1)

In [55]:
(predicted == labels)

tensor([ True,  True,  True, False,  True,  True, False,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True, False, False,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,  True,
         True, False,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True, False,  True,  True,  True,  True, False,
         True,  True,  True,  True,  True,  True,  True, False,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,  True,
         True,  True,  True,  True,  True,  True,  True,  True,  True,  True,
        False,  True,  True,  True,  True,  True,  True,  True, False,  True,
         True,  True,  True,  True,  True,  True, False,  True, False,  True],
       device='cuda:0')

In [58]:
pred_np = predicted.cpu().numpy()
labels_np = labels.cpu().numpy()
correct_list =  np.argwhere(pred_np==labels_np)


array([[ 0],
       [ 1],
       [ 2],
       [ 4],
       [ 5],
       [ 7],
       [ 8],
       [ 9],
       [10],
       [11],
       [12],
       [13],
       [14],
       [15],
       [16],
       [19],
       [20],
       [21],
       [22],
       [23],
       [24],
       [25],
       [26],
       [27],
       [28],
       [29],
       [30],
       [32],
       [33],
       [34],
       [35],
       [36],
       [37],
       [38],
       [39],
       [40],
       [41],
       [42],
       [43],
       [45],
       [46],
       [47],
       [48],
       [50],
       [51],
       [52],
       [53],
       [54],
       [55],
       [56],
       [58],
       [59],
       [60],
       [61],
       [62],
       [63],
       [64],
       [65],
       [66],
       [67],
       [68],
       [69],
       [70],
       [71],
       [72],
       [73],
       [74],
       [75],
       [76],
       [77],
       [78],
       [79],
       [81],
       [82],
       [83],
       [84],
       [85],