In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import torch
import torchvision
import torchvision.transforms as transforms
%matplotlib inline 
path_data = './data/'

## Getting the data 

In [None]:
# Open the CIFAR-10 training split (train=True) and test split (train=False),
# both stored under `path_data`. download=True asks torchvision to fetch the
# archive into that directory. No transform is passed at construction time.
train = torchvision.datasets.CIFAR10(root=path_data, train=True, download=True) 
test = torchvision.datasets.CIFAR10(root=path_data, train=False, download=True) 

Let's look at `train`. What we get back is an instance of the `CIFAR10` dataset class.

In [None]:
# Display the Python type of the dataset object created above.
type(train)

In [None]:
# Preprocessing pipeline applied to every image, in order.
cifar_transform = transforms.Compose([
    # Convert the image to a float tensor.
    transforms.ToTensor(),
    # Per-channel (x - 0.5) / 0.5 for the three colour channels.
    transforms.Normalize((0.5,0.5,0.5), (0.5,0.5,0.5))
])

In [None]:
# Attach the preprocessing pipeline to both splits after construction.
# Each dataset gets the bare callable in `.transform` and a StandardTransform
# wrapper around the same callable in `.transforms`.
for split in (train, test):
    split.transform = cifar_transform
    split.transforms = torchvision.datasets.vision.StandardTransform(cifar_transform)


In [None]:
# Print both transform attributes of the training set, separated by a marker
# line, to confirm the pipeline was attached.
print(train.transform)
print('\n######\n')
print(train.transforms)

In [None]:
# Mini-batch loaders (4 images per batch, 2 worker processes each).
# Training batches are reshuffled; the evaluation loader keeps a fixed order.
trainloader = torch.utils.data.DataLoader(train, batch_size=4,
                                          shuffle=True, num_workers=2)
# The evaluation loader must wrap the held-out `test` split, not `train`;
# otherwise every "test" metric is really measured on training data.
testloader = torch.utils.data.DataLoader(test, batch_size=4,
                                         shuffle=False, num_workers=2)

In [None]:
# Pull one mini-batch from the training loader and print its first image tensor.
train_iter = iter(trainloader)
# Use the built-in next(): DataLoader iterators implement __next__, and the
# old `.next()` method is not available on current PyTorch releases.
images, labels = next(train_iter)
print(images[0])

In [None]:
def plot_images(images, labels, nrow=4):
    """Draw a batch of images as a single grid with matplotlib.

    Parameters
    ----------
    images : torch.Tensor
        Batch of images in a layout accepted by
        ``torchvision.utils.make_grid`` (presumably (B, C, H, W), as yielded
        by the data loaders -- confirm for other callers).
    labels : any
        Currently unused; kept in the signature so existing calls keep working.
    nrow : int, default 4
        Number of images placed on each row of the grid.

    Returns
    -------
    None
        The grid is drawn on the current matplotlib axes.
    """
    # normalize=True min-max rescales the grid values into [0, 1], which is
    # the range imshow expects for float images.
    img_grid = torchvision.utils.make_grid(images, nrow=nrow, normalize=True)
    # detach()/cpu() make the conversion work for tensors that track gradients
    # or live on a GPU; transpose moves channels last (C, H, W) -> (H, W, C).
    np_img = img_grid.detach().cpu().numpy().transpose(1, 2, 0)
    plt.imshow(np_img)
    
# Lookup tables between class names and integer labels.
d_class2idx = train.class_to_idx
d_idx2class = {idx: name for name, idx in d_class2idx.items()}

# Show the next training batch together with its class names.
# Built-in next() replaces the `.next()` method, which current PyTorch
# DataLoader iterators no longer provide.
images, labels = next(train_iter)
plot_images(images,labels)
print(' '.join('%5s' % d_idx2class[int(label)] for label in labels))

In [None]:
import torch.nn as nn 
import torch.nn.functional as F 

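The spatial output size of a convolution layer is

$$\frac{W - K + 2P}{S} + 1$$

- W is the input size (width or height)
- K is the kernel size
- P is the amount of padding
- S is the stride size

For example, a 3×3 kernel with padding 1 and stride 1 maps a 32-pixel-wide input to (32 − 3 + 2)/1 + 1 = 32 pixels, i.e. the size is unchanged.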

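Layer configuration used for the network below (VGG11-style; numbers are convolution output channels, `'M'` is a 2×2 max-pool):

`[64, 'M', 128, 'M', 256, 256, 'M', 512, 512, 'M', 512, 512, 'M']`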

In [None]:
class Net(nn.Module): 
    """VGG-style convolutional classifier producing 10 raw class scores.

    Eight 3x3 convolutions (padding 1, so they keep the spatial size) are
    interleaved with five 2x2 max-pools, followed by three fully connected
    layers. The forward pass returns logits; no softmax is applied.
    """
    def __init__(self): 
        super().__init__()
        # Convolutional stack: 3 -> 64 -> 128 -> 256 -> 256 -> 512 -> 512 -> 512 -> 512 channels.
        self.conv1 = nn.Conv2d(in_channels=3, out_channels=64, kernel_size=3, padding=1)
        # One parameter-free 2x2 / stride-2 pooling module, reused after several convs.
        self.pool = nn.MaxPool2d(2,2)
        self.conv2 = nn.Conv2d(in_channels=64, out_channels=128, kernel_size=3,padding=1) 
        self.conv3 = nn.Conv2d(in_channels=128, out_channels=256, kernel_size=3,padding=1) 
        self.conv4 = nn.Conv2d(in_channels=256, out_channels=256, kernel_size=3,padding=1) 
        self.conv5 = nn.Conv2d(in_channels=256, out_channels=512, kernel_size=3,padding=1) 
        self.conv6 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3,padding=1) 
        self.conv7 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3,padding=1) 
        self.conv8 = nn.Conv2d(in_channels=512, out_channels=512, kernel_size=3,padding=1) 
        
        # fc1 takes 512 features: forward() pools five times, so a 32x32 input
        # (the CIFAR-10 image size) is reduced to 1x1 with 512 channels,
        # i.e. 512 * 1 * 1 values after flattening.
        self.fc1 = nn.Linear(512,512) 
        self.fc2 = nn.Linear(512,512)
        self.fc3 = nn.Linear(512, 10)
        
    def forward(self, x): 
        """Return the 10 class logits for a batch of images `x`."""
        # Each self.pool call halves the spatial size; the convs keep it.
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = F.relu(self.conv3(x))
        x = self.pool(F.relu(self.conv4(x)))
        x = F.relu(self.conv5(x))
        x = self.pool(F.relu(self.conv6(x)))
        x = F.relu(self.conv7(x))
        x = self.pool(F.relu(self.conv8(x)))
        # Flatten everything except the batch dimension for the linear layers.
        x = x.view(x.size(0), -1)
        x = F.relu(self.fc1(x))
        x = F.relu(self.fc2(x))
        x = self.fc3(x)  # no activation on final layer 
        return x

# Instantiate the model with freshly initialised weights.
net = Net()

In [None]:
import torch.optim as optim

# Cross-entropy loss, fed the raw logits returned by the network.
criterion = nn.CrossEntropyLoss()
# SGD with momentum over all parameters of `net`.
optimizer = optim.SGD(net.parameters(), lr=0.001, momentum=0.9)

In [None]:
# Report the mean training loss once every `printfreq` mini-batches.
printfreq = 1000

# training loop
num_epochs = 2
for epoch in range(num_epochs):
    # Restart the accumulator at every epoch. Otherwise the batches left over
    # after the last report of one epoch are added to the first window of the
    # next epoch, and that window's average (divided by printfreq) is inflated.
    running_loss = 0
    for i, (inputs, labels) in enumerate(trainloader):
        optimizer.zero_grad()              # clear gradients from the previous step
        outputs = net(inputs)              # forward pass
        loss = criterion(outputs, labels)
        loss.backward()                    # back-propagate
        optimizer.step()                   # update the weights

        running_loss += loss.item()
        if (i + 1) % printfreq == 0:
            print("Epoch: {}, Training Loss: {}".format(epoch, running_loss / printfreq))
            running_loss = 0

In [None]:
# List every entry of the model's state dict with its tensor shape.
# The state dict is built once and iterated with .items(), instead of being
# rebuilt by a fresh net.state_dict() call for every entry.
for param_name, param_tensor in net.state_dict().items():
    print(param_name, "\t", param_tensor.size())

In [None]:
# Show the top-level keys of the optimizer's state dict, then the
# 'param_groups' entry on its own.
print(optimizer.state_dict().keys())
print(optimizer.state_dict()['param_groups'])

In [None]:
# Persist the trained weights, then read them back as a round-trip check.
fname = './models/CIFAR10_cnn.pth'
# torch.save does not create missing parent directories, so make sure
# ./models exists before writing (no-op when it is already there).
os.makedirs(os.path.dirname(fname), exist_ok=True)
torch.save(net.state_dict(), fname)
loaded_dict = torch.load(fname)
net.load_state_dict(loaded_dict)

In [None]:
# Put the model into evaluation mode before running inference.
net.eval()

In [None]:
# Optional: restore the saved weights into `net` from disk. `net` must
# already exist as a `Net` instance; this only overwrites its parameters.
fname = './models/CIFAR10_cnn.pth'
loaded_dict = torch.load(fname)
net.load_state_dict(loaded_dict)

```python
{'airplane': 0, 'automobile': 1, 'bird': 2, 'cat': 3, 'deer': 4, 'dog': 5, 'frog': 6, 'horse': 7, 'ship': 8, 'truck': 9}
```

In [None]:
# Take the first batch from the evaluation loader and show it with its
# ground-truth class names.
dataiter = iter(testloader)
# Built-in next() replaces the `.next()` method, which current PyTorch
# DataLoader iterators no longer provide.
images, labels = next(dataiter)
plot_images(images,labels)
print(' '.join('%5s' % d_idx2class[int(label)] for label in labels))

In [None]:
# Logits: the raw, unnormalised class scores returned by the network (no softmax applied)

In [None]:
# Forward pass on the batch shown above. This is inference only, so gradient
# tracking is disabled, matching the per-class evaluation loop further down.
with torch.no_grad():
    outputs = net(images)
print(outputs)

In [None]:
# Predicted class for each image = index of the largest score in its row.
preds = torch.argmax(outputs, dim=1)
plot_images(images, preds)
# Translate the predicted indices to class names and print them on one line.
predicted_names = [d_idx2class[int(pred)] for pred in preds]
print(' '.join('%5s' % name for name in predicted_names))

In [None]:
# Per-class accuracy over everything the evaluation loader yields.
num_classes = len(d_idx2class)
class_correct = [0] * num_classes  # correctly classified images per class
class_total = [0] * num_classes    # images seen per class

with torch.no_grad():  # inference only, no autograd graph needed
    for images, labels in testloader:
        outputs = net(images)
        predicted = outputs.argmax(dim=1)
        hits = (predicted == labels)
        # Walk the actual batch contents instead of assuming 4 items, so any
        # batch size and a shorter final batch are counted correctly.
        for label, hit in zip(labels.tolist(), hits.tolist()):
            class_correct[label] += hit
            class_total[label] += 1

for i in range(num_classes):
    # Guard against a class that never appeared in the loader.
    accuracy = 100 * class_correct[i] / class_total[i] if class_total[i] else 0.0
    print('Accuracy of %5s : %2d %%' % (d_idx2class[i], accuracy))

In [None]:
from sam import SAM

In [None]:
# From SAM paper: https://arxiv.org/abs/2010.01412
base_optimizer = torch.optim.SGD  # define an optimizer for the "sharpness-aware" update
optimizer = SAM(net.parameters(), base_optimizer, lr=0.1, momentum=0.9)

# Report the mean training loss once every `printfreq` mini-batches.
printfreq = 1000

# net.eval() was called earlier in the notebook; switch back to training mode
# before updating the weights again.
net.train()

# training loop
num_epochs = 2
for epoch in range(num_epochs):
    # Restart the accumulator at every epoch so leftover batches from the
    # previous epoch do not inflate the first reported average.
    running_loss = 0
    for i, (inputs, labels) in enumerate(trainloader):
        # No separate zero_grad() is needed here: both SAM steps below are
        # called with zero_grad=True, and a fresh optimizer starts without
        # accumulated gradients from this loop.

        # first forward-backward pass
        # NOTE(review): per the SAM paper, first_step is expected to move the
        # weights to the perturbed point -- confirm against sam.py.
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.first_step(zero_grad=True)

        # second forward-backward pass
        # NOTE(review): second_step is expected to restore the weights and
        # apply the base optimizer update -- confirm against sam.py.
        outputs = net(inputs)
        loss = criterion(outputs, labels)
        loss.backward()
        optimizer.second_step(zero_grad=True)

        # The logged value is the loss from the second pass.
        running_loss += loss.item()
        if (i + 1) % printfreq == 0:
            print("Epoch: {}, Training Loss: {}".format(epoch, running_loss / printfreq))
            running_loss = 0