In [1]:
from pathlib import Path

import torch
from sklearn.metrics import accuracy_score, confusion_matrix
from torch import nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [2]:
# Preprocessing: ToTensor converts the PIL image to a float tensor, then
# Normalize applies (x - 0.5) / 0.5 to the single channel.
# Bound to its own name so the `transforms` module is not overwritten; the
# original rebinding made this cell fail with AttributeError when re-run.
mnist_transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,)),
])

# Both splits are downloaded into the current directory if not already there.
train = datasets.MNIST('.', train=True, download=True, transform=mnist_transform)
test = datasets.MNIST('.', train=False, download=True, transform=mnist_transform)

train_loader = DataLoader(train, batch_size=64, shuffle=True)
# Evaluation metrics do not depend on sample order, so keep the test set
# unshuffled to make evaluation runs deterministic.
test_loader = DataLoader(test, batch_size=64, shuffle=False)

In [3]:
class CNN(nn.Module):
    """Small convolutional classifier that emits 10 class logits per image.

    ``net`` stacks three Conv2d(3x3, no padding) -> ReLU -> MaxPool2d(2, stride 2)
    stages with 32, 64 and 32 output channels. For a 1x28x28 input the spatial
    size shrinks 28 -> 26 -> 13 -> 11 -> 5 -> 3 -> 1, leaving exactly 32
    features per sample, which is the input width of ``classify_head``.
    """

    def __init__(self):
        super().__init__()
        self.net = nn.Sequential(
            nn.Conv2d(1, 32, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d((2, 2), stride=2),
            nn.Conv2d(32, 64, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d((2, 2), stride=2),
            nn.Conv2d(64, 32, kernel_size=3),
            nn.ReLU(),
            nn.MaxPool2d((2, 2), stride=2),
        )
        # NOTE(review): there is no activation between the two Linear layers,
        # so the head is equivalent to a single affine map. Left as-is so that
        # previously saved weights keep loading; consider adding a ReLU.
        self.classify_head = nn.Sequential(
            nn.Linear(32, 20, bias=True),
            nn.Linear(20, 10, bias=True),
        )

    def forward(self, x):
        """Return class logits of shape (N, 10) for a batch of images.

        Args:
            x: image tensor whose spatial size reduces to 1x1 after ``net``
               (e.g. ``(N, 1, 28, 28)`` MNIST batches).

        Raises:
            ValueError: if the feature map is not 1x1. Without this check the
                reshape below would silently fold the extra spatial positions
                into the batch dimension and return the wrong number of rows.
        """
        features = self.net(x)
        if tuple(features.shape[-2:]) != (1, 1):
            raise ValueError(
                "CNN expects inputs that reduce to a 1x1 feature map "
                "(e.g. 1x28x28 images); got feature map of shape "
                f"{tuple(features.shape)} for input of shape {tuple(x.shape)}"
            )
        return self.classify_head(features.reshape(-1, 32))

In [4]:
# Seed PyTorch's global RNG before anything random happens, so that weight
# initialisation (and DataLoader shuffling, which draws from the same default
# generator) repeats across Restart & Run All. Some GPU kernels may still be
# non-deterministic, so results can differ slightly between runs on CUDA.
SEED = 0
torch.manual_seed(SEED)

model = CNN()
# CrossEntropyLoss takes raw logits and integer class targets.
criterion = torch.nn.CrossEntropyLoss()
# Plain SGD (no momentum) over every model parameter.
optimizer = torch.optim.SGD(model.parameters(), lr=0.001)

In [5]:
# Use the GPU when PyTorch can see one; otherwise fall back to the CPU.
device = torch.device('cuda') if torch.cuda.is_available() else torch.device('cpu')
print(device)
# Last expression of the cell: moves the model's parameters to `device` and
# echoes the module summary as the cell output.
model.to(device)

cuda


CNN(
  (net): Sequential(
    (0): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
    (3): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1))
    (4): ReLU()
    (5): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
    (6): Conv2d(64, 32, kernel_size=(3, 3), stride=(1, 1))
    (7): ReLU()
    (8): MaxPool2d(kernel_size=(2, 2), stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (classify_head): Sequential(
    (0): Linear(in_features=32, out_features=20, bias=True)
    (1): Linear(in_features=20, out_features=10, bias=True)
  )
)

In [6]:
# Train for 10 full passes over the training set.
for epoch in range(10):
    model.train()
    # Sum of the per-batch mean losses over the epoch (not an average).
    running_loss = 0.0
    # Named `images`/`labels` so the builtin `input()` is not shadowed in the
    # notebook's global namespace after this cell runs.
    for images, labels in train_loader:
        images, labels = images.to(device), labels.to(device)
        optimizer.zero_grad()
        loss = criterion(model(images), labels)
        loss.backward()
        optimizer.step()
        running_loss += loss.item()
    print(f'Epoch - {epoch}, loss = {running_loss}')

Epoch - 0, loss = 2152.789535045624
Epoch - 1, loss = 2129.845997095108
Epoch - 2, loss = 2091.6957812309265
Epoch - 3, loss = 2007.2402691841125
Epoch - 4, loss = 1804.8486211299896
Epoch - 5, loss = 1457.290182352066
Epoch - 6, loss = 1098.312424480915
Epoch - 7, loss = 826.8714784383774
Epoch - 8, loss = 642.554775685072
Epoch - 9, loss = 525.995644569397


In [7]:
# torch.save creates the file but not missing parent directories, so make sure
# the target folder exists first.
# NOTE(review): this pickles the whole module, so loading it later needs the
# CNN class to be importable; saving model.state_dict() is the more portable
# option if downstream loaders can be updated.
model_path = Path('../lab 6/ModelFiles/model.pt')
model_path.parent.mkdir(parents=True, exist_ok=True)
torch.save(model, model_path)

In [9]:
# Evaluate on the test split and collect predictions for the confusion matrix.
model.eval()
all_preds, all_labels = [], []
with torch.no_grad():
    for images, labels in test_loader:
        # Use the `device` chosen earlier instead of a hard-coded 'cuda', so
        # this cell also runs on CPU-only machines.
        logits = model(images.to(device))
        # Predicted class = index of the largest logit in each row.
        preds = logits.argmax(dim=1)
        # Store plain Python ints rather than 0-d tensors.
        all_preds.extend(preds.cpu().tolist())
        all_labels.extend(labels.tolist())
cm = confusion_matrix(all_labels, all_preds)
print(cm)
# Number of trainable parameters in the model.
print(sum(p.numel() for p in model.parameters() if p.requires_grad))

[[ 930    1    7    0    2    6   24    9    1    0]
 [   0 1096    2    3    0    1    1    1   31    0]
 [  15    1  885   18    3    8   18   46   31    7]
 [   0    8   26  844    0   38    0   24   61    9]
 [   2    5    0    0  854    1   67    0    9   44]
 [  14   17    5   88   11  662    2    8   64   21]
 [  23    9    8    0   40   14  854    0    7    3]
 [   5   10   65    6    0    2    0  887   18   35]
 [   3   34   12   69   17   70   16    9  682   62]
 [  20    9    1    7   17   36   14   21   33  851]]
38150


In [None]:
# Overall test accuracy from the predictions collected in the evaluation cell.
# `accuracy_score` is imported with the other dependencies in the first cell.
print(accuracy_score(all_labels, all_preds))

0.871
