In [2]:
import torch
import torchvision
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

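The MNIST dataset contains images of size (28, 28), so we resize them to (32, 32), which is the input size our LeNet layers are built for. We also normalize the pixel values from [0, 1] to [-1, 1] so that the inputs are zero-centered. If every input were in [0, 1] (i.e. non-negative), the gradients of all the weights feeding a first-layer neuron would share the same sign, so those weights could only increase or decrease together, and that would hurt our optimization.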

In [9]:
# Preprocessing pipeline applied to every image when it is read from the dataset.
transform = transforms.Compose(
    [
        # Upscale the digits to the 32x32 input the network is built for.
        transforms.Resize(size=(32, 32)),
        # Convert the image to a float tensor with values in [0, 1].
        transforms.ToTensor(),
        # (x - 0.5) / 0.5 maps [0, 1] onto [-1, 1] for the single channel.
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

In [10]:
# Training split of MNIST stored under ./data (download is enabled), with the
# preprocessing pipeline attached so each sample is transformed on access.
train_dataset = torchvision.datasets.MNIST(
    root='./data',
    train=True,
    download=True,
    transform=transform,
)

100%|█████████████████████████████████████████████████████████████████████████████| 9.91M/9.91M [00:02<00:00, 4.64MB/s]
100%|██████████████████████████████████████████████████████████████████████████████| 28.9k/28.9k [00:00<00:00, 233kB/s]
100%|█████████████████████████████████████████████████████████████████████████████| 1.65M/1.65M [00:00<00:00, 1.91MB/s]
100%|█████████████████████████████████████████████████████████████████████████████████████| 4.54k/4.54k [00:00<?, ?B/s]


In [13]:
# Settings passed to the DataLoader; kept together so they are easy to tune.
BATCH_SIZE: int = 64  # number of samples in each mini-batch
SHUFFLE: bool = True  # value forwarded to the DataLoader's `shuffle` option

In [15]:
# Wrap the training set so it can be iterated in mini-batches.
dataloader = DataLoader(
    dataset=train_dataset,
    batch_size=BATCH_SIZE,
    shuffle=SHUFFLE,
)

In [24]:
# Pull a single mini-batch to sanity-check the tensor shapes.
data_iter = iter(dataloader)
img, label = next(data_iter)
print(f'Rozmiar obrazków = {img.shape}')
# This line reports the labels, so it gets its own caption ("size of labels")
# instead of repeating the image caption.
print(f'Rozmiar etykiet = {label.shape}')

Rozmiar obrazków = torch.Size([64, 1, 32, 32])
Rozmiar obrazków = torch.Size([64])


For the sake of simplicity, I will hardcode the layer sizes etc.

In [31]:
import torch.nn as nn
class LeNet(nn.Module):
    """LeNet-style convolutional network with hard-coded layer sizes.

    Expects input of shape (batch, 1, 32, 32) and returns raw scores (no
    softmax) of shape (batch, 10).

    Spatial size through the network for a 32x32 input:
    32 -> C1 (5x5 conv) -> 28 -> S2 (2x2 avg-pool) -> 14
       -> C3 (5x5 conv) -> 10 -> S4 (2x2 avg-pool) -> 5,
    so the flattened feature vector has 16 * 5 * 5 = 400 entries.
    """

    def __init__(self):
        """Create the two conv/pool stages and the three linear layers."""
        # nn.Module has to be initialised before any sub-module is assigned;
        # without this call the first `self.C1 = ...` raises AttributeError.
        super().__init__()

        # Feature extractor.
        self.C1 = nn.Conv2d(in_channels=1, out_channels=6, kernel_size=(5, 5))
        self.S2 = nn.AvgPool2d(kernel_size=(2, 2), stride=2)
        self.C3 = nn.Conv2d(in_channels=6, out_channels=16, kernel_size=(5, 5))
        self.S4 = nn.AvgPool2d(kernel_size=(2, 2), stride=2)

        # Classifier head: 400 -> 120 -> 84 -> 10.
        self.L5 = nn.Linear(400, 120)
        self.L6 = nn.Linear(120, 84)
        self.Output = nn.Linear(84, 10)

    def forward(self, x):
        """Run the forward pass.

        Args:
            x: tensor of shape (batch, 1, 32, 32).

        Returns:
            Tensor of shape (batch, 10) with one unnormalised score per class.

        Raises:
            RuntimeError: if the spatial size of ``x`` does not produce 400
                features per sample (raised by the first linear layer).
        """
        x = self.C1(x)
        x = torch.tanh(x)
        x = self.S2(x)

        x = self.C3(x)
        x = torch.tanh(x)
        x = self.S4(x)

        # Flatten everything except the batch axis. Unlike view(-1, 400), this
        # never mixes samples together when the input has an unexpected size;
        # the mismatch is reported by L5 instead.
        x = torch.flatten(x, start_dim=1)

        x = self.L5(x)
        x = torch.tanh(x)
        x = self.L6(x)
        x = torch.tanh(x)
        x = self.Output(x)
        return x
        
        
        

In [33]:
# Instantiate the network; binding the bare class name would leave `model`
# pointing at the class itself rather than at a usable module instance.
model = LeNet()