In [18]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F
from torch.utils.data import DataLoader
from torchvision import datasets, transforms

In [2]:
# check device
DEVICE = 'cuda' if torch.cuda.is_available() else 'cpu'
print(DEVICE)

cpu


In [3]:
transforms = transforms.Compose([transforms.Resize((32, 32)),
                                 transforms.ToTensor()])

train_dataset = datasets.MNIST(root='mnist_data', 
                               train=True, 
                               transform=transforms,
                               download=True)

valid_dataset = datasets.MNIST(root='mnist_data', 
                               train=False, 
                               transform=transforms)

Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to mnist_data/MNIST/raw/train-images-idx3-ubyte.gz


  0%|          | 0/9912422 [00:00<?, ?it/s]

Extracting mnist_data/MNIST/raw/train-images-idx3-ubyte.gz to mnist_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to mnist_data/MNIST/raw/train-labels-idx1-ubyte.gz


  0%|          | 0/28881 [00:00<?, ?it/s]

Extracting mnist_data/MNIST/raw/train-labels-idx1-ubyte.gz to mnist_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to mnist_data/MNIST/raw/t10k-images-idx3-ubyte.gz


  0%|          | 0/1648877 [00:00<?, ?it/s]

Extracting mnist_data/MNIST/raw/t10k-images-idx3-ubyte.gz to mnist_data/MNIST/raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to mnist_data/MNIST/raw/t10k-labels-idx1-ubyte.gz


  0%|          | 0/4542 [00:00<?, ?it/s]

Extracting mnist_data/MNIST/raw/t10k-labels-idx1-ubyte.gz to mnist_data/MNIST/raw



In [4]:
BATCH_SIZE = 64
train_loader = DataLoader(dataset=train_dataset, 
                          batch_size=BATCH_SIZE, 
                          shuffle=True)

valid_loader = DataLoader(dataset=valid_dataset, 
                          batch_size=BATCH_SIZE, 
                          shuffle=False)

In [6]:
class LeNet(nn.Module):
    def __init__(self) -> None:
        super().__init__()
        self.conv1= nn.Conv2d(1, 6, 5)
        self.conv2= nn.Conv2d(6, 16, 5)
        self.conv3= nn.Conv2d(16, 120, 5)
        self.pool= nn.AvgPool2d(2,2)
        self.l1= nn.Linear(120, 84)
        self.l2= nn.Linear(84,10)
        self.tanh= nn.Tanh()
        self.out= nn.Softmax(dim=1)
    def forward(self, image):
        out= self.tanh(self.conv1(image))
        out= self.pool(out)
        out= self.tanh(self.conv2(out))
        out= self.pool(out)
        out= self.tanh(self.conv3(out))
        out= torch.flatten(out,1)
        out= self.tanh(self.l1(out))
        out= self.out(self.l2(out))
        return out

In [14]:
Net= LeNet()
loss_function= nn.CrossEntropyLoss()
optimizer= torch.optim.Adam(Net.parameters(), lr= 0.01)
print(Net)

LeNet(
  (conv1): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1))
  (conv2): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
  (conv3): Conv2d(16, 120, kernel_size=(5, 5), stride=(1, 1))
  (pool): AvgPool2d(kernel_size=2, stride=2, padding=0)
  (l1): Linear(in_features=120, out_features=84, bias=True)
  (l2): Linear(in_features=84, out_features=10, bias=True)
  (tanh): Tanh()
  (out): Softmax(dim=1)
)


In [15]:
def train(dataloader, model, loss_fn, optimizer):
    size = len(dataloader.dataset)
    model.train()
    for batch, (X, y) in enumerate(dataloader):

        # Compute prediction error
        pred = model(X)
        loss = loss_fn(pred, y)

        # Backpropagation
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        if batch % 100 == 0:
            loss, current = loss.item(), batch *len(X)
            print(f"loss: {loss:>7f}  [{current:>5d}/{size:>5d}]")

In [16]:
def test(dataloader, model, loss_fn):
    size = len(dataloader.dataset)
    num_batches = len(dataloader)
    model.eval()
    test_loss, correct = 0, 0
    with torch.no_grad():
        for X, y in dataloader:
            
            pred = model(X)
            test_loss += loss_fn(pred, y).item()
            correct += (pred.argmax(1) == y).type(torch.float).sum().item()
    test_loss /= num_batches
    correct /= size
    print(f"Test Error: \n Accuracy: {(100*correct):>0.1f}%, Avg loss: {test_loss:>8f} \n")

In [17]:
epochs = 5
for t in range(epochs):
    print(f"Epoch {t+1}\n-------------------------------")
    train(train_loader, Net, loss_function, optimizer)
    test(valid_loader, Net, loss_function)
print("Done!")

Epoch 1
-------------------------------
loss: 2.303886  [    0/60000]
loss: 1.625683  [ 6400/60000]
loss: 1.486842  [12800/60000]
loss: 1.579942  [19200/60000]
loss: 1.565837  [25600/60000]
loss: 1.560014  [32000/60000]
loss: 1.490695  [38400/60000]
loss: 1.469507  [44800/60000]
loss: 1.547225  [51200/60000]
loss: 1.503957  [57600/60000]
Test Error: 
 Accuracy: 93.7%, Avg loss: 1.525528 

Epoch 2
-------------------------------
loss: 1.522916  [    0/60000]
loss: 1.504949  [ 6400/60000]
loss: 1.521048  [12800/60000]
loss: 1.512124  [19200/60000]
loss: 1.554952  [25600/60000]
loss: 1.477801  [32000/60000]
loss: 1.498871  [38400/60000]
loss: 1.503659  [44800/60000]
loss: 1.523053  [51200/60000]
loss: 1.563693  [57600/60000]
Test Error: 
 Accuracy: 94.3%, Avg loss: 1.518573 

Epoch 3
-------------------------------
loss: 1.520055  [    0/60000]
loss: 1.526810  [ 6400/60000]
loss: 1.515548  [12800/60000]
loss: 1.478248  [19200/60000]
loss: 1.505945  [25600/60000]
loss: 1.497216  [32000/600