In [7]:
import numpy as np

from torch.utils.data import DataLoader
from torchvision import datasets

from tqdm.notebook import tqdm

import torch
import torch.nn as nn
import torchvision.transforms as Trans

import torch.nn.init as init


In [8]:
# Mini-batch size passed to both the training and the test DataLoader.
BATCH_SIZE = 100
# Number of worker processes each DataLoader is created with.
NUM_WORKERS = 4

# Number of full passes over the training set.
EPOCHS = 10

# Fixed seed so that weight initialisation and batch shuffling repeat across
# "Restart Kernel -> Run All" (GPU kernels may still add small nondeterminism).
SEED = 0
np.random.seed(SEED)
torch.manual_seed(SEED)

In [9]:
# The only preprocessing step is the conversion to tensors; no normalisation
# or augmentation is applied.
transform = Trans.ToTensor()

# Both MNIST splits live under the same root and use the same transform;
# they differ only in the `train` flag.
_mnist_kwargs = dict(root='datasets', transform=transform, download=True)
train_dataset = datasets.MNIST(train=True, **_mnist_kwargs)
test_dataset = datasets.MNIST(train=False, **_mnist_kwargs)

# Loader settings shared by both splits; only the training loader shuffles.
_loader_kwargs = dict(batch_size=BATCH_SIZE, num_workers=NUM_WORKERS)
train_dataloader = DataLoader(dataset=train_dataset, shuffle=True, **_loader_kwargs)
test_dataloader = DataLoader(dataset=test_dataset, **_loader_kwargs)

In [10]:
class MyNet(nn.Module):
    """Two-stage convolutional classifier producing 10 raw class scores.

    Each stage is Conv2d(5x5, stride 1, padding 2) -> ReLU -> MaxPool2d(2).
    The final linear layer takes 32 * 7 * 7 features, so the network expects
    single-channel 28x28 inputs (28 -> 14 -> 7 after the two poolings).
    """

    def __init__(self):
        super().__init__()

        # Stages are created in the same order as they are applied so that
        # parameter initialisation order (and hence seeded weights) is stable.
        self.conv1 = self._conv_stage(in_channels=1, out_channels=16)
        self.conv2 = self._conv_stage(in_channels=16, out_channels=32)
        self.out = nn.Linear(in_features=32 * 7 * 7, out_features=10)

    @staticmethod
    def _conv_stage(in_channels, out_channels):
        """Build one Conv2d -> ReLU -> MaxPool2d stage."""
        return nn.Sequential(
            nn.Conv2d(
                in_channels=in_channels,
                out_channels=out_channels,
                kernel_size=5,
                stride=1,
                padding=2,
            ),
            nn.ReLU(),
            nn.MaxPool2d(kernel_size=2),
        )

    def forward(self, x):
        """Return unnormalised class scores of shape (batch, 10)."""
        features = self.conv2(self.conv1(x))
        # Collapse everything except the batch dimension for the linear layer.
        flat = features.view(features.size(0), -1)
        return self.out(flat)

# Loss applied to the raw class scores returned by MyNet.
CELoss = nn.CrossEntropyLoss()

# Use the GPU when one is present, otherwise stay on the CPU instead of
# crashing in an unconditional `.cuda()` call.
DEVICE = torch.device("cuda" if torch.cuda.is_available() else "cpu")
net = MyNet()
net.to(DEVICE)

# Plain SGD with momentum over all model parameters.
optimizer = torch.optim.SGD(net.parameters(), lr=0.01, momentum=0.9)

In [11]:
def train(net, optimizer, criterion, dataloader=None):
    """Run one training epoch and return the mean per-batch loss.

    Args:
        net: Model to optimise. Batches are moved to the device of its first
            parameter (CUDA is assumed if the model has no parameters).
        optimizer: Optimizer stepping the parameters of ``net``.
        criterion: Callable ``criterion(output, labels)`` returning a scalar
            loss tensor.
        dataloader: Iterable of ``(images, labels)`` batches. Defaults to the
            module-level ``train_dataloader``.

    Returns:
        float: Average of the per-batch losses, or 0.0 if there were no
        batches.
    """
    if dataloader is None:
        dataloader = train_dataloader

    first_param = next(net.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cuda")

    net.train()
    running_loss = 0.0
    num_batches = 0
    for images, labels in (pbar := tqdm(dataloader)):
        images = images.to(device)
        labels = labels.to(device)
        optimizer.zero_grad()

        output = net(images)

        loss = criterion(output, labels)
        loss.backward()
        optimizer.step()

        # .item() detaches the value: summing the loss tensor itself would
        # chain every batch's autograd graph into the running total.
        batch_loss = loss.item()
        running_loss += batch_loss
        num_batches += 1
        pbar.set_description(f"loss: {batch_loss:.4f}")

    return running_loss / max(num_batches, 1)


def valid(net, criterion, dataloader=None):
    """Evaluate ``net`` and return ``(mean_batch_loss, accuracy)``.

    Args:
        net: Model to evaluate. Batches are moved to the device of its first
            parameter (CUDA is assumed if the model has no parameters).
        criterion: Callable ``criterion(output, labels)`` returning a scalar
            loss tensor.
        dataloader: Iterable of ``(images, labels)`` batches. Defaults to the
            module-level ``test_dataloader``.

    Returns:
        tuple[float, float]: Average of the per-batch losses and the fraction
        of samples whose highest-scoring class equals the label. Both are 0.0
        when the loader yields nothing.
    """
    if dataloader is None:
        dataloader = test_dataloader

    first_param = next(net.parameters(), None)
    device = first_param.device if first_param is not None else torch.device("cuda")

    # Evaluate in eval mode, then put the model back into whatever mode the
    # caller had it in.
    was_training = net.training
    net.eval()

    running_loss = 0.0
    correct_total = 0
    num_batches = 0
    num_samples = 0
    try:
        with torch.no_grad():
            for images, labels in dataloader:
                images = images.to(device)
                labels = labels.to(device)
                output = net(images)

                running_loss += criterion(output, labels).item()

                pred = output.argmax(dim=1)
                correct_total += (pred == labels.view_as(pred)).sum().item()

                num_batches += 1
                num_samples += labels.size(0)
    finally:
        net.train(was_training)

    accuracy = correct_total / max(num_samples, 1)
    valid_loss = running_loss / max(num_batches, 1)
    return valid_loss, accuracy

In [12]:
# One training pass followed by one evaluation pass per epoch; the same
# summary is printed as a permanent log line and shown on the progress bar.
pbar = tqdm(range(EPOCHS))
for epoch in pbar:
    train_loss = train(net, optimizer, CELoss)
    valid_loss, prec = valid(net, CELoss)

    summary = f"train/valid loss: {train_loss:.4f}/{valid_loss:.4f} acc: {prec:.4f}"
    print(f"[{epoch}] {summary}")
    pbar.set_description(summary)

  0%|          | 0/10 [00:00<?, ?it/s]

  0%|          | 0/600 [00:00<?, ?it/s]

KeyboardInterrupt: 