In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.functional as F
from torch.utils.data import DataLoader
import torchvision.datasets as datasets
import torchvision.transforms as transforms

In [16]:
class CNN(nn.Module):
    """Small two-stage convolutional classifier.

    Each stage is a 3x3, stride-1, padding-1 convolution followed by ReLU and
    a 2x2 max-pool, so the spatial size is halved twice. A single linear layer
    then maps the flattened features to ``num_classes`` raw scores (no softmax
    is applied in ``forward``).

    Args:
        in_channels: Number of channels in the input images.
        num_classes: Number of output scores per image.
        input_size: Height and width of the (square) input images. The default
            of 28 yields the original ``16*7*7`` classifier input width.

    Raises:
        ValueError: If ``input_size`` is too small to survive two 2x2 pools.
    """

    def __init__(self, in_channels=1, num_classes=10, input_size=28):
        super().__init__()
        # Two stride-2 pools with floor rounding shrink each side to input_size // 4.
        pooled_size = input_size // 4
        if pooled_size < 1:
            raise ValueError(
                f"input_size must be at least 4 to survive two 2x2 pools, got {input_size}"
            )

        self.conv1 = nn.Conv2d(in_channels=in_channels, out_channels=8, kernel_size=3, stride=1, padding=1)
        # [N, in_channels, S, S]  ->  [N, 8, S, S]   (padding=1 keeps the spatial size)
        self.pool = nn.MaxPool2d(kernel_size=2, stride=2)
        # (S, S)  ->  (S // 2, S // 2); the same module is applied after each conv
        self.conv2 = nn.Conv2d(in_channels=8, out_channels=16, kernel_size=3, stride=1, padding=1)
        # [N, 8, S/2, S/2]  ->  [N, 16, S/2, S/2]
        self.fc = nn.Linear(16 * pooled_size * pooled_size, num_classes)

    def forward(self, x):
        """Return class scores of shape ``[N, num_classes]`` for a batch ``x``."""
        x = F.relu(self.conv1(x))
        x = self.pool(x)
        x = F.relu(self.conv2(x))
        x = self.pool(x)
        # Collapse channels and spatial dims into one feature vector per sample.
        x = x.reshape(x.shape[0], -1)
        x = self.fc(x)

        return x

In [29]:
# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
device = 'cuda' if torch.cuda.is_available() else 'cpu'

# Seed PyTorch's global RNG so weight initialisation and batch shuffling start
# from the same state on every Restart & Run All.
seed = 0
torch.manual_seed(seed)

# Model shape
in_channels = 1
num_classes = 10

# Optimisation hyperparameters
# NOTE(review): 1e-5 is a small step size for Adam; consider raising it if
# training converges slowly.
learning_rate = 1e-5
batch_size = 64
num_epochs = 20

In [17]:
# Build the network from the configured shape and move its parameters to `device`.
model = CNN(in_channels=in_channels, num_classes=num_classes).to(device)
# Bare last expression so the notebook displays the layer summary.
model

CNN(
  (conv1): Conv2d(1, 8, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (conv2): Conv2d(8, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc): Linear(in_features=784, out_features=10, bias=True)
)

In [26]:
# Adam updates every parameter of `model` using the configured step size.
optimizer = optim.Adam(params=model.parameters(), lr=learning_rate)
# Cross-entropy is applied to the model's raw scores (its forward pass has no softmax).
criterion = nn.CrossEntropyLoss()

In [10]:
# Both splits share the same storage location and preprocessing.
data_root = 'dataset/minist/'
to_tensor = transforms.ToTensor()

# download=True fetches the files only when they are missing under `data_root`,
# so the cell also works on a machine that has never downloaded MNIST.
train_dataset = datasets.MNIST(root=data_root, train=True, transform=to_tensor, download=True)
# Use the configured batch_size instead of a second hard-coded 64.
train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)

test_dataset = datasets.MNIST(root=data_root, train=False, transform=to_tensor, download=True)
# Evaluation only counts errors over the whole split, so shuffling is unnecessary.
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)

In [None]:
# Train for num_epochs passes over train_loader, reporting the mean batch loss per epoch.
for epoch in range(num_epochs):
    # Re-enter training mode each epoch so the loop is safe to re-run after
    # the evaluation cell has called model.eval().
    model.train()
    losses = []
    for data, label in train_loader:
        data = data.to(device)
        label = label.to(device)

        pred = model(data)
        loss = criterion(pred, label)
        # Store a plain Python float: appending the tensor itself would keep
        # every batch's autograd history alive until the end of the epoch.
        losses.append(loss.item())

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    print(f"epoch: {epoch+1}, loss = {sum(losses)/len(losses)}")

epoch: 1, loss = 0.5596543550491333
epoch: 2, loss = 0.5108937621116638
epoch: 3, loss = 0.47366106510162354
epoch: 4, loss = 0.44457727670669556
epoch: 5, loss = 0.421329140663147
epoch: 6, loss = 0.4020666480064392
epoch: 7, loss = 0.38592424988746643
epoch: 8, loss = 0.3717154860496521
epoch: 9, loss = 0.35940617322921753
epoch: 10, loss = 0.34805065393447876
epoch: 11, loss = 0.3378625214099884
epoch: 12, loss = 0.32844972610473633
epoch: 13, loss = 0.3197747468948364
epoch: 14, loss = 0.31178635358810425
epoch: 15, loss = 0.30405208468437195
epoch: 16, loss = 0.296942800283432
epoch: 17, loss = 0.2902800142765045
epoch: 18, loss = 0.283648818731308


In [None]:
# Measure classification accuracy over test_loader.
model.eval()
total = 0
num_wrong = 0

# Gradients are not needed for evaluation.
with torch.no_grad():
    for data, label in test_loader:
        data = data.to(device)
        label = label.to(device)
        total += data.shape[0]

        # Predicted class = index of the largest score along the class dimension.
        pred = model(data).argmax(dim=1)
        # Count mismatches directly and keep the running total as a Python int,
        # so the final ratio is computed in ordinary float arithmetic.
        num_wrong += (pred != label).sum().item()

print(f"Accuracy on  test set : {(1-(num_wrong/total))*100:.2f}%")