In [2]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import torch.nn as nn
import torch.optim as optim

import torch.nn.init

In [3]:
device = 'cuda' if torch.cuda.is_available() else 'cpu'

torch.manual_seed(777)
if device == 'cuda':
    torch.cuda.manual_seed_all(777)

In [4]:
learning_rate = 1e-3
training_epochs = 10
batch_size = 50

In [5]:
mnist_train = dsets.MNIST(root='../Data/MNIST_data/',
                            train = True,
                            transform=transforms.ToTensor(),
                            download=True)
mnist_test = dsets.MNIST(root='../Data/MNIST_data/',
                            train = False,
                            transform=transforms.ToTensor(),
                            download=True)

In [6]:
data_loader = torch.utils.data.DataLoader(dataset = mnist_train,
                                            batch_size = batch_size,
                                            shuffle = True,
                                            drop_last = True)

In [7]:
# light model because of training without gpu
class CNN(nn.Module):
    def __init__(self):
        super(CNN, self).__init__()
        self.layer1 = nn.Sequential(
            nn.Conv2d(1, 5, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

        self.layer2 = nn.Sequential(
            nn.Conv2d(5, 10, 3, padding=1),
            nn.ReLU(),
            nn.MaxPool2d(2)
        )

        self.fc = nn.Linear(7*7*10, 10)

    def forward(self, x):
        out = self.layer1(x)
        out = self.layer2(out)
        out = out.view(out.size(0), -1)
        out = self.fc(out)
        return out



In [8]:
model = CNN().to(device)
model

CNN(
  (layer1): Sequential(
    (0): Conv2d(1, 5, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (layer2): Sequential(
    (0): Conv2d(5, 10, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (1): ReLU()
    (2): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  )
  (fc): Linear(in_features=490, out_features=10, bias=True)
)

In [9]:
criterion = nn.CrossEntropyLoss().to(device)
optimizer = optim.Adam(model.parameters(), lr = learning_rate)

In [10]:
total_batch = len(data_loader)

for epoch in range(training_epochs):
    avg_cost = 0
    for x, y in data_loader:
        x = x.to(device)
        y = y.to(device)

        optimizer.zero_grad()
        hypothesis = model(x)

        cost = criterion(hypothesis, y)
        cost.backward()
        optimizer.step()

        avg_cost += cost / total_batch
    print(f'epoch = {epoch}, cost = {avg_cost}')

epoch = 0, cost = 0.3259534537792206
epoch = 1, cost = 0.113211490213871
epoch = 2, cost = 0.08737830817699432
epoch = 3, cost = 0.07434052228927612
epoch = 4, cost = 0.06633111089468002
epoch = 5, cost = 0.06042223796248436
epoch = 6, cost = 0.055699702352285385
epoch = 7, cost = 0.05156499147415161
epoch = 8, cost = 0.04693608731031418
epoch = 9, cost = 0.045060981065034866


In [11]:
with torch.no_grad():
     x_test = mnist_test.test_data.view(len(mnist_test), 1, 28, 28).float().to(device)
     y_test = mnist_test.test_labels.to(device)

     prediction = model(x_test)
     correct_prediction = torch.argmax(prediction, 1) == y_test
     accuracy = correct_prediction.float().mean()
     print(f'Accuracy = {accuracy}')



Accuracy = 0.9803000092506409
