In [3]:
import torch
import torchvision.datasets as dsets
import torchvision.transforms as transforms
import random

In [6]:
# Seed Python's and PyTorch's random number generators with a fixed value.
random.seed(0)
torch.manual_seed(0)

# Choose the compute device by availability instead of assuming Apple MPS,
# so the notebook also runs on CUDA machines and on CPU-only machines.
if torch.backends.mps.is_available():
    device = torch.device("mps")
elif torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# Hyperparameters used by the optimizer, the training loop and the data loader.
learning_rate = .001
training_epochs = 15
batch_size = 100

In [14]:
# Both MNIST splits use the same root directory and the same ToTensor
# transform, and are downloaded if missing.
to_tensor = transforms.ToTensor()
mnist_train = dsets.MNIST('dset/', train=True, transform=to_tensor, download=True)
mnist_test = dsets.MNIST('dset/', train=False, transform=to_tensor, download=True)

# Training batches are reshuffled every epoch; the final batch is dropped
# when it would be smaller than batch_size.
data_loader = torch.utils.data.DataLoader(
    mnist_train,
    batch_size=batch_size,
    shuffle=True,
    drop_last=True,
)

In [None]:
class CNN(torch.nn.Module):
    """Two convolutional stages followed by one linear layer.

    Each stage is Conv2d (3x3, stride 1, padding 1) -> ReLU -> MaxPool2d
    (2x2, stride 2). The linear layer takes 7 * 7 * 64 flattened features,
    which matches an input of shape (N, 1, 28, 28), and returns 10 values
    per sample.
    """

    def __init__(self):
        super().__init__()
        nn = torch.nn

        def conv_stage(in_channels, out_channels):
            # Padding 1 with a 3x3 kernel keeps height/width; the pool halves them.
            return nn.Sequential(
                nn.Conv2d(in_channels, out_channels, kernel_size=3, stride=1, padding=1),
                nn.ReLU(),
                nn.MaxPool2d(kernel_size=2, stride=2),
            )

        # Submodules are created in the order layer1, layer2, fc so that seeded
        # parameter initialization consumes the RNG in a fixed sequence.
        # (N, 1, 28, 28) -> conv (N, 32, 28, 28) -> pool (N, 32, 14, 14)
        self.layer1 = conv_stage(1, 32)
        # (N, 32, 14, 14) -> conv (N, 64, 14, 14) -> pool (N, 64, 7, 7)
        self.layer2 = conv_stage(32, 64)
        # (N, 7 * 7 * 64) -> (N, 10)
        self.fc = nn.Linear(7 * 7 * 64, 10, bias=True)
        # Replace the default weight init of the linear layer with Xavier uniform.
        nn.init.xavier_uniform_(self.fc.weight)

    def forward(self, x):
        """Run both conv stages, flatten per sample, and apply the linear layer."""
        features = self.layer2(self.layer1(x))
        flat = features.view(features.size(0), -1)
        return self.fc(flat)

In [10]:
# Build the network, then move its parameters to the selected device.
model = CNN()
model = model.to(device)

In [12]:
# Cross-entropy loss applied to the model outputs, placed on the same device.
criterion = torch.nn.CrossEntropyLoss()
criterion = criterion.to(device)

# Adam over every parameter of the model, using the configured learning rate.
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [15]:
# Number of batches per epoch; used to turn the summed batch losses into a mean.
total_batch = len(data_loader)
print('Learning started. It takes sometime.')
for epoch in range(training_epochs):
    # Running mean of the per-batch loss for this epoch.
    avg_cost = 0.0

    for X, Y in data_loader:
        # Move the batch to the same device as the model.
        X = X.to(device)
        Y = Y.to(device)

        # Standard step: clear old gradients, forward, loss, backward, update.
        optimizer.zero_grad()
        hypothesis = model(X)
        cost = criterion(hypothesis, Y)
        cost.backward()
        optimizer.step()

        # Detach before accumulating: adding `cost` itself would keep every
        # batch's autograd history referenced until the epoch ends.
        avg_cost += cost.detach() / total_batch

    # float() gives the same printed value for a 0-dim tensor and also keeps
    # the '.9' precision spec valid if the loader yielded no batches.
    print('[Epoch: {:>4}] cost = {:>.9}'.format(epoch + 1, float(avg_cost)))

print('Learning Finished!')

Learning started. It takes sometime.
[Epoch:    1] cost = 0.215982541
[Epoch:    2] cost = 0.0624012761
[Epoch:    3] cost = 0.0444372036
[Epoch:    4] cost = 0.0355485827
[Epoch:    5] cost = 0.0290135574
[Epoch:    6] cost = 0.0254430473
[Epoch:    7] cost = 0.0199335851
[Epoch:    8] cost = 0.0176872481
[Epoch:    9] cost = 0.0148797687
[Epoch:   10] cost = 0.0121472888
[Epoch:   11] cost = 0.0101141762
[Epoch:   12] cost = 0.00804429315
[Epoch:   13] cost = 0.00741379103
[Epoch:   14] cost = 0.00584084122
[Epoch:   15] cost = 0.00641191658
Learning Finished!
