In [None]:
# Shell escape: ask the NVIDIA driver tool which GPU(s), if any, this runtime can see.
!nvidia-smi

In [None]:
import math
import time
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from torchvision import datasets
from torchvision.utils import make_grid
from torchvision.transforms import ToTensor
from torch.utils.data import DataLoader

import matplotlib.pyplot as plt


# Handles for both possible compute devices; `device` is the one every
# later cell should move tensors and the model to.
cpu = torch.device("cpu")
cuda = torch.device("cuda")

# Prefer the GPU whenever PyTorch reports a usable CUDA runtime.
if torch.cuda.is_available():
    device = cuda
else:
    device = cpu
print(device)

In [None]:
# Seed both RNGs so the shuffled subsample drawn below is reproducible.
np.random.seed(123)
torch.manual_seed(123)

# MNIST training split, downloaded into ./data on first use.
mnist = datasets.MNIST(
    root="data",
    train=True,
    download=True,
    transform=ToTensor()
)
# A single shuffled batch of 20000 images is pulled and kept in memory;
# the rest of the notebook works on this subset only.
loader = DataLoader(mnist, batch_size=20000, shuffle=True)

x, y = next(iter(loader))
x = x.to(device=device)
y = y.to(device=device)

print(x.size())

# Preview a few samples from the subset. matplotlib needs a CPU numpy array,
# and squeeze() drops the singleton channel axis so imshow gets a 2-D image.
# (`img` is left holding the last previewed sample, as before.)
for sample_id in (0, 99, 999, 9999):
    img = x[sample_id].squeeze().cpu().numpy()
    plt.imshow(img, cmap="gray")
    plt.show()

In [None]:
# Shape of a five-image slice: slicing keeps the batch axis in front.
x[:5].shape

In [None]:
class MNISTModel(nn.Module):
    """Small convolutional classifier for 28x28 single-channel images.

    Architecture: two 5x5 convolutions with 16 filters each, every one
    followed by ReLU and 2x2 max pooling, then a 256 -> 512 -> 10 fully
    connected head. The network returns raw logits; no softmax is applied.
    """

    def __init__(self):
        """Create the two convolutional and two fully connected layers."""
        super().__init__()
        # Keep this construction order: each layer draws its initial weights
        # from the global RNG when it is built.
        self.conv1 = nn.Conv2d(1, 16, kernel_size=5)
        self.conv2 = nn.Conv2d(16, 16, kernel_size=5)
        self.fc1 = nn.Linear(256, 512)
        self.fc2 = nn.Linear(512, 10)

    def forward(self, x):
        """Map a batch of images to a (batch, 10) tensor of class logits.

        `x` may have any shape that can be viewed as (-1, 1, 28, 28),
        e.g. (N, 1, 28, 28), (N, 784) or a single (1, 28, 28) image.
        """
        images = x.view(-1, 1, 28, 28)
        # conv 5x5 -> 16 x 24 x 24, pool -> 16 x 12 x 12
        feat = F.max_pool2d(F.relu(self.conv1(images)), kernel_size=2)
        # conv 5x5 -> 16 x 8 x 8, pool -> 16 x 4 x 4
        feat = F.max_pool2d(F.relu(self.conv2(feat)), kernel_size=2)
        # Flatten the 16 x 4 x 4 feature maps into 256 features per image.
        hidden = F.relu(self.fc1(feat.view(-1, 256)))
        return self.fc2(hidden)

In [None]:
# Re-seed right before construction so the initial weights do not depend on
# how much randomness earlier cells consumed.
torch.manual_seed(123)
np.random.seed(123)

# Build the network and move its parameters to the selected device.
model = MNISTModel().to(device=device)
print(model)

In [None]:
# Mini-batch training of `model` on the in-memory subset (x, y) with Adam.
nepoch = 10
batch_size = 256

opt = torch.optim.Adam(model.parameters(), lr=0.001)
# The loss module is the same for every step, so build it once up front
# rather than once per mini-batch.
lossfn = nn.CrossEntropyLoss()

n = x.shape[0]
obs_id = np.arange(n)  # [0, 1, ..., n-1]
# Run the whole data set `nepoch` times
for i in range(nepoch):
    # Shuffle observation IDs so every epoch visits the data in a new order
    np.random.shuffle(obs_id)

    # Update on mini-batches; the final one is shorter when `batch_size`
    # does not divide `n`
    for j in range(0, n, batch_size):
        # Create mini-batch: pick the row IDs once and reuse them for
        # inputs and labels so the two always stay aligned
        batch_ids = obs_id[j:(j + batch_size)]
        x_mini_batch = x[batch_ids]
        y_mini_batch = y[batch_ids]
        # Compute loss (the model outputs logits, which is what
        # CrossEntropyLoss expects)
        pred = model(x_mini_batch)
        loss = lossfn(pred, y_mini_batch)
        # Compute gradient and update parameters
        opt.zero_grad()
        loss.backward()
        opt.step()

        # Report progress every 10th mini-batch
        batch_no = j // batch_size
        if batch_no % 10 == 0:
            print(f"epoch {i}, batch {batch_no}, loss = {loss.item()}")

In [None]:
# Print the predicted class probabilities for the four samples previewed
# earlier (rows 0, 99, 999 and 9999 of `x`).
#
# - `dim=1` is explicit: the model returns (batch, 10) logits, and calling
#   softmax without `dim` relies on a deprecated implicit choice that emits
#   a warning.
# - `torch.no_grad()` skips building an autograd graph for pure inference,
#   so the result can be converted to numpy without `.detach()`.
with torch.no_grad():
    for sample_id in (0, 99, 999, 9999):
        logits = model(x[sample_id].view(-1, 1, 28, 28))
        pred = F.softmax(logits, dim=1)
        print(np.round(pred.cpu().numpy(), 3))