In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torchvision import datasets, transforms

In [2]:
from tqdm.notebook import tqdm

## Dataset

In [3]:
BATCH_SIZE = 10000

# Preprocessing applied to every image: resize to 28, convert to a tensor,
# then normalise with the (0.1307,) / (0.3081,) constants.
transform = transforms.Compose(
    [
        transforms.Resize(28),
        transforms.ToTensor(),
        transforms.Normalize((0.1307,), (0.3081,)),
    ]
)

# Training split (download=True is requested for this split only).
dataset1 = datasets.MNIST(root='../data', train=True, download=True, transform=transform)

# Test split, read from the same root directory.
dataset2 = datasets.MNIST(root='../data', train=False, transform=transform)

# Only the training loader shuffles its samples.
train_loader = torch.utils.data.DataLoader(
    dataset1,
    shuffle=True,
    batch_size=BATCH_SIZE,
)
test_loader = torch.utils.data.DataLoader(
    dataset2,
    batch_size=BATCH_SIZE,
)

In [4]:
# Report how many batches each loader yields at the configured batch size
num_train_batches = len(train_loader)
num_test_batches = len(test_loader)
print(f"Length of train_dataloader: {num_train_batches}, batches of {BATCH_SIZE} ...")
print(f"Length of test_dataloader: {num_test_batches}, batches of {BATCH_SIZE} ...")

Length of train_dataloader: 6, batches of 10000 ...
Length of test_dataloader: 1, batches of 10000 ...


In [5]:
# Pick the compute device once so the rest of the notebook is device agnostic
if torch.cuda.is_available():
    device = "cuda"
else:
    device = "cpu"
device

'cuda'

## Model

In [6]:
class CNNModel(nn.Module):
    """Small CNN: two 3x3 conv + ReLU + 2x2 max-pool stages, then two linear layers.

    The first linear layer takes 32 * 7 * 7 features, i.e. the second pooling
    stage must produce 32 feature maps of size 7x7 (which is what a 28x28
    single-channel input yields). The network returns 10 raw logits per
    sample; no softmax is applied.
    """

    def __init__(self):
        super().__init__()
        # padding=1 with a 3x3 kernel keeps the spatial size; only pooling shrinks it
        self.conv1 = nn.Conv2d(1, 16, kernel_size=3, stride=1, padding=1)
        self.conv2 = nn.Conv2d(16, 32, kernel_size=3, stride=1, padding=1)
        self.fc1 = nn.Linear(32 * 7 * 7, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        """Compute class logits for a batch of images.

        Args:
            x: Image tensor with one channel whose spatial size reduces to
                7x7 after the two pooling stages (e.g. 28x28).

        Returns:
            Tensor with 10 logits per row.

        Raises:
            RuntimeError: If the pooled feature maps are not 7x7.
        """
        x = F.relu(self.conv1(x))
        x = F.max_pool2d(x, 2)
        x = F.relu(self.conv2(x))
        x = F.max_pool2d(x, 2)

        # view(-1, ...) would otherwise silently fold surplus spatial elements
        # into the batch dimension (e.g. one 56x56 image -> 4 rows of logits),
        # so reject unexpected feature-map sizes explicitly.
        if tuple(x.shape[-2:]) != (7, 7):
            raise RuntimeError(
                "CNNModel expects 7x7 feature maps before the linear layers "
                f"(e.g. 28x28 inputs), got {tuple(x.shape[-2:])}"
            )

        x = x.view(-1, 32 * 7 * 7)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x
    
# Instantiate the CNN, then move its parameters to the selected device
model = CNNModel()
model = model.to(device)

## Training

In [7]:
# Set up the loss function and the optimizer
loss_fn = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=0.1)

In [8]:
# Set the number of epochs (we'll keep this small for faster training time)
epochs = 15

for epoch in tqdm(range(epochs)):
    # Training mode does not change between batches, so set it once per epoch
    # instead of on every iteration.
    model.train()

    for X, y in train_loader:
        # Put data on target device
        X, y = X.to(device), y.to(device)

        # 1. Forward pass
        y_pred = model(X)

        # 2. Calculate loss (per batch)
        loss = loss_fn(y_pred, y)

        # 3. Clear gradients left over from the previous step
        optimizer.zero_grad()

        # 4. Backpropagate the loss
        loss.backward()

        # 5. Update the parameters
        optimizer.step()

  0%|          | 0/15 [00:00<?, ?it/s]

## Testing 

In [9]:
def accuracy_fn(y_true, y_pred):
    """Return the percentage (0-100) of predictions that equal their labels.

    Args:
        y_true: Tensor of ground-truth labels.
        y_pred: Tensor of predicted labels, compared element-wise with
            ``y_true`` via ``torch.eq``. Both are assumed to have the same
            shape; ``torch.eq`` would broadcast otherwise.

    Returns:
        float: ``correct / len(y_pred) * 100``, or ``0.0`` when ``y_pred`` is
        empty (instead of raising ``ZeroDivisionError``).
    """
    total = len(y_pred)
    if total == 0:
        # Nothing to score; avoid dividing by zero
        return 0.0

    correct = torch.eq(y_true, y_pred).sum().item()
    return (correct / total) * 100

In [10]:
# Switch the model to evaluation mode before scoring it on the test set.
# As the cell's last expression, the returned module is displayed below.
model.eval()

CNNModel(
  (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=1568, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)

In [11]:
# Score the trained model on the test set, printing one accuracy value per batch.
# Evaluation needs no gradients, so autograd is disabled to avoid building a
# graph for the whole (large) test batch.
with torch.no_grad():
    for X, y in test_loader:
        X, y = X.to(device), y.to(device)
        y_pred = model(X)
        # argmax over dim 1 turns the logits into predicted class indices
        print(accuracy_fn(y_true=y, y_pred=y_pred.argmax(dim=1)))

94.54


## Save

In [12]:
# Persist only the learned parameters (the state dict), not the module object
trained_weights = model.state_dict()
torch.save(obj=trained_weights, f='./mnist_model.pt')

In [13]:
# Rebuild the architecture, then restore the saved parameters into it.
loaded_model = CNNModel()

# The checkpoint may have been written from a model living on CUDA;
# map_location="cpu" lets it load on machines without a GPU as well.
state_dict = torch.load(f="./mnist_model.pt", map_location="cpu")
loaded_model.load_state_dict(state_dict)

# Keep the model on the CPU and in evaluation mode for the check that follows
# (both calls return the module, so its repr is still displayed).
loaded_model.to('cpu').eval()

CNNModel(
  (conv1): Conv2d(1, 16, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(16, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (fc1): Linear(in_features=1568, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)

In [14]:
# Re-score the reloaded model on the CPU; the accuracy should match the
# in-memory model if the save/load round trip preserved the weights.
# Autograd is disabled because no gradients are needed for evaluation.
with torch.no_grad():
    for X, y in test_loader:
        X, y = X.to('cpu'), y.to('cpu')
        y_pred = loaded_model(X)
        print(accuracy_fn(y_true=y, y_pred=y_pred.argmax(dim=1)))

94.54
