In [62]:
import os
import tqdm
from models import SimpleCNN
from datetime import datetime

# Import Pytorch
import torch
import torch.nn as nn
import torch.optim as optim

# Import torchvision
from torchvision.datasets import mnist
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

import matplotlib.pyplot as plt

print(f"PyTorch version: {torch.__version__}\nTorchvision version: {torchvision.__version__}")




PyTorch version: 2.6.0+cu124
Torchvision version: 0.21.0+cu124


Load Dataset

In [52]:
# Transform the data. Convert to tensor and normalise to [0,1]

transform = transforms.Compose([
    transforms.ToTensor(),
    transforms.Normalize((0.5,), (0.5,))
])

# Load the MNIST dataset
train_dataset = mnist.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = mnist.MNIST(root='./data', train=False, download=True, transform=transform)

# Create data loaders
train_loader = DataLoader(train_dataset, batch_size=64, shuffle=True)
test_loader = DataLoader(test_dataset, batch_size=64, shuffle=False)

print(f'Training samples: {len(train_loader.dataset)}')
print(f'Testing samples: {len(test_loader.dataset)}')



Training samples: 60000
Testing samples: 10000


In [65]:
model = SimpleCNN()
print(model)

SimpleCNN(
  (conv1): Conv2d(1, 32, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (conv2): Conv2d(32, 64, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
  (pool): MaxPool2d(kernel_size=2, stride=2, padding=0, dilation=1, ceil_mode=False)
  (fc1): Linear(in_features=3136, out_features=128, bias=True)
  (fc2): Linear(in_features=128, out_features=10, bias=True)
)


Define Loss function and optimiser

In [55]:
# Define the loss function and optimizer
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

Train

In [56]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
model.to(device)

# Training loop
epochs = 5
for epoch in range(epochs):
    model.train() # Set model to trainning mode
    loss = 0.0
    for img, label in tqdm.tqdm(train_loader):
        img, label = img.to(device), label.to(device) # Move data to GPU if available

        optimizer.zero_grad() # Zero the gradients. Clears old gradients to avoid accumulation
        
        output = model(img)
        loss = criterion(output, label) # Compute the loss based on the output and the true labels
        loss.backward() # Backpropagation
        optimizer.step() # Update the weights

        loss += loss.item() # Accumulate the loss

    print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss/len(train_loader):.4f}", end='\r') # Print the loss for each epoch

100%|██████████| 938/938 [00:20<00:00, 46.56it/s]


Epoch [1/5], Loss: 0.0004

100%|██████████| 938/938 [00:21<00:00, 43.57it/s]


Epoch [2/5], Loss: 0.0001

100%|██████████| 938/938 [00:22<00:00, 41.11it/s]


Epoch [3/5], Loss: 0.0000

100%|██████████| 938/938 [00:21<00:00, 42.74it/s]


Epoch [4/5], Loss: 0.0003

100%|██████████| 938/938 [00:21<00:00, 42.72it/s]

Epoch [5/5], Loss: 0.0000




Evaluation Model

In [57]:
true = 0
total = 0

model.eval() # Set model to evaluation mode

with torch.no_grad(): # No need to compute gradients during evaluation
    for img, label in tqdm.tqdm(test_loader):
        img, label = img.to(device), label.to(device)

        output = model(img)
        _, predicted = torch.max(output, 1) # Get the index of the max log-probability
        total += label.size(0)
        true += (predicted == label).sum().item()

accuracy = (true / total) * 100
print(f"Test accuracy: {accuracy:.2f}%")



100%|██████████| 157/157 [00:02<00:00, 57.33it/s]

Test accuracy: 98.85%





In [68]:
model_dir = 'saved_models'

if not os.path.exists(model_dir):
    os.makedirs(model_dir)
    

timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
torch.save(model.state_dict(), f"{model_dir}/mnist_model.pth")
print("Model saved!")


Model saved!


In [5]:
import requests
import base64
import json

# Path to your default image
image_path = "default_digits/default_digit_6_1.png"

# Read and encode the image
with open(image_path, "rb") as image_file:
    encoded_image = base64.b64encode(image_file.read()).decode('utf-8')

# Prepare the payload
payload = {
    "image": encoded_image,
    "modelPath": "saved_models/mnist_model.pth"
}

# Send the request
response = requests.post(
    "http://localhost:8000/predict",
    headers={"Content-Type": "application/json"},
    json=payload
)

# Display the result
print("Status Code:", response.status_code)
print("Response:", json.dumps(response.json(), indent=2))

Status Code: 200
Response: {
  "prediction": 8,
  "confidence": 0.10735628753900528
}
