In [1]:
import torch
import torchvision
from torch.utils import data
from torchvision import transforms
import numpy as np
import torch.nn as nn
import matplotlib.pyplot as plt
plt.style.use('ggplot')



In [2]:
# Seed torch's global RNG so runs are repeatable
torch.manual_seed(42)

# Preprocessing pipeline applied to every image:
#   1. resize to 32x32 (the input size the LeNet below is built for)
#   2. convert to a tensor
#   3. normalise the single channel with mean 0.5 and std 0.5
trans = transforms.Compose(
    [
        transforms.Resize((32, 32)),
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

# Download (if not already cached under ../data) and load the MNIST
# training split first, then the test split.
mnist_train, mnist_test = (
    torchvision.datasets.MNIST(root="../data", train=is_train, transform=trans, download=True)
    for is_train in (True, False)
)

In [3]:
# Mini-batch loaders (256 samples per batch): the training set is reshuffled
# on every pass, the test set is iterated in its stored order.
train_iter = data.DataLoader(dataset=mnist_train, batch_size=256, shuffle=True)
test_iter = data.DataLoader(dataset=mnist_test, batch_size=256, shuffle=False)

In [4]:
# Run on the GPU when CUDA is available, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")
print(f"Using device: {device}")

Using device: cuda


In [5]:
class LeNet(nn.Module):
    """LeNet-style convolutional classifier for single-channel 32x32 images.

    Two 5x5 convolution + 2x2 average-pooling stages are followed by three
    fully connected layers; every hidden layer uses a Tanh activation.

    Args:
        num_classes: Number of output logits produced per sample.
            Defaults to 10.
    """

    def __init__(self, num_classes=10):
        super().__init__()
        self.net = nn.Sequential(
            # (1, 32, 32) -> (6, 28, 28) -> (6, 14, 14)
            nn.Conv2d(1, 6, kernel_size=5, padding='valid'), nn.Tanh(),
            nn.AvgPool2d(kernel_size=2, stride=2),
            # (6, 14, 14) -> (16, 10, 10) -> (16, 5, 5)
            nn.Conv2d(6, 16, kernel_size=5, padding='valid'), nn.Tanh(),
            nn.AvgPool2d(kernel_size=2, stride=2),
            # (16, 5, 5) -> 400 features
            nn.Flatten(),
            nn.Linear(16 * 5 * 5, 120), nn.Tanh(),
            nn.Linear(120, 84), nn.Tanh(),
            nn.Linear(84, num_classes)
        )

    def forward(self, X):
        """Return the raw class logits for a batch of images.

        Args:
            X: Batch of images of shape (batch, 1, 32, 32).

        Returns:
            Tensor of shape (batch, num_classes) holding unnormalised logits.
        """
        return self.net(X)

    def predict(self, X):
        """Return the index of the highest-scoring class for each sample.

        Gradients are not tracked. The module is invoked through ``self(X)``
        rather than ``self.forward(X)`` so that registered forward hooks run.

        Args:
            X: Batch of images of shape (batch, 1, 32, 32).

        Returns:
            Integer tensor of shape (batch,) with the predicted class indices.
        """
        with torch.no_grad():
            logits = self(X)
            return torch.argmax(logits, dim=1)

In [6]:
from torchinfo import summary

# Build a model instance and display its per-layer output shapes and
# parameter counts for one single-channel 32x32 input.
model = LeNet()
summary(model=model, input_size=(1, 1, 32, 32))

Layer (type:depth-idx)                   Output Shape              Param #
LeNet                                    [1, 10]                   --
├─Sequential: 1-1                        [1, 10]                   --
│    └─Conv2d: 2-1                       [1, 6, 28, 28]            156
│    └─Tanh: 2-2                         [1, 6, 28, 28]            --
│    └─AvgPool2d: 2-3                    [1, 6, 14, 14]            --
│    └─Conv2d: 2-4                       [1, 16, 10, 10]           2,416
│    └─Tanh: 2-5                         [1, 16, 10, 10]           --
│    └─AvgPool2d: 2-6                    [1, 16, 5, 5]             --
│    └─Flatten: 2-7                      [1, 400]                  --
│    └─Linear: 2-8                       [1, 120]                  48,120
│    └─Tanh: 2-9                         [1, 120]                  --
│    └─Linear: 2-10                      [1, 84]                   10,164
│    └─Tanh: 2-11                        [1, 84]                   --
│  

In [7]:
# Create the network to be trained and move its parameters to the selected
# device; the bare name on the last line displays the module structure.
model = LeNet().to(device)
model

LeNet(
  (net): Sequential(
    (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1), padding=valid)
    (1): Tanh()
    (2): AvgPool2d(kernel_size=2, stride=2, padding=0)
    (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1), padding=valid)
    (4): Tanh()
    (5): AvgPool2d(kernel_size=2, stride=2, padding=0)
    (6): Flatten(start_dim=1, end_dim=-1)
    (7): Linear(in_features=400, out_features=120, bias=True)
    (8): Tanh()
    (9): Linear(in_features=120, out_features=84, bias=True)
    (10): Tanh()
    (11): Linear(in_features=84, out_features=10, bias=True)
  )
)

In [8]:
# Training hyper-parameters
epochs = 10   # number of full passes over the training set
lr = 1e-3     # Adam step size

# Adam over all of the model's parameters
optimizer = torch.optim.Adam(params=model.parameters(), lr=lr)

In [9]:
# Cross-entropy on raw logits, averaged over the batch (the default reduction)
loss_func = nn.CrossEntropyLoss(reduction="mean")

In [10]:
for epoch in range(epochs):
    # Make sure the model is in training mode at the start of every epoch,
    # in case it was switched to eval mode elsewhere before this cell runs.
    model.train()
    loss_vals = []
    for X, y in train_iter:
        # move the mini-batch to the device the model lives on
        X, y = X.to(device), y.to(device)

        # forward pass: call the module itself (not .forward) so that any
        # registered hooks are executed
        y_pred = model(X)
        # calculate loss
        loss = loss_func(y_pred, y)
        loss_vals.append(loss.item())

        # clear gradients accumulated by the previous step
        optimizer.zero_grad()

        # backward pass
        loss.backward()

        # update params
        optimizer.step()

    # print the mean mini-batch loss of this epoch
    avg_loss = np.mean(loss_vals)
    print(f'Epoch: {epoch + 1}, Loss: {avg_loss:.4f}')

Epoch: 1, Loss: 0.4777
Epoch: 2, Loss: 0.1311
Epoch: 3, Loss: 0.0823
Epoch: 4, Loss: 0.0617
Epoch: 5, Loss: 0.0506
Epoch: 6, Loss: 0.0419
Epoch: 7, Loss: 0.0353
Epoch: 8, Loss: 0.0297
Epoch: 9, Loss: 0.0279
Epoch: 10, Loss: 0.0226
