MNIST dataset preparation and analysis

In [1]:
# import the necessary packages
import torch
import numpy as np
import matplotlib.pyplot as plt
import matplotlib
from torchvision import datasets
import torchvision.transforms as transforms
from torch.utils.data import DataLoader
from torch import nn
from torchinfo import summary
from torchmetrics import Accuracy
from torch.utils.tensorboard import SummaryWriter
from datetime import datetime
import os

In [2]:
# Download the MNIST dataset
train_val_dataset = datasets.MNIST(root="./datasets/", train=True, download=True)
test_dataset = datasets.MNIST(root="./datasets/", train=False, download=True)

In [3]:
# Dataset summary
print("Train/Validation dataset size:", len(train_val_dataset))
print("Test dataset size:", len(test_dataset))

class_names = train_val_dataset.classes
print("Class names:", class_names)

Train/Validation dataset size: 60000
Test dataset size: 10000
Class names: ['0 - zero', '1 - one', '2 - two', '3 - three', '4 - four', '5 - five', '6 - six', '7 - seven', '8 - eight', '9 - nine']


In [4]:
# plot image with pixel values
def img_pixel_superimpose(img):
    img = img.numpy()
    fig = plt.figure(figsize=(12, 12))
    ax = fig.add_subplot(111)
    w, h = img.shape
    color_map = matplotlib.colormaps['gray_r']  # gray_reversed
    ax.imshow(img, cmap="gray")
    for x in range(w):
        for y in range(h):
            color = color_map(img[x][y])
            ax.annotate(str(img[x][y]), xy=(y,x), horizontalalignment='center', verticalalignment='center',
                        color=color)
            plt.axis(False)
    plt.savefig("pixel_img.png")
    plt.show()

def terminal_print(img):
    img = img.numpy()
    w, h = img.shape
    for x in range(w):
        for y in range(h):
            print(img[x][y], end=" ")
        print()
    print()

def csv_print(img):
    img = img.numpy()
    w, h = img.shape
    with open("img.csv", "w") as f:
        for x in range(w):
            for y in range(h):
                f.write(str(img[x][y]) + ",")
            f.write("\n")
        f.write("\n")
        
img0 = train_val_dataset.data[0]
# img_pixel_superimpose(img0)
terminal_print(img0)
# csv_print(img0)

0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 3 18 18 18 126 136 175 26 166 255 247 127 0 0 0 0 
0 0 0 0 0 0 0 0 30 36 94 154 170 253 253 253 253 253 225 172 253 242 195 64 0 0 0 0 
0 0 0 0 0 0 0 49 238 253 253 253 253 253 253 253 253 251 93 82 82 56 39 0 0 0 0 0 
0 0 0 0 0 0 0 18 219 253 253 253 253 253 198 182 247 241 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 80 156 107 253 253 205 11 0 43 154 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 14 1 154 253 90 0 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 139 253 190 2 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 11 190 253 70 0 0 0 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 35 241 225 160 108 1 0 0 0 0 0 0 0 0 0 0 
0 0 0 0 0 0 0 0 0 0 0 0 0 81 240 253 253 119 25 0 0 0 0 0 0 0 0 0 


In [5]:
# convert the datasets into tensors
transform = transforms.ToTensor()
train_val_dataset = datasets.MNIST(root="./datasets/", train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root="./datasets/", train=False, download=True, transform=transform)

In [6]:
# Data Normalization
# Calculate mean and std
#imgs = torch.stack([img for img, _ in train_val_dataset], dim=0)
#mean = imgs.view(1, -1).mean(dim=1)    # or imgs.mean()
mean = 0.1307
#std = imgs.view(1, -1).std(dim=1)     # or imgs.std()
std = 0.3081
print("Mean:", mean)
print("Std:", std)
# Composition of transforms
mnist_transforms = transforms.Compose([transforms.ToTensor(),
                                       transforms.Normalize(mean=mean, std=std)])

Mean: 0.1307
Std: 0.3081


In [7]:
# Apply transforms to datasets
train_val_dataset = datasets.MNIST(root="./datasets/", train=True, download=True, transform=mnist_transforms)
test_dataset = datasets.MNIST(root="./datasets/", train=False, download=True, transform=mnist_transforms)

In [8]:
# Split train and validation datasets
train_size = int(0.9 * len(train_val_dataset))
val_size = len(train_val_dataset) - train_size
train_dataset, val_dataset = torch.utils.data.random_split(dataset=train_val_dataset, lengths=[train_size, val_size])

In [9]:
# Dataset summary
print("Train dataset size:", len(train_dataset))
print("Validation dataset size:", len(val_dataset))
print("Test dataset size:", len(test_dataset))

Train dataset size: 54000
Validation dataset size: 6000
Test dataset size: 10000


In [10]:
# Create dataloaders
# check memory available
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print("Device:", device)

# set batch size based on available memory
BATCH_SIZE = 128 if device == "cuda" else 64

train_dataloader = DataLoader(dataset=train_dataset, batch_size=BATCH_SIZE, shuffle=True)
val_dataloader = DataLoader(dataset=val_dataset, batch_size=BATCH_SIZE, shuffle=True)
test_dataloader = DataLoader(dataset=test_dataset, batch_size=BATCH_SIZE, shuffle=True)

# Summary of dataloaders
print("Train dataloader size:", len(train_dataloader))
print("Validation dataloader size:", len(val_dataloader))
print("Test dataloader size:", len(test_dataloader))

Device: cpu
Train dataloader size: 844
Validation dataloader size: 94
Test dataloader size: 157


LeNet-5 architecture implementation

In [11]:
# LeNet-5 architecture implementation
class LeNet5_V1(nn.Module):
    def __init__(self):
        super().__init__()
        # Feature extractor
        self.feature = nn.Sequential(
            # Convolutional layers
            # First conv layer
            # input: 1 x 28 x 28 --> padding = 2 --> 1 x 32 x 32 --> 6 x 28 x 28
            nn.Conv2d(in_channels=1, out_channels=6, kernel_size=5, stride=1, padding=2),
            # activation function
            nn.Sigmoid(),
            # pooling layer 14 x 14
            nn.AvgPool2d(kernel_size=2, stride=2),

            # Second conv layer
            # input: 6 x 14 x 14 --> 16 x 10 x 10
            nn.Conv2d(in_channels=6, out_channels=16, kernel_size=5, stride=1),
            # activation function
            nn.Sigmoid(),
            # pooling layer 5 x 5
            nn.AvgPool2d(kernel_size=2, stride=2),
        )

        # Classifier
        self.classifier = nn.Sequential(
            # Fully connected layers
            # First fc layer
            # input: 16 x 5 x 5 = 400 --> 120
            # flatten
            nn.Flatten(),
            # fc layer
            nn.Linear(in_features=16 * 5 * 5, out_features=120),
            # activation function
            nn.Sigmoid(), # sigmoid

            # Second fc layer
            nn.Linear(in_features=120, out_features=84),

            # activation function
            nn.Sigmoid(), # sigmoid

            # Third fc layer
            nn.Linear(in_features=84, out_features=10)
        )

    def forward(self, x):
        return self.classifier(self.feature(x))

In [12]:
# Model creation
model_lenet5 = LeNet5_V1()
print(model_lenet5)

# move the model to the device
model_lenet5.to(device)

# Model summary
summary(model=model_lenet5, input_size=(1, 1, 28, 28), device=device)

LeNet5_V1(
  (feature): Sequential(
    (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): Sigmoid()
    (2): AvgPool2d(kernel_size=2, stride=2, padding=0)
    (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (4): Sigmoid()
    (5): AvgPool2d(kernel_size=2, stride=2, padding=0)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=400, out_features=120, bias=True)
    (2): Sigmoid()
    (3): Linear(in_features=120, out_features=84, bias=True)
    (4): Sigmoid()
    (5): Linear(in_features=84, out_features=10, bias=True)
  )
)


Layer (type:depth-idx)                   Output Shape              Param #
LeNet5_V1                                [1, 10]                   --
├─Sequential: 1-1                        [1, 16, 5, 5]             --
│    └─Conv2d: 2-1                       [1, 6, 28, 28]            156
│    └─Sigmoid: 2-2                      [1, 6, 28, 28]            --
│    └─AvgPool2d: 2-3                    [1, 6, 14, 14]            --
│    └─Conv2d: 2-4                       [1, 16, 10, 10]           2,416
│    └─Sigmoid: 2-5                      [1, 16, 10, 10]           --
│    └─AvgPool2d: 2-6                    [1, 16, 5, 5]             --
├─Sequential: 1-2                        [1, 10]                   --
│    └─Flatten: 2-7                      [1, 400]                  --
│    └─Linear: 2-8                       [1, 120]                  48,120
│    └─Sigmoid: 2-9                      [1, 120]                  --
│    └─Linear: 2-10                      [1, 84]                   10,164
│  

In [13]:
# Loss function
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model_lenet5.parameters(), lr=1e-3)
accuracy = Accuracy(task="multiclass", num_classes=10).to(device)

Training

In [14]:
# Training
# Log Tracking use tensorboard to generate log dirs
log_dir = os.path.join("logs", datetime.now().strftime("%Y%m%d-%H%M%S"))
os.makedirs(log_dir, exist_ok=True)
writer = SummaryWriter(log_dir=log_dir)

In [15]:
# training loop
EPOCHS = 12
for epoch in range(EPOCHS):
    # training phase
    model_lenet5.train()
    for batch_idx, (data, targets) in enumerate(train_dataloader):
        # move data to device
        data = data.to(device)
        targets = targets.to(device)

        # forward
        scores = model_lenet5(data)
        loss = loss_fn(scores, targets)

        # backward
        optimizer.zero_grad()
        loss.backward()

        # gradient descent or adam step
        optimizer.step()

        # calculate accuracy
        acc = accuracy(scores, targets)

        # print
        if batch_idx % 100 == 0:
            print(f"Epoch [{epoch}/{EPOCHS}] Batch {batch_idx}/{len(train_dataloader)} Loss {loss:.4f} Accuracy {acc:.4f}")

            # write to tensorboard
            step = epoch * len(train_dataloader) + batch_idx
            writer.add_scalar("Training loss", loss, global_step=step)
            writer.add_scalar("Training accuracy", acc, global_step=step)

    # validation phase
    model_lenet5.eval()
    with torch.no_grad():
        for batch_idx, (data, targets) in enumerate(val_dataloader):
            # move data to device
            data = data.to(device)
            targets = targets.to(device)

            # forward
            scores = model_lenet5(data)
            loss = loss_fn(scores, targets)

            # calculate accuracy
            acc = accuracy(scores, targets)

            # print
            if batch_idx % 100 == 0:
                print(f"Epoch [{epoch}/{EPOCHS}] Batch {batch_idx}/{len(val_dataloader)} Loss {loss:.4f} Accuracy {acc:.4f}")

                # write to tensorboard
                step = epoch * len(val_dataloader) + batch_idx
                writer.add_scalar("Validation loss", loss, global_step=step)
                writer.add_scalar("Validation accuracy", acc, global_step=step)
              
writer.flush()  
writer.close()

Epoch [0/12] Batch 0/844 Loss 2.3796 Accuracy 0.0625
Epoch [0/12] Batch 100/844 Loss 2.2677 Accuracy 0.1719
Epoch [0/12] Batch 200/844 Loss 1.5034 Accuracy 0.5625
Epoch [0/12] Batch 300/844 Loss 0.9353 Accuracy 0.8125
Epoch [0/12] Batch 400/844 Loss 0.5573 Accuracy 0.8750
Epoch [0/12] Batch 500/844 Loss 0.4329 Accuracy 0.8906
Epoch [0/12] Batch 600/844 Loss 0.3068 Accuracy 0.9375
Epoch [0/12] Batch 700/844 Loss 0.2818 Accuracy 0.9219
Epoch [0/12] Batch 800/844 Loss 0.4311 Accuracy 0.8750
Epoch [0/12] Batch 0/94 Loss 0.3167 Accuracy 0.9219
Epoch [1/12] Batch 0/844 Loss 0.3086 Accuracy 0.8906
Epoch [1/12] Batch 100/844 Loss 0.3100 Accuracy 0.9219
Epoch [1/12] Batch 200/844 Loss 0.2918 Accuracy 0.9219
Epoch [1/12] Batch 300/844 Loss 0.3059 Accuracy 0.9062
Epoch [1/12] Batch 400/844 Loss 0.1597 Accuracy 0.9688
Epoch [1/12] Batch 500/844 Loss 0.2353 Accuracy 0.9219
Epoch [1/12] Batch 600/844 Loss 0.1025 Accuracy 0.9844
Epoch [1/12] Batch 700/844 Loss 0.1863 Accuracy 0.9219
Epoch [1/12] Batc

In [16]:
# Save the model
if not os.path.exists("models"):
    os.makedirs("models")
num = 2
model_filename = f"LeNet-5_original_v{num}.pth"
torch.save(model_lenet5.state_dict(), os.path.join("models", model_filename))

Evaluation

In [17]:
# load model
model_lenet5_loaded = LeNet5_V1()
model_lenet5_loaded.load_state_dict(torch.load(os.path.join("models", model_filename)))
model_lenet5_loaded.to(device)

LeNet5_V1(
  (feature): Sequential(
    (0): Conv2d(1, 6, kernel_size=(5, 5), stride=(1, 1), padding=(2, 2))
    (1): Sigmoid()
    (2): AvgPool2d(kernel_size=2, stride=2, padding=0)
    (3): Conv2d(6, 16, kernel_size=(5, 5), stride=(1, 1))
    (4): Sigmoid()
    (5): AvgPool2d(kernel_size=2, stride=2, padding=0)
  )
  (classifier): Sequential(
    (0): Flatten(start_dim=1, end_dim=-1)
    (1): Linear(in_features=400, out_features=120, bias=True)
    (2): Sigmoid()
    (3): Linear(in_features=120, out_features=84, bias=True)
    (4): Sigmoid()
    (5): Linear(in_features=84, out_features=10, bias=True)
  )
)

In [18]:
model_lenet5_loaded.eval()

# test phase
test_loss = 0
test_acc = 0
with torch.no_grad():
    for batch_idx, (data, targets) in enumerate(test_dataloader):
        # move data to device
        data = data.to(device)
        targets = targets.to(device)

        # forward
        scores = model_lenet5_loaded(data)
        loss = loss_fn(scores, targets)

        # calculate accuracy
        acc = accuracy(scores, targets)

        # print
        if batch_idx % 100 == 0:
            print(f"Batch {batch_idx}/{len(test_dataloader)} Loss {loss:.4f} Accuracy {acc:.4f}")

        test_loss += loss
        test_acc += acc

Batch 0/157 Loss 0.0124 Accuracy 1.0000
Batch 100/157 Loss 0.0173 Accuracy 1.0000
