In [22]:
import numpy as np
import torch
from torch.utils.data import Dataset
from torchvision.transforms import ToTensor

class MNIST_Dataset(Dataset):
    """Map-style dataset over MNIST images and labels stored as raw IDX files.

    The image file is read as uint8 after skipping its first 16 bytes and is
    reshaped to ``(N, 28, 28, 1)``. The label file is read as uint8 after
    skipping its first 8 bytes and is stored as an int64 tensor.

    Args:
        data_path: Path to the uncompressed image file.
        labels_path: Path to the uncompressed label file.
        transform: Optional callable applied to each ``(28, 28, 1)`` uint8
            array returned by ``__getitem__``. When ``None`` (the default),
            torchvision's ``ToTensor()`` is applied.

    Raises:
        ValueError: If the number of images and the number of labels differ.
    """

    def __init__(self, data_path, labels_path, transform=None):
        with open(data_path, "rb") as f:
            # np.frombuffer returns a read-only view of the bytes; np.array
            # copies it so the stored images are a normal writable array.
            self.X = np.array(np.frombuffer(f.read(), np.uint8, offset=16)).reshape((-1, 28, 28, 1))
        with open(labels_path, "rb") as f:
            self.y = torch.tensor(np.frombuffer(f.read(), np.uint8, offset=8), dtype=torch.long)
        if len(self.X) != len(self.y):
            # Fail at construction time instead of with an IndexError mid-epoch.
            raise ValueError(
                f"Image/label count mismatch: {len(self.X)} images vs {len(self.y)} labels"
            )
        self.transforms = transform

    def __len__(self):
        """Return the number of images in the dataset."""
        return len(self.X)

    def __getitem__(self, idx):
        """Return ``(transformed_image, label)`` for the sample at ``idx``."""
        # Honour the user-supplied transform; fall back to ToTensor() otherwise.
        transform = self.transforms if self.transforms is not None else ToTensor()
        return transform(self.X[idx]), self.y[idx]
                

In [23]:
from torch import nn

class M3_network(nn.Module):
    """Plain stack of ten 3x3 convolution blocks followed by a linear classifier.

    Each block is ``Conv2d(kernel_size=3) -> BatchNorm2d -> ReLU``. Channel
    widths go 1 -> 32 and then grow by 16 per block up to 176. The head is
    ``Flatten -> Linear(11264, 10) -> BatchNorm1d(10)``, so the network
    returns 10 values per sample. The linear layer's input size of 11264
    equals 176 * 8 * 8, i.e. what ten unpadded 3x3 convolutions leave of a
    28x28 input.

    Sub-modules are registered as ``convblock1`` ... ``convblock10`` and
    ``linearblock1``.
    """

    # Channel count entering the first block, then leaving each block.
    _WIDTHS = (1, 32, 48, 64, 80, 96, 112, 128, 144, 160, 176)

    def __init__(self):
        super().__init__()
        channel_pairs = zip(self._WIDTHS[:-1], self._WIDTHS[1:])
        for position, (c_in, c_out) in enumerate(channel_pairs, start=1):
            # setattr on an nn.Module registers the block under this name.
            setattr(
                self,
                f"convblock{position}",
                nn.Sequential(
                    nn.Conv2d(c_in, c_out, 3),
                    nn.BatchNorm2d(c_out),
                    nn.ReLU(),
                ),
            )
        self.linearblock1 = nn.Sequential(
            nn.Flatten(),
            nn.Linear(11264, 10),
            nn.BatchNorm1d(10),
        )

    def forward(self, x):
        """Apply the ten conv blocks in order, then the classifier head."""
        features = x
        for position in range(1, len(self._WIDTHS)):
            features = getattr(self, f"convblock{position}")(features)
        return self.linearblock1(features)

In [24]:
def train_loop(train_dataloader, model, loss_fn, optimizer):
    """Run one optimisation pass over ``train_dataloader``, printing per-batch metrics.

    Args:
        train_dataloader: Sized iterable yielding ``(X, y)`` batches.
        model: Module called as ``model(X)`` to produce per-class scores.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar tensor.
        optimizer: Optimizer that updates the parameters of ``model``.

    Returns:
        None. The loss and accuracy of every batch are printed.
    """
    # Put the model in training mode explicitly so layers such as BatchNorm
    # use batch statistics even if an earlier step left the model in eval mode.
    model.train()
    num_batches = len(train_dataloader)
    for batch_no, (X, y) in enumerate(train_dataloader):
        pred = model(X)
        loss = loss_fn(pred, y)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        with torch.no_grad():
            # Softmax is monotonic, so argmax over the raw scores picks the
            # same class as argmax over the softmax probabilities.
            correct = (pred.argmax(1) == y).sum().item()
            batch_accuracy = correct / len(X)

        print(f"Batch: [{batch_no}/{num_batches}], batch_loss: {round(loss.item(),3)}, batch_accuracy: {round(batch_accuracy,3)}")

In [25]:
def val_loop(val_dataloader, model, loss_fn):
    """Evaluate ``model`` on ``val_dataloader`` and print average loss and accuracy.

    The model is switched to eval mode for the duration of the pass, so
    BatchNorm layers use their running statistics and validation data does
    not update them. The model's previous train/eval mode is restored
    afterwards.

    Args:
        val_dataloader: Sized iterable yielding ``(X, y)`` batches and
            exposing a sized ``.dataset`` attribute.
        model: Module called as ``model(X)`` to produce per-class scores.
        loss_fn: Callable ``loss_fn(pred, y)`` returning a scalar tensor.

    Returns:
        None. The mean per-batch loss and overall accuracy are printed, each
        rounded to 3 decimals (``nan`` when the loader or dataset is empty).
    """
    was_training = model.training
    model.eval()
    correct = 0
    total_batch_loss = 0
    try:
        with torch.no_grad():
            for X, y in val_dataloader:
                pred = model(X)
                total_batch_loss += loss_fn(pred, y).item()
                # argmax over raw scores equals argmax over softmax(scores).
                correct += (pred.argmax(1) == y).sum().item()
    finally:
        # Restore the caller's mode so a following training step is unaffected.
        model.train(was_training)

    num_val_batches = len(val_dataloader)
    num_val_datapoints = len(val_dataloader.dataset)

    # Guard the divisions so an empty loader reports nan instead of crashing.
    if num_val_batches:
        average_batch_loss = round((total_batch_loss / num_val_batches), 3)
    else:
        average_batch_loss = float("nan")
    if num_val_datapoints:
        epoch_accuracy = round((correct / num_val_datapoints), 3)
    else:
        epoch_accuracy = float("nan")

    print(f"Average_batch_loss: {average_batch_loss}, Accuracy: {epoch_accuracy}")
            

In [26]:
# Hyperparameters for this run.
batch_size = 120
epochs = 2
learning_rate = 0.001

# Raw IDX files are expected under ./data relative to the notebook.
train_dataset = MNIST_Dataset(r"data/train-images.idx3-ubyte", r"data/train-labels.idx1-ubyte")
val_dataset =  MNIST_Dataset(r"data/t10k-images.idx3-ubyte", r"data/t10k-labels.idx1-ubyte")

from torch.utils.data import DataLoader
# Shuffle only the training data. Evaluation gains nothing from shuffling,
# and a fixed order keeps validation runs comparable.
train_dataloader = DataLoader(train_dataset, batch_size, shuffle=True)
val_dataloader = DataLoader(val_dataset, batch_size, shuffle=False)

In [27]:
# Build the network, the classification loss and the Adam optimizer that
# updates the network's parameters at the configured learning rate.
model = M3_network()
loss_fn = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=learning_rate)

In [28]:
# Each epoch runs a full training pass followed by a full validation pass.
for epoch in range(epochs):
    print(f"Epoch {epoch}\n__________________")
    for heading, run_phase, phase_args in (
        ("Training:", train_loop, (train_dataloader, model, loss_fn, optimizer)),
        ("Validating:", val_loop, (val_dataloader, model, loss_fn)),
    ):
        print(heading)
        run_phase(*phase_args)
    print("\n")
    

Epoch 0
__________________
Training:
Batch: [0/500], batch_loss: 2.824, batch_accuracy: 0.05
Batch: [1/500], batch_loss: 1.525, batch_accuracy: 0.508
Batch: [2/500], batch_loss: 1.141, batch_accuracy: 0.708
Batch: [3/500], batch_loss: 1.121, batch_accuracy: 0.7
Batch: [4/500], batch_loss: 0.974, batch_accuracy: 0.817
Batch: [5/500], batch_loss: 0.922, batch_accuracy: 0.8
Batch: [6/500], batch_loss: 0.853, batch_accuracy: 0.842
Batch: [7/500], batch_loss: 0.874, batch_accuracy: 0.842
Batch: [8/500], batch_loss: 0.815, batch_accuracy: 0.875
Batch: [9/500], batch_loss: 0.814, batch_accuracy: 0.842
Batch: [10/500], batch_loss: 0.752, batch_accuracy: 0.875
Batch: [11/500], batch_loss: 0.716, batch_accuracy: 0.883
Batch: [12/500], batch_loss: 0.703, batch_accuracy: 0.892
Batch: [13/500], batch_loss: 0.675, batch_accuracy: 0.925
Batch: [14/500], batch_loss: 0.64, batch_accuracy: 0.908
Batch: [15/500], batch_loss: 0.669, batch_accuracy: 0.933
Batch: [16/500], batch_loss: 0.658, batch_accuracy: