In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import torch.nn.init as init
import torchvision.datasets as dset
import torchvision.transforms as transforms
from torch.utils.data import DataLoader

torch.backends.cudnn.enabled = False
torch.backends.cudnn.benchmark = False

import matplotlib.pyplot as plt

import os
os.environ['CUDA_VISIBLE_DEVICES'] = '0' 

import warnings
warnings.filterwarnings('ignore')
warnings.simplefilter('ignore')

# Modeling

In [2]:
class FC_Block(nn.Module) : 
    def __init__(self, input_dim, output_dim) : 
        super(FC_Block, self).__init__()
        self.input_dim = input_dim
        self.output_dim = output_dim

        self.linear = nn.Linear(input_dim, output_dim)
        self.relu = nn.ReLU(inplace = True)
        self.bn = nn.BatchNorm1d(output_dim)
        self.dr = nn.Dropout()

    def forward(self, x) : 
        out = self.linear(x)
        out = self.bn(out)
        out = self.relu(out)
        out = self.dr(out)
        return out

In [3]:
class MLP_Classifier(nn.Module) : 
    def __init__(self, fc_block, num_classes = 10) : 
        super(MLP_Classifier, self).__init__()
        self.fc1 = fc_block(784, 256)
        self.fc2 = fc_block(256,64)
        self.fc3 = fc_block(64,16)
        self.linear = nn.Linear(16, num_classes)
        self.output = nn.Softmax()
    
    def forward(self, x) : 
        x = self.fc1(x)
        x = self.fc2(x)
        x = self.fc3(x)
        x = self.linear(x)
        x = self.output(x)
        return x


In [4]:
mlp = MLP_Classifier(FC_Block, num_classes=10)

# Load Data

In [5]:
mnist_train = dset.MNIST("./", train=True, transform=transforms.ToTensor(), target_transform=None, download=True)
mnist_test = dset.MNIST("./", train=False, transform=transforms.ToTensor(), target_transform=None, download=True)
mnist_train, mnist_val = torch.utils.data.random_split(mnist_train, [48000, 12000])

In [6]:
dataloaders = {}
batch_size = 480
dataloaders['train'] = DataLoader(mnist_train, batch_size=batch_size, shuffle=True)
dataloaders['val'] = DataLoader(mnist_val, batch_size=batch_size, shuffle=False)
dataloaders['test'] = DataLoader(mnist_test, batch_size=batch_size, shuffle=False)

# Training

In [7]:
device = "cuda:0" if torch.cuda.is_available() else "cpu"
mlp = MLP_Classifier(FC_Block, 10).to(device)
optimizer = torch.optim.Adam(mlp.parameters(), lr=0.1, weight_decay=1e-4)
criterion = nn.CrossEntropyLoss().to(device)

In [8]:
import time
import copy

In [9]:
phase = 'train'
train_loss_history = []
valid_loss_history = []
best_val_loss = 10000
since = time.time()
early_stop_epoch = 0

for epoch in range(50) : 
    print('=== Current Epoch : ', epoch, ' ===')

    for phase in ['train','val'] : 
        if phase == 'train' : 
            mlp.train()
        else : 
            mlp.eval()

        epoch_loss = 0
        running_acc = 0
        total = 0

        for batch_idx, (x,y) in enumerate(dataloaders[phase]) : 
            x,y = x.view(-1, 28*28).to(device), y.to(device)
            optimizer.zero_grad()
            out = mlp(x)
            running_acc += sum(out.argmax(1) == y)
            total += x.size(0)
            loss = criterion(out, y)
            epoch_loss += loss / 480

            if phase == 'train' : 
                loss.backward()
                optimizer.step()
            
        
        epoch_acc = running_acc / total
        print(f'{phase} Loss: {epoch_loss} / Acc : {epoch_acc}')

        if phase == 'train' : 
            train_loss_history.append([epoch_loss, epoch_acc])
        elif phase == 'val' : 
            valid_loss_history.append([epoch_loss, epoch_acc])

        if (phase == 'val') and (epoch_loss  < best_val_loss) : 
            best_val_loss = epoch_loss
            best_model_wts = copy.deepcopy(mlp.state_dict())
            early_stop_epoch = 0
        elif phase == 'val' : 
            early_stop_epoch += 1
            if early_stop_epoch > 10 :
                break;

time_elapsed = time.time() - since
print('Training complete in {:.0f}m {:.0f}s'.format(time_elapsed // 60, time_elapsed % 60))
print('Best val Loss: {:4f}'.format(best_val_loss))

# load best model weights
mlp.load_state_dict(best_model_wts)

=== Current Epoch :  0  ===
train Loss: 1.9259496927261353 / Acc : 0.5338333249092102
val Loss: 1.7370600700378418 / Acc : 0.7228333353996277
=== Current Epoch :  1  ===
train Loss: 1.8432836532592773 / Acc : 0.6219375133514404
val Loss: 1.7151570320129395 / Acc : 0.746916651725769
=== Current Epoch :  2  ===
train Loss: 1.8346511125564575 / Acc : 0.6301041841506958
val Loss: 1.7111871242523193 / Acc : 0.7534166574478149
=== Current Epoch :  3  ===
train Loss: 1.8341000080108643 / Acc : 0.629729151725769
val Loss: 1.6606887578964233 / Acc : 0.8048333525657654
=== Current Epoch :  4  ===
train Loss: 1.8236664533615112 / Acc : 0.6413541436195374
val Loss: 1.6614594459533691 / Acc : 0.8015833497047424
=== Current Epoch :  5  ===
train Loss: 1.8299202919006348 / Acc : 0.6355416774749756
val Loss: 1.6827470064163208 / Acc : 0.7800832986831665
=== Current Epoch :  6  ===
train Loss: 1.8324944972991943 / Acc : 0.6333958506584167
val Loss: 1.676156759262085 / Acc : 0.7867500185966492
=== Curre

<All keys matched successfully>

In [10]:
with torch.no_grad():
    mlp.eval()    # set the model to evaluation mode (dropout=False)
    test_acc = 0
    test_size = 0
    for x,y in dataloaders['test'] : 
        x,y = x.view(-1, 28*28).to(device), y.to(device)

        prediction = mlp(x)
        test_acc += sum(prediction.argmax(1) == y)
        test_size += x.size(0)
    print('Test Accuracy:', test_acc.item() / test_size)

Test Accuracy: 0.9541
