In [1]:
import torch 

## MLP with Batchnorm and dropout

In [2]:
import torch.nn as nn
import torch.nn.functional as F
class Neuron(nn.Module):
    def __init__(self, inputs: int, activation=None):
        super(Neuron, self).__init__()
        self.linear = nn.Linear(inputs, 1) 
        self.activation = activation if activation is not None else lambda x: x
    
    
    def forward(self, x):
        return self.activation(self.linear(x))

class Layer(nn.Module):
    def __init__(self, inputs: int, n_neurons: int, activation=None, batchnorm=None, p=None):
        super(Layer, self).__init__()
        self.linear = nn.Linear(inputs, n_neurons)
        self.batchnorm = nn.BatchNorm1d(n_neurons) if batchnorm else None
        self.dropout = nn.Dropout(p) if p else None
        self.activation = activation if activation is not None else lambda x: x
    
    def forward(self, x):
        x = self.linear(x)
        if self.batchnorm is not None:
            x = self.batchnorm(x)
        if self.dropout is not None:
            x = self.dropout(x)
        if self.activation is not None:
            x = self.activation(x)
        return x

class MLP(nn.Module):
    def __init__(self, layer_sizes, activation_funcs=None, batchnorm_opts=None, dropout_opts=None):
        super(MLP, self).__init__()
        layers = []
        for i in range(len(layer_sizes) - 1):
            activation = activation_funcs[i] if activation_funcs and i < len(activation_funcs) else None
            batchnorm = batchnorm_opts[i] if batchnorm_opts and i < len(batchnorm_opts) else False
            dropout_prob = dropout_opts[i] if dropout_opts and i < len(dropout_opts) else None
            layers.append(Layer(layer_sizes[i], layer_sizes[i+1], activation, batchnorm=batchnorm, p=dropout_prob))

        self.layers = nn.ModuleList(layers)
    
    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
        return x


In [3]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.optim as optim

## MLP class testing using MNIST dataset

In [4]:
batch_size = 64
transform = transforms.Compose([
    transforms.ToTensor(),
])

train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data\MNIST\raw\train-images-idx3-ubyte.gz


100%|██████████| 9912422/9912422 [00:02<00:00, 4480861.23it/s]


Extracting ./data\MNIST\raw\train-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data\MNIST\raw\train-labels-idx1-ubyte.gz


100%|██████████| 28881/28881 [00:00<?, ?it/s]

Extracting ./data\MNIST\raw\train-labels-idx1-ubyte.gz to ./data\MNIST\raw






Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data\MNIST\raw\t10k-images-idx3-ubyte.gz


100%|██████████| 1648877/1648877 [00:00<00:00, 5520052.33it/s]


Extracting ./data\MNIST\raw\t10k-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz


100%|██████████| 4542/4542 [00:00<?, ?it/s]

Extracting ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw






In [5]:

learning_rate = 0.01
num_epochs = 10

layer_sizes = [784, 128, 64, 10]
activation_funcs=[F.relu, F.relu, F.relu]
batchnorm_opts = [True, True, None]
dropout_opts = [0.1, 0.1, None]
model = MLP(layer_sizes, batchnorm_opts=batchnorm_opts, activation_funcs=activation_funcs, dropout_opts=dropout_opts)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [6]:
def train_model():
    model.train()
    total_step = len(train_loader)
    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(train_loader):
            images = images.reshape(-1, 28*28)
            
            outputs = model(images)
            loss = criterion(outputs, labels)
        
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            if (i+1) % 100 == 0:
                print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                       .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

def evaluate_model(loader):
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in loader:
            images = images.reshape(-1, 28*28)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        accuracy = 100 * correct / total
        return accuracy

In [7]:
train_model()
train_accuracy = evaluate_model(train_loader)
test_accuracy = evaluate_model(test_loader)
print(f'Accuracy of the model on the training set: {train_accuracy}%')
print(f'Accuracy of the model on the test set: {test_accuracy}%')

Epoch [1/10], Step [100/938], Loss: 1.1004
Epoch [1/10], Step [200/938], Loss: 1.1437
Epoch [1/10], Step [300/938], Loss: 0.9237
Epoch [1/10], Step [400/938], Loss: 1.1977
Epoch [1/10], Step [500/938], Loss: 1.0508
Epoch [1/10], Step [600/938], Loss: 0.7245
Epoch [1/10], Step [700/938], Loss: 0.8430
Epoch [1/10], Step [800/938], Loss: 0.9087
Epoch [1/10], Step [900/938], Loss: 0.7385
Epoch [2/10], Step [100/938], Loss: 0.6577
Epoch [2/10], Step [200/938], Loss: 0.8543
Epoch [2/10], Step [300/938], Loss: 0.9456
Epoch [2/10], Step [400/938], Loss: 0.7947
Epoch [2/10], Step [500/938], Loss: 0.7841
Epoch [2/10], Step [600/938], Loss: 1.0193
Epoch [2/10], Step [700/938], Loss: 0.9934
Epoch [2/10], Step [800/938], Loss: 0.6751
Epoch [2/10], Step [900/938], Loss: 0.7027
Epoch [3/10], Step [100/938], Loss: 0.8815
Epoch [3/10], Step [200/938], Loss: 0.8230
Epoch [3/10], Step [300/938], Loss: 0.8763
Epoch [3/10], Step [400/938], Loss: 0.9522
Epoch [3/10], Step [500/938], Loss: 0.7572
Epoch [3/10