In [34]:
import torch 

## MLP with Batchnorm and dropout

In [35]:
import torch.nn as nn
import torch.nn.functional as F
class Neuron(nn.Module):
    def __init__(self, inputs: int, activation=None):
        super(Neuron, self).__init__()
        self.linear = nn.Linear(inputs, 1) 
        self.activation = activation if activation is not None else lambda x: x
    
    
    def forward(self, x):
        return self.activation(self.linear(x))

class Layer(nn.Module):
    def __init__(self, inputs: int, n_neurons: int, activation=None, batchnorm=None, p=None):
        super(Layer, self).__init__()
        self.linear = nn.Linear(inputs, n_neurons)
        self.batchnorm = nn.BatchNorm1d(n_neurons) if batchnorm else None
        self.dropout = nn.Dropout(p) if p else None
        self.activation = activation if activation is not None else lambda x: x
    
    def forward(self, x):
        x = self.linear(x)
        if self.batchnorm is not None:
            x = self.batchnorm(x)
        if self.dropout is not None:
            x = self.dropout(x)
        if self.activation is not None:
            x = self.activation(x)
        return x

class MLP(nn.Module):
    def __init__(self, layer_sizes, activation_funcs=None, batchnorm_opts=None, dropout_opts=None):
        super(MLP, self).__init__()
        layers = []
        for i in range(len(layer_sizes) - 1):
            activation = activation_funcs[i] if activation_funcs and i < len(activation_funcs) else None
            batchnorm = batchnorm_opts[i] if batchnorm_opts and i < len(batchnorm_opts) else False
            dropout_prob = dropout_opts[i] if dropout_opts and i < len(dropout_opts) else None
            layers.append(Layer(layer_sizes[i], layer_sizes[i+1], activation, batchnorm=batchnorm, p=dropout_prob))

        self.layers = nn.ModuleList(layers)
    
    def forward(self, x):
        for layer in self.layers:
            x = layer(x)
        return x


In [36]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.optim as optim

## MLP class testing using MNIST dataset

In [37]:
batch_size = 64
transform = transforms.Compose([
    transforms.ToTensor(),
])

train_dataset = datasets.MNIST(root='./data', train=True, download=True, transform=transform)
test_dataset = datasets.MNIST(root='./data', train=False, download=True, transform=transform)

train_loader = DataLoader(dataset=train_dataset, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(dataset=test_dataset, batch_size=batch_size, shuffle=False)


In [38]:

learning_rate = 0.01
num_epochs = 10

layer_sizes = [784, 128, 64, 10]
batchnorm_opts = [True, True, None]
dropout_opts = [0.1, 0.1, None]
model = MLP(layer_sizes, batchnorm_opts=batchnorm_opts, dropout_opts=dropout_opts)

criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=learning_rate)

In [39]:
def train_model():
    model.train()
    total_step = len(train_loader)
    for epoch in range(num_epochs):
        for i, (images, labels) in enumerate(train_loader):
            images = images.reshape(-1, 28*28)
            
            outputs = model(images)
            loss = criterion(outputs, labels)
        
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            
            if (i+1) % 100 == 0:
                print ('Epoch [{}/{}], Step [{}/{}], Loss: {:.4f}'
                       .format(epoch+1, num_epochs, i+1, total_step, loss.item()))

def evaluate_model(loader):
    model.eval()
    with torch.no_grad():
        correct = 0
        total = 0
        for images, labels in loader:
            images = images.reshape(-1, 28*28)
            outputs = model(images)
            _, predicted = torch.max(outputs.data, 1)
            total += labels.size(0)
            correct += (predicted == labels).sum().item()
        accuracy = 100 * correct / total
        return accuracy

In [40]:
train_model()
train_accuracy = evaluate_model(train_loader)
test_accuracy = evaluate_model(test_loader)
print(f'Accuracy of the model on the training set: {train_accuracy}%')
print(f'Accuracy of the model on the test set: {test_accuracy}%')

Epoch [1/10], Step [100/938], Loss: 0.4334
Epoch [1/10], Step [200/938], Loss: 0.1956
Epoch [1/10], Step [300/938], Loss: 0.1317
Epoch [1/10], Step [400/938], Loss: 0.4199
Epoch [1/10], Step [500/938], Loss: 0.2691
Epoch [1/10], Step [600/938], Loss: 0.2612
Epoch [1/10], Step [700/938], Loss: 0.3895
Epoch [1/10], Step [800/938], Loss: 0.2462
Epoch [1/10], Step [900/938], Loss: 0.1971
Epoch [2/10], Step [100/938], Loss: 0.4397
Epoch [2/10], Step [200/938], Loss: 0.2033
Epoch [2/10], Step [300/938], Loss: 0.6246
Epoch [2/10], Step [400/938], Loss: 0.2307
Epoch [2/10], Step [500/938], Loss: 0.2205
Epoch [2/10], Step [600/938], Loss: 0.5395
Epoch [2/10], Step [700/938], Loss: 0.1757
Epoch [2/10], Step [800/938], Loss: 0.4189
Epoch [2/10], Step [900/938], Loss: 0.3296
Epoch [3/10], Step [100/938], Loss: 0.3176
Epoch [3/10], Step [200/938], Loss: 0.3955
Epoch [3/10], Step [300/938], Loss: 0.2506
Epoch [3/10], Step [400/938], Loss: 0.3219
Epoch [3/10], Step [500/938], Loss: 0.2903
Epoch [3/10