In [1]:
import torch 
import matplotlib.pyplot as plt

## MLP class

In [2]:
import torch.nn as nn
import torch.nn.functional as F

class Neuron(nn.Module):
    """A single linear unit followed by an optional activation.

    Args:
        inputs: Number of input features fed to the unit.
        activation: Callable applied to the linear output. When ``None``
            (the default) the linear output is returned unchanged.
    """

    def __init__(self, inputs: int, activation=None):
        super().__init__()
        self.linear = nn.Linear(inputs, 1)
        # nn.Identity rather than a lambda: a lambda stored on the module
        # cannot be pickled (so saving the whole module fails) and does not
        # appear in the module's repr. nn.Identity has no parameters, so the
        # state_dict keys are unaffected.
        self.activation = activation if activation is not None else nn.Identity()

    def forward(self, x):
        """Return ``activation(linear(x))``; the last dimension has size 1."""
        return self.activation(self.linear(x))

class Layer(nn.Module):
    """A fully connected layer followed by an optional activation.

    Args:
        inputs: Number of input features.
        n_neurons: Number of output features (units in the layer).
        activation: Callable applied to the linear output. When ``None``
            (the default) the linear output is returned unchanged.
    """

    def __init__(self, inputs: int, n_neurons: int, activation=None):
        super().__init__()
        self.linear = nn.Linear(inputs, n_neurons)
        # nn.Identity rather than a lambda: a lambda stored on the module
        # cannot be pickled (so saving the whole module fails). nn.Identity
        # has no parameters, so the state_dict keys are unaffected.
        self.activation = activation if activation is not None else nn.Identity()

    def forward(self, x):
        """Return ``activation(linear(x))``."""
        # self.activation is always callable after __init__, so no None check
        # is needed here.
        return self.activation(self.linear(x))

class MLP(nn.Module):
    """Multi-layer perceptron assembled from consecutive ``Layer`` modules.

    Args:
        layer_sizes: Sequence of feature sizes; entry ``i`` and entry ``i + 1``
            are the input and output sizes of layer ``i``.
        activation_funcs: Optional sequence of callables; entry ``i`` is used
            as the activation of layer ``i``. Layers with no matching entry
            get no activation, and surplus entries are ignored.
    """

    def __init__(self, layer_sizes, activation_funcs=None):
        super().__init__()
        # Number of activations actually supplied (0 for None or empty).
        n_given = len(activation_funcs) if activation_funcs else 0
        self.layers = nn.ModuleList(
            Layer(
                layer_sizes[idx],
                layer_sizes[idx + 1],
                activation_funcs[idx] if idx < n_given else None,
            )
            for idx in range(len(layer_sizes) - 1)
        )

    def forward(self, x):
        """Pass ``x`` through every layer in order and return the result."""
        out = x
        for block in self.layers:
            out = block(out)
        return out


## Testing the MLP class on the MNIST dataset

In [3]:
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
import torch.optim as optim

In [4]:
# Load the MNIST training split and normalise the pixel values.
# ToTensor converts each image to a tensor; Normalize then applies
# (x - 0.5) / 0.5 to the single channel.
transform = transforms.Compose(
    [
        transforms.ToTensor(),
        transforms.Normalize(mean=(0.5,), std=(0.5,)),
    ]
)

# The dataset is downloaded into ./data if it is not already there.
train_dataset = datasets.MNIST(
    root='./data',
    train=True,
    transform=transform,
    download=True,
)
# Mini-batches of 64 samples, reshuffled every epoch.
train_loader = DataLoader(dataset=train_dataset, batch_size=64, shuffle=True)


Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-images-idx3-ubyte.gz to ./data\MNIST\raw\train-images-idx3-ubyte.gz


100.0%


Extracting ./data\MNIST\raw\train-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/train-labels-idx1-ubyte.gz to ./data\MNIST\raw\train-labels-idx1-ubyte.gz


100.0%


Extracting ./data\MNIST\raw\train-labels-idx1-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-images-idx3-ubyte.gz to ./data\MNIST\raw\t10k-images-idx3-ubyte.gz


100.0%


Extracting ./data\MNIST\raw\t10k-images-idx3-ubyte.gz to ./data\MNIST\raw

Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz
Downloading http://yann.lecun.com/exdb/mnist/t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz


100.0%

Extracting ./data\MNIST\raw\t10k-labels-idx1-ubyte.gz to ./data\MNIST\raw






In [5]:
# Model initialisation
# Seed PyTorch's global RNG first so the random weight initialisation below
# is the same on every Restart & Run All. The value 0 is arbitrary.
torch.manual_seed(0)

# 784 inputs (flattened 28x28 image) -> 128 -> 64 -> 10 class scores.
# Only the two hidden layers get ReLU; the output layer has no activation
# because nn.CrossEntropyLoss expects unnormalised scores (logits).
model = MLP(layer_sizes=[784, 128, 64, 10], activation_funcs=[F.relu, F.relu])
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.003)

In [6]:
# Training loop: a fixed number of passes over the training set
epochs = 5
for epoch in range(epochs):
    # Sum of the per-batch losses, reset at the start of every epoch
    running_loss = 0
    for images, labels in train_loader:
        # Collapse each image in the batch into a single row (784 values for MNIST)
        images = images.view(len(images), -1)

        # Forward pass and loss for this batch
        output = model(images)
        loss = criterion(output, labels)

        # Clear gradients left from the previous step, then backpropagate and update
        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

        running_loss += loss.item()
    # Report the mean batch loss for the epoch
    print(f"Epoch {epoch+1} - Training loss: {running_loss/len(train_loader)}")

KeyboardInterrupt: 