In [1]:
import torch
import torchvision
from torch import nn
import torchvision.transforms as transforms
from torch.utils.data.dataset import random_split
from torch.utils.data import Subset, DataLoader, random_split
import torch.optim as optim
import matplotlib.pyplot as plt
import numpy as np



torch.manual_seed(42)
torch.set_default_dtype(torch.double)

In [2]:
transform = transforms.Compose([
    transforms.ToTensor(),                          #converts to tensor
    transforms.Lambda(lambda x: torch.flatten(x))   #Flattens the tensor
])

In [3]:
trainset = torchvision.datasets.CIFAR10(root='./data', train=True,
                                         download=True, transform=transform)

testset = torchvision.datasets.CIFAR10(root='./data', train=False,
                                        download=True, transform=transform)
                                        

Files already downloaded and verified
Files already downloaded and verified


In [4]:
classes_to_keep = ['bird', 'airplane']
class_indices = [trainset.class_to_idx[c] for c in classes_to_keep]

In [5]:
subset_trainset = Subset(trainset, [i for i in range(len(trainset)) if trainset.targets[i] in class_indices])
subset_testset = Subset(testset, [i for i in range(len(testset)) if testset.targets[i] in class_indices])

In [6]:
train_size = int(0.8 * len(subset_trainset))
val_size = len(subset_trainset) - train_size
trainset, valset = random_split(subset_trainset, [train_size, val_size])

In [7]:
train_loader = DataLoader(trainset, batch_size=32)
valloader = DataLoader(valset, batch_size=32)
testloader = DataLoader(subset_testset, batch_size=32)

In [8]:
class MyMLP(nn.Module):
    def __init__(self):
        super(MyMLP, self).__init__()
        self.fc1 = nn.Linear(3072, 512)  # Fully connected layer 1
        self.fc2 = nn.Linear(512, 128)   # Fully connected layer 2
        self.fc3 = nn.Linear(128, 32)    # Fully connected layer 3
        self.fc4 = nn.Linear(32, 2)      # Fully connected  output layer

    def forward(self, x):
        # Forward propagation via ReLU
        x = torch.relu(self.fc1(x))
        x = torch.relu(self.fc2(x))
        x = torch.relu(self.fc3(x))
        x = self.fc4(x)  # No activation function at the output
        return x

In [9]:
def train(n_epochs, optimizer, model, loss_fn, train_loader):
    for epoch in range(1, n_epochs+1):
        running_loss = 0.0
        for inputs, labels in train_loader:
            optimizer.zero_grad()  # Reset gradients in each iteration

            outputs = model(inputs)  # Forward propagation
            labels = torch.Tensor([[1.0, 0.0] if i.item() == 0 else [0.0, 1.0] for i in labels])
            loss = loss_fn(outputs, labels)  # Loss calculation
            
            loss.backward()  # Back propagation
            optimizer.step()  # Update parameters

            running_loss += loss.item() * inputs.size(0)

        epoch_loss = running_loss / len(train_loader.dataset)
        print(f'Epoch [{epoch}/{n_epochs}], Loss: {epoch_loss:.4f}')


In [10]:
def train_manual_update(n_epochs, model, loss_fn, train_loader, lr, weight_decay=0.0, momentum=0.0):
    velocities ={i: 0 for i, p in enumerate(model.parameters())}
    for epoch in range(1, n_epochs+1):
        running_loss = 0.0
        for inputs, labels in train_loader:
            model.train()  # Make sure the model is in train 
            outputs = model(inputs)  # Forward Propagation

            labels = torch.Tensor([[1.0, 0.0] if i.item() == 0 else [0.0, 1.0] for i in labels])
            loss = loss_fn(outputs, labels)  # Loss Calculation

            # Calculate gradients manualy
            loss.backward()

            # Update parameters manually using learning rate
            with torch.no_grad():
                for i, param in enumerate(model.parameters()):
                    gradient = param.grad

                    if weight_decay != 0:
                        gradient = gradient.add_(param.data, alpha=weight_decay)

                    if momentum != 0:
                        velocities[i] = velocities[i] * momentum + gradient
                        gradient = velocities[i]
                    
                    new_param = param.data.add_(gradient, alpha=-lr)
                    param.copy_(new_param)
                    param.grad.zero_()

            # Reset gradients
            model.zero_grad()

            running_loss += loss.item() * inputs.size(0)

        # Average loss per epoch
        epoch_loss = running_loss / len(train_loader.dataset)
        print(f'Epoch [{epoch}/{n_epochs}], Loss: {epoch_loss:.4f}')

    # We return the final loss value in able to afterwards know the best aproach
    return epoch_loss


In [11]:
n_epochs = 10
lr = 0.001

model = MyMLP()
model_manual = MyMLP()

criterion = nn.CrossEntropyLoss()
optimizer = optim.SGD(model.parameters(), lr=lr)

train(n_epochs, optimizer, model, criterion, train_loader)
train_manual_update(n_epochs, model_manual, criterion, train_loader, lr, weight_decay=0.1, momentum=0.9)

Epoch [1/10], Loss: 0.6920
Epoch [2/10], Loss: 0.6877
Epoch [3/10], Loss: 0.6848
Epoch [4/10], Loss: 0.6819
Epoch [5/10], Loss: 0.6789
Epoch [6/10], Loss: 0.6755
Epoch [7/10], Loss: 0.6717
Epoch [8/10], Loss: 0.6673
Epoch [9/10], Loss: 0.6623
Epoch [10/10], Loss: 0.6563
Epoch [1/10], Loss: 0.6793
Epoch [2/10], Loss: 0.6587
Epoch [3/10], Loss: 0.6276
Epoch [4/10], Loss: 0.5856
Epoch [5/10], Loss: 0.5553
Epoch [6/10], Loss: 0.5409
Epoch [7/10], Loss: 0.5340
Epoch [8/10], Loss: 0.5302
Epoch [9/10], Loss: 0.5280
Epoch [10/10], Loss: 0.5265


0.5264940635390785

In [12]:
model.eval()
model_manual.eval()

epoch_loss_train = 0.0
epoch_loss_manual = 0.0

with torch.no_grad():
    for inputs, labels in train_loader:
        outputs = model(inputs)
        
        labels = torch.Tensor([[1.0, 0.0] if i.item() == 0 else [0.0, 1.0] for i in labels])
        loss = criterion(outputs, labels)
        epoch_loss_train += loss.item() * inputs.size(0)

        outputs_manual = model_manual(inputs)
        loss_manual = criterion(outputs_manual, labels)
        epoch_loss_manual += loss_manual.item() * inputs.size(0)

epoch_loss_train /= len(train_loader.dataset)
epoch_loss_manual /= len(train_loader.dataset)

# Print loss for each model
print("Loss (train):", epoch_loss_train)
print("Loss (train_manual_update):", epoch_loss_manual)

Loss (train): 0.6526539619248303
Loss (train_manual_update): 0.5233056174851183


In [18]:
# Train 4 models with different learning rates, momentum and weight decay
models = []
for _ in range(4):
    model = MyMLP()
    models.append(model)

n_epochs = 10
criterion = nn.CrossEntropyLoss()
performance = []
performance.append(train_manual_update(n_epochs, models[0], criterion, train_loader, lr=0.001, weight_decay=0.0, momentum=0.0))
performance.append(train_manual_update(n_epochs, models[1], criterion, train_loader, lr=0.01, weight_decay=0.0, momentum=0.0))
performance.append(train_manual_update(n_epochs, models[2], criterion, train_loader, lr=0.001, weight_decay=0.1, momentum=0.9))
performance.append(train_manual_update(n_epochs, models[3], criterion, train_loader, lr=0.01, weight_decay=0.0, momentum=0.9))

Epoch [1/10], Loss: 0.6909
Epoch [2/10], Loss: 0.6864
Epoch [3/10], Loss: 0.6825
Epoch [4/10], Loss: 0.6782
Epoch [5/10], Loss: 0.6731
Epoch [6/10], Loss: 0.6668
Epoch [7/10], Loss: 0.6579
Epoch [8/10], Loss: 0.6485
Epoch [9/10], Loss: 0.6379
Epoch [10/10], Loss: 0.6256
Epoch [1/10], Loss: 0.6602
Epoch [2/10], Loss: 0.5659
Epoch [3/10], Loss: 0.5195
Epoch [4/10], Loss: 0.5060
Epoch [5/10], Loss: 0.4971
Epoch [6/10], Loss: 0.4897
Epoch [7/10], Loss: 0.4824
Epoch [8/10], Loss: 0.4748
Epoch [9/10], Loss: 0.4665
Epoch [10/10], Loss: 0.4577
Epoch [1/10], Loss: 0.6864
Epoch [2/10], Loss: 0.6786
Epoch [3/10], Loss: 0.6707
Epoch [4/10], Loss: 0.6577
Epoch [5/10], Loss: 0.6332
Epoch [6/10], Loss: 0.5996
Epoch [7/10], Loss: 0.5743
Epoch [8/10], Loss: 0.5602
Epoch [9/10], Loss: 0.5524
Epoch [10/10], Loss: 0.5479
Epoch [1/10], Loss: 0.5604
Epoch [2/10], Loss: 0.5134
Epoch [3/10], Loss: 0.4920
Epoch [4/10], Loss: 0.4744
Epoch [5/10], Loss: 0.4626
Epoch [6/10], Loss: 0.4510
Epoch [7/10], Loss: 0.443

In [20]:
# Get the best perfomance
bestModelIndex = performance.index(min(performance))
print("The model", bestModelIndex+1, "had the best approach")
bestModel = models[bestModelIndex]

# Model 4 has the best performance
# Model 4 on unseen data

bestModel.eval()

epoch_loss_test = 0.0

with torch.no_grad():
    for inputs, labels in testloader:
        outputs = bestModel(inputs)
        
        labels = torch.Tensor([[1.0, 0.0] if i.item() == 0 else [0.0, 1.0] for i in labels])
        loss = criterion(outputs, labels)
        epoch_loss_test += loss.item() * inputs.size(0)

epoch_loss_test /= len(testloader.dataset)

print("Loss validation (test):", epoch_loss_test)



The model 4 had the best approach
Loss validation (test): 0.4307105165541154
