In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
import mlflow
import mlflow.pytorch
import itertools
import torch.nn.functional as F
import os

In [2]:
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
print(f"Using device: {device}")

Using device: cuda


In [3]:
transform = transforms.Compose(
    [transforms.ToTensor(), transforms.Normalize((0.5,), (0.5,))]
)

train_ds = datasets.MNIST(
    root="../data", train=True, download=True, transform=transform
)
test_ds = datasets.MNIST(
    root="../data", train=False, download=True, transform=transform
)

train_loader = DataLoader(train_ds, batch_size=64, shuffle=True)
test_loader = DataLoader(test_ds, batch_size=64)

In [4]:
class ModelA(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv = nn.Sequential(nn.Conv2d(1, 16, 3, 1), nn.ReLU(), nn.MaxPool2d(2))
        self.fc = nn.Linear(16 * 13 * 13, 10)

    def forward(self, x):
        x = self.conv(x)
        x = x.view(x.size(0), -1)
        return self.fc(x)

In [5]:
class ModelB(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv = nn.Sequential(
            nn.Conv2d(1, 32, 3, 1),
            nn.ReLU(),
            nn.Conv2d(32, 64, 3, 1),
            nn.ReLU(),
            nn.MaxPool2d(2),
        )
        self.fc = nn.Linear(64 * 12 * 12, 10)

    def forward(self, x):
        x = self.conv(x)
        x = x.view(x.size(0), -1)
        return self.fc(x)

In [6]:
class ModelC(nn.Module):
    def __init__(self):
        super().__init__()
        self.conv1 = nn.Conv2d(1, 32, 3, 1)
        self.conv2 = nn.Conv2d(32, 64, 3, 1)
        self.pool = nn.MaxPool2d(2, 2)
        self.dropout1 = nn.Dropout(0.25)
        self.fc1 = nn.Linear(64 * 5 * 5, 128)
        self.fc2 = nn.Linear(128, 10)

    def forward(self, x):
        x = self.pool(F.relu(self.conv1(x)))
        x = self.pool(F.relu(self.conv2(x)))
        x = torch.flatten(x, 1)
        x = self.dropout1(x)
        x = F.relu(self.fc1(x))
        x = self.fc2(x)
        return x

In [7]:
def train_model(model, train_loader, epochs, lr):
    model = model.to(device)
    criterion = nn.CrossEntropyLoss()
    optimizer = optim.Adam(model.parameters(), lr=lr)

    for epoch in range(epochs):
        model.train()
        total_loss = 0
        correct = 0
        total = 0

        for data, target in train_loader:
            data, target = data.to(device), target.to(device)
            optimizer.zero_grad()
            outputs = model(data)
            loss = criterion(outputs, target)
            loss.backward()
            optimizer.step()

            total_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == target).sum().item()
            total += target.size(0)

        avg_loss = total_loss / len(train_loader)
        acc = correct / total
        print(f"Epoch {epoch+1}: Loss = {avg_loss:.4f}, Accuracy = {acc:.4f}")

    return model

In [8]:
def evaluate_model(model, test_loader, criterion):
    model.eval()
    test_loss = 0
    correct = 0
    total = 0

    with torch.no_grad():
        for data, target in test_loader:
            data, target = data.to(device), target.to(device)
            outputs = model(data)
            loss = criterion(outputs, target)
            test_loss += loss.item()
            _, predicted = torch.max(outputs, 1)
            correct += (predicted == target).sum().item()
            total += target.size(0)

    avg_loss = test_loss / len(test_loader)
    acc = correct / total
    print(f"Test Loss: {avg_loss:.4f}, Test Accuracy: {acc:.4f}")
    return avg_loss, acc

In [9]:
learning_rates = [0.01, 0.001]
epochs_list = [5, 10]
model_classes = [ModelA, ModelB, ModelC]

param_combinations = list(itertools.product(model_classes, learning_rates, epochs_list))
criterion = nn.CrossEntropyLoss()

for model_class, lr, epochs in param_combinations:
    print(f"\n--- Training {model_class.__name__} | lr: {lr}, epochs: {epochs} ---")
    model = model_class()

    with mlflow.start_run(run_name=f"{model_class.__name__}_run"):
        mlflow.log_param("model_name", model_class.__name__)
        mlflow.log_param("learning_rate", lr)
        mlflow.log_param("epochs", epochs)

        model = train_model(model, train_loader, epochs, lr)
        test_loss, test_acc = evaluate_model(model, test_loader, criterion)

        mlflow.log_metric("test_loss", test_loss)
        mlflow.log_metric("test_accuracy", test_acc)

        # Save local checkpoint
        model_path = f"{model_class.__name__}.pth"
        torch.save(model.state_dict(), model_path)
        print(f"Model saved to {model_path}")

        # Log model to MLflow
        mlflow.pytorch.log_model(model, artifact_path="model")


--- Training ModelA | lr: 0.01, epochs: 5 ---
Epoch 1: Loss = 0.1561, Accuracy = 0.9548
Epoch 2: Loss = 0.0719, Accuracy = 0.9778
Epoch 3: Loss = 0.0557, Accuracy = 0.9829
Epoch 4: Loss = 0.0479, Accuracy = 0.9849
Epoch 5: Loss = 0.0409, Accuracy = 0.9873




Test Loss: 0.0720, Test Accuracy: 0.9788
Model saved to ModelA.pth





--- Training ModelA | lr: 0.01, epochs: 10 ---
Epoch 1: Loss = 0.1664, Accuracy = 0.9511
Epoch 2: Loss = 0.0695, Accuracy = 0.9791
Epoch 3: Loss = 0.0557, Accuracy = 0.9832
Epoch 4: Loss = 0.0466, Accuracy = 0.9855
Epoch 5: Loss = 0.0410, Accuracy = 0.9865
Epoch 6: Loss = 0.0367, Accuracy = 0.9883
Epoch 7: Loss = 0.0318, Accuracy = 0.9898
Epoch 8: Loss = 0.0297, Accuracy = 0.9908
Epoch 9: Loss = 0.0285, Accuracy = 0.9910
Epoch 10: Loss = 0.0247, Accuracy = 0.9924




Test Loss: 0.1387, Test Accuracy: 0.9763
Model saved to ModelA.pth





--- Training ModelA | lr: 0.001, epochs: 5 ---
Epoch 1: Loss = 0.2347, Accuracy = 0.9356
Epoch 2: Loss = 0.0834, Accuracy = 0.9756
Epoch 3: Loss = 0.0628, Accuracy = 0.9813
Epoch 4: Loss = 0.0529, Accuracy = 0.9844
Epoch 5: Loss = 0.0459, Accuracy = 0.9862




Test Loss: 0.0565, Test Accuracy: 0.9827
Model saved to ModelA.pth





--- Training ModelA | lr: 0.001, epochs: 10 ---
Epoch 1: Loss = 0.2510, Accuracy = 0.9304
Epoch 2: Loss = 0.0911, Accuracy = 0.9739
Epoch 3: Loss = 0.0666, Accuracy = 0.9806
Epoch 4: Loss = 0.0565, Accuracy = 0.9834
Epoch 5: Loss = 0.0484, Accuracy = 0.9855
Epoch 6: Loss = 0.0437, Accuracy = 0.9866
Epoch 7: Loss = 0.0388, Accuracy = 0.9877
Epoch 8: Loss = 0.0366, Accuracy = 0.9884
Epoch 9: Loss = 0.0312, Accuracy = 0.9903
Epoch 10: Loss = 0.0291, Accuracy = 0.9905




Test Loss: 0.0597, Test Accuracy: 0.9823
Model saved to ModelA.pth





--- Training ModelB | lr: 0.01, epochs: 5 ---
Epoch 1: Loss = 0.1570, Accuracy = 0.9565
Epoch 2: Loss = 0.0758, Accuracy = 0.9769
Epoch 3: Loss = 0.0671, Accuracy = 0.9799
Epoch 4: Loss = 0.0578, Accuracy = 0.9825
Epoch 5: Loss = 0.0563, Accuracy = 0.9823




Test Loss: 0.0761, Test Accuracy: 0.9768
Model saved to ModelB.pth





--- Training ModelB | lr: 0.01, epochs: 10 ---
Epoch 1: Loss = 0.1571, Accuracy = 0.9608
Epoch 2: Loss = 0.0702, Accuracy = 0.9792
Epoch 3: Loss = 0.0609, Accuracy = 0.9815
Epoch 4: Loss = 0.0512, Accuracy = 0.9843
Epoch 5: Loss = 0.0506, Accuracy = 0.9844
Epoch 6: Loss = 0.0476, Accuracy = 0.9852
Epoch 7: Loss = 0.0619, Accuracy = 0.9838
Epoch 8: Loss = 0.0443, Accuracy = 0.9867
Epoch 9: Loss = 0.0428, Accuracy = 0.9873
Epoch 10: Loss = 0.0511, Accuracy = 0.9852




Test Loss: 0.0768, Test Accuracy: 0.9790
Model saved to ModelB.pth





--- Training ModelB | lr: 0.001, epochs: 5 ---
Epoch 1: Loss = 0.1325, Accuracy = 0.9609
Epoch 2: Loss = 0.0451, Accuracy = 0.9859
Epoch 3: Loss = 0.0303, Accuracy = 0.9903
Epoch 4: Loss = 0.0222, Accuracy = 0.9929
Epoch 5: Loss = 0.0162, Accuracy = 0.9947




Test Loss: 0.0396, Test Accuracy: 0.9886
Model saved to ModelB.pth





--- Training ModelB | lr: 0.001, epochs: 10 ---
Epoch 1: Loss = 0.1317, Accuracy = 0.9601
Epoch 2: Loss = 0.0436, Accuracy = 0.9865
Epoch 3: Loss = 0.0299, Accuracy = 0.9907
Epoch 4: Loss = 0.0206, Accuracy = 0.9932
Epoch 5: Loss = 0.0150, Accuracy = 0.9952
Epoch 6: Loss = 0.0124, Accuracy = 0.9958
Epoch 7: Loss = 0.0094, Accuracy = 0.9967
Epoch 8: Loss = 0.0068, Accuracy = 0.9978
Epoch 9: Loss = 0.0074, Accuracy = 0.9975
Epoch 10: Loss = 0.0048, Accuracy = 0.9986




Test Loss: 0.0517, Test Accuracy: 0.9875
Model saved to ModelB.pth





--- Training ModelC | lr: 0.01, epochs: 5 ---
Epoch 1: Loss = 0.2874, Accuracy = 0.9117
Epoch 2: Loss = 0.1535, Accuracy = 0.9529
Epoch 3: Loss = 0.1384, Accuracy = 0.9577
Epoch 4: Loss = 0.1295, Accuracy = 0.9615
Epoch 5: Loss = 0.1257, Accuracy = 0.9622




Test Loss: 0.0633, Test Accuracy: 0.9800
Model saved to ModelC.pth





--- Training ModelC | lr: 0.01, epochs: 10 ---
Epoch 1: Loss = 0.3388, Accuracy = 0.8911
Epoch 2: Loss = 0.1384, Accuracy = 0.9573
Epoch 3: Loss = 0.1209, Accuracy = 0.9635
Epoch 4: Loss = 0.1156, Accuracy = 0.9658
Epoch 5: Loss = 0.1110, Accuracy = 0.9670
Epoch 6: Loss = 0.1051, Accuracy = 0.9697
Epoch 7: Loss = 0.1056, Accuracy = 0.9700
Epoch 8: Loss = 0.1007, Accuracy = 0.9707
Epoch 9: Loss = 0.1012, Accuracy = 0.9709
Epoch 10: Loss = 0.0995, Accuracy = 0.9717




Test Loss: 0.0567, Test Accuracy: 0.9843
Model saved to ModelC.pth





--- Training ModelC | lr: 0.001, epochs: 5 ---
Epoch 1: Loss = 0.1583, Accuracy = 0.9530
Epoch 2: Loss = 0.0516, Accuracy = 0.9841
Epoch 3: Loss = 0.0388, Accuracy = 0.9868
Epoch 4: Loss = 0.0304, Accuracy = 0.9906
Epoch 5: Loss = 0.0238, Accuracy = 0.9922




Test Loss: 0.0276, Test Accuracy: 0.9915
Model saved to ModelC.pth





--- Training ModelC | lr: 0.001, epochs: 10 ---
Epoch 1: Loss = 0.1530, Accuracy = 0.9537
Epoch 2: Loss = 0.0502, Accuracy = 0.9843
Epoch 3: Loss = 0.0381, Accuracy = 0.9879
Epoch 4: Loss = 0.0294, Accuracy = 0.9905
Epoch 5: Loss = 0.0238, Accuracy = 0.9926
Epoch 6: Loss = 0.0195, Accuracy = 0.9934
Epoch 7: Loss = 0.0180, Accuracy = 0.9941
Epoch 8: Loss = 0.0154, Accuracy = 0.9951
Epoch 9: Loss = 0.0131, Accuracy = 0.9956
Epoch 10: Loss = 0.0134, Accuracy = 0.9952




Test Loss: 0.0276, Test Accuracy: 0.9920
Model saved to ModelC.pth


