In [None]:
class MLP(nn.Module):
    """Four-layer fully connected network with ReLU activations and dropout.

    The hidden widths are 4x, 2x and 1x ``num_input_features``; the last
    layer projects to ``num_target_features``.

    Args:
        params: Mapping providing the keys ``"num_input_features"``,
            ``"num_target_features"`` and ``"dropout_rate"``.
    """

    def __init__(self, params):
        super(MLP, self).__init__()
        self.num_input_features = params["num_input_features"]
        self.num_target_features = params["num_target_features"]
        self.dropout_rate = params["dropout_rate"]

        self.fc1 = nn.Linear(self.num_input_features, self.num_input_features * 4)
        self.fc2 = nn.Linear(self.num_input_features * 4, self.num_input_features * 2)
        self.fc3 = nn.Linear(self.num_input_features * 2, self.num_input_features)
        self.fc4 = nn.Linear(self.num_input_features, self.num_target_features)

    def forward(self, x):
        """Return the output of ``fc4`` after three Linear -> ReLU -> dropout stages.

        Args:
            x: Input tensor whose last dimension is ``num_input_features``.

        Returns:
            Tensor whose last dimension is ``num_target_features``.
        """
        for layer in (self.fc1, self.fc2, self.fc3):
            x = F.relu(layer(x))
            # F.dropout defaults to training=True, so the module mode has to be
            # passed explicitly; otherwise units are still dropped after
            # model.eval() and inference becomes random.
            x = F.dropout(x, self.dropout_rate, training=self.training)
        return self.fc4(x)

In [None]:
# Build the MLP from the "model" section of params and place it on the target device
# (Module.to returns the module itself, so the chained form binds the same object).
model = MLP(params['model']).to(device)

In [None]:
class MLP2(nn.Module):
    """Variable-depth MLP whose hidden widths first double and then halve.

    With ``num_layers`` linear layers in total, the first ``num_layers // 2``
    each double the width, the following ``num_layers - num_layers // 2 - 1``
    each halve it (integer division), and the last one maps to
    ``num_target_features``. Every hidden linear layer is followed by ReLU
    and dropout.

    Args:
        params: Mapping providing ``"num_input_features"``,
            ``"num_target_features"``, ``"dropout_rate"`` and ``"num_layers"``.
    """

    def __init__(self, params):
        super().__init__()
        self.num_input_features = params["num_input_features"]
        self.num_target_features = params["num_target_features"]
        self.dropout_rate = params["dropout_rate"]
        self.num_layers = params["num_layers"]

        n_widen = self.num_layers // 2
        n_narrow = self.num_layers - n_widen - 1

        # Work out the full sequence of layer widths up front.
        widths = [self.num_input_features]
        for _ in range(n_widen):
            widths.append(widths[-1] * 2)
        for _ in range(n_narrow):
            widths.append(widths[-1] // 2)

        # One Linear -> ReLU -> Dropout triple per consecutive pair of widths.
        stack = []
        for fan_in, fan_out in zip(widths, widths[1:]):
            stack += [
                nn.Linear(fan_in, fan_out),
                nn.ReLU(),
                nn.Dropout(self.dropout_rate),
            ]

        # Output projection has no activation or dropout.
        stack.append(nn.Linear(widths[-1], self.num_target_features))

        self.model = nn.Sequential(*stack)

    def forward(self, x):
        """Apply the sequential stack to ``x`` and return its output."""
        return self.model(x)

In [None]:
from torchinfo import summary

# NOTE(review): batch_size is assigned but not used by the summary call below,
# which passes a leading dimension of 1.
batch_size = 16
# NOTE(review): no cell above this one (in the visible notebook) defines `model2`,
# so this call relies on kernel state left by another cell. Confirm which model
# is meant and that it accepts inputs of width 768.
summary(model2, input_size=(1, 768))

In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
# from torchinfo import summary
from pathlib import Path

# ===== 1. Dataset setup =====
# The working directory is expected to hold train/ and test/ image folders.
data_dir = Path(".")

# Every image is resized to 64x64 and converted to a tensor.
transform = transforms.Compose([transforms.Resize((64, 64)), transforms.ToTensor()])

# Same preprocessing for both splits; the train folder is opened first.
train_dataset, test_dataset = (
    datasets.ImageFolder(data_dir / split, transform=transform)
    for split in ("train", "test")
)

# Only the training loader shuffles.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=16)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=16)

num_classes = len(train_dataset.classes)
input_size = 3 * 64 * 64  # channels * height * width of one flattened image

# Use the GPU when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# ===== 2. Model definitions =====
# 1) Linear model (no encoder)
class LinearModel(nn.Module):
    """A single linear layer applied to the flattened input.

    Args:
        input_size: Number of features per sample after flattening.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()
        self.fc = nn.Linear(input_size, num_classes)

    def forward(self, x):
        """Flatten every sample to one vector and return ``fc`` applied to it."""
        batch = x.shape[0]
        flat = x.view(batch, -1)
        logits = self.fc(flat)
        return logits

# 2) Small Neural Net (2 layers)
class SmallNN(nn.Module):
    """Two-layer network: Linear -> ReLU -> Linear on the flattened input.

    Args:
        input_size: Number of features per sample after flattening.
        num_classes: Number of output logits.
        hidden_size: Width of the hidden layer.
    """

    def __init__(self, input_size, num_classes, hidden_size=256):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Flatten each sample, apply the hidden layer with ReLU, return logits."""
        flat = x.view(x.shape[0], -1)
        hidden = self.fc1(flat)
        activated = F.relu(hidden)
        return self.fc2(activated)

# 3) Four-layer MLP (your original MLP)
class MLP4(nn.Module):
    """Four-layer MLP (1024 -> 512 -> 256 -> num_classes) with ReLU and dropout.

    Args:
        input_size: Number of features per sample after flattening.
        num_classes: Number of output logits.
        dropout_rate: Drop probability applied after each hidden activation.
    """

    def __init__(self, input_size, num_classes, dropout_rate=0.2):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 256)
        self.fc4 = nn.Linear(256, num_classes)
        self.dropout_rate = dropout_rate

    def forward(self, x):
        """Flatten each sample and return the class logits.

        Dropout is only applied while the module is in training mode.
        """
        x = x.view(x.size(0), -1)
        for layer in (self.fc1, self.fc2, self.fc3):
            x = F.relu(layer(x))
            # F.dropout defaults to training=True; without forwarding the
            # module mode, units are still dropped after model.eval() and
            # evaluation metrics become noisy.
            x = F.dropout(x, self.dropout_rate, training=self.training)
        return self.fc4(x)


# ===== 3. Instantiate models =====
# One instance per architecture, built in the order Linear, SmallNN, MLP4 and
# moved to the selected device.
model1, model2, model3 = (
    arch(input_size, num_classes).to(device)
    for arch in (LinearModel, SmallNN, MLP4)
)

# ===== 4. Summaries =====
# print("=== Linear Model ===")
# summary(model1, input_size=(1, 3, 64, 64))
# print("\n=== Small NN ===")
# summary(model2, input_size=(1, 3, 64, 64))
# print("\n=== Four-layer MLP ===")
# summary(model3, input_size=(1, 3, 64, 64))

# ===== 5. Quick forward pass =====
# Take the first batch from the training loader (labels are not needed here)
# and move the images to the selected device.
sample_data, _ = next(iter(train_loader))
sample_data = sample_data.to(device)

# Feed the same batch through all three models without tracking gradients.
with torch.no_grad():
    out1, out2, out3 = (net(sample_data) for net in (model1, model2, model3))

# Report the output shape of every model.
print("\nOutput shapes:")
print("Linear:", out1.shape)
print("Small NN:", out2.shape)
print("Four-layer MLP:", out3.shape)



Output shapes:
Linear: torch.Size([16, 2])
Small NN: torch.Size([16, 2])
Four-layer MLP: torch.Size([16, 2])


In [3]:
import torch.optim as optim
import mlflow
import mlflow.pytorch
import torch.nn as nn

def train_epoch(model, dataloader, criterion, optimizer, device):
    """Run one optimisation pass over ``dataloader``.

    Args:
        model: Module to train; it is switched to training mode.
        dataloader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar loss.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(loss, accuracy)``: the batch-size-weighted loss sum divided by
        the number of labels seen, and the fraction of correct predictions.
    """
    model.train()
    loss_sum = 0
    n_correct = 0
    n_seen = 0

    for batch_x, batch_y in dataloader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        optimizer.zero_grad()
        logits = model(batch_x)
        batch_loss = criterion(logits, batch_y)
        batch_loss.backward()
        optimizer.step()

        # Weight the batch loss by its sample count so that a smaller final
        # batch does not skew the average.
        loss_sum += batch_loss.item() * batch_x.size(0)
        preds = logits.max(1)[1]  # index of the largest logit per sample
        n_correct += (preds == batch_y).sum().item()
        n_seen += batch_y.size(0)

    return loss_sum / n_seen, n_correct / n_seen


def eval_model(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` without updating it.

    Args:
        model: Module to evaluate; it is switched to evaluation mode.
        dataloader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar loss.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(loss, accuracy)``: the batch-size-weighted loss sum divided by
        the number of labels seen, and the fraction of correct predictions.
    """
    model.eval()
    loss_sum = 0
    n_correct = 0
    n_seen = 0

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for batch_x, batch_y in dataloader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            logits = model(batch_x)
            batch_loss = criterion(logits, batch_y)

            loss_sum += batch_loss.item() * batch_x.size(0)
            preds = logits.max(1)[1]  # index of the largest logit per sample
            n_correct += (preds == batch_y).sum().item()
            n_seen += batch_y.size(0)

    return loss_sum / n_seen, n_correct / n_seen


# Hyperparameters shared by every model trained below
epochs, learning_rate = 5, 1e-3
criterion = nn.CrossEntropyLoss()

# Run name -> already-instantiated model, in training order
models = {"Linear": model1, "SmallNN": model2, "MLP4": model3}

# Close any MLflow run that is still active (for example after an interrupted
# cell) before new runs are started.
if mlflow.active_run():
    mlflow.end_run()

# Train every model in its own MLflow run: hyperparameters are logged once per
# run, train/test loss and accuracy after every epoch.
for name, model in models.items():
    with mlflow.start_run(run_name=name):
        print(f"\nTraining {name}...")
        # A new Adam optimizer is created per model, so no optimizer state is
        # carried over from one run to the next.
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        # Log hyperparameters
        mlflow.log_param("model_name", name)
        mlflow.log_param("learning_rate", learning_rate)
        mlflow.log_param("epochs", epochs)

        for epoch in range(epochs):
            train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
            # The test loader is evaluated after every training epoch.
            test_loss, test_acc = eval_model(model, test_loader, criterion, device)

            # step=epoch is zero-based; the printed epoch number below is one-based.
            mlflow.log_metric("train_loss", train_loss, step=epoch)
            mlflow.log_metric("train_accuracy", train_acc, step=epoch)
            mlflow.log_metric("test_loss", test_loss, step=epoch)
            mlflow.log_metric("test_accuracy", test_acc, step=epoch)

            print(
                f"Epoch {epoch+1}/{epochs} - "
                f"Train loss: {train_loss:.4f}, Train acc: {train_acc:.4f} | "
                f"Test loss: {test_loss:.4f}, Test acc: {test_acc:.4f}"
            )

        # NOTE(review): example_input is created but never passed to log_model
        # below; presumably it was meant as an input example. Confirm or remove.
        example_input = torch.randn(1, 3, 64, 64).to(device)
        # Log the trained model under the "models" artifact path of this run.
        mlflow.pytorch.log_model(model, artifact_path="models")



Training Linear...
Epoch 1/5 - Train loss: 0.8502, Train acc: 0.6105 | Test loss: 1.8078, Test acc: 0.5083
Epoch 2/5 - Train loss: 0.8570, Train acc: 0.6095 | Test loss: 0.9808, Test acc: 0.5516
Epoch 3/5 - Train loss: 0.8654, Train acc: 0.6111 | Test loss: 0.7352, Test acc: 0.6149
Epoch 4/5 - Train loss: 0.8510, Train acc: 0.6131 | Test loss: 1.1253, Test acc: 0.5545




Epoch 5/5 - Train loss: 0.8642, Train acc: 0.6088 | Test loss: 0.8507, Test acc: 0.5780





Training SmallNN...
Epoch 1/5 - Train loss: 0.5509, Train acc: 0.7151 | Test loss: 0.5256, Test acc: 0.7334
Epoch 2/5 - Train loss: 0.5371, Train acc: 0.7254 | Test loss: 0.5419, Test acc: 0.7255
Epoch 3/5 - Train loss: 0.5307, Train acc: 0.7308 | Test loss: 0.5434, Test acc: 0.7182
Epoch 4/5 - Train loss: 0.5275, Train acc: 0.7328 | Test loss: 0.5331, Test acc: 0.7302




Epoch 5/5 - Train loss: 0.5241, Train acc: 0.7370 | Test loss: 0.5609, Test acc: 0.7027





Training MLP4...
Epoch 1/5 - Train loss: 0.6333, Train acc: 0.6273 | Test loss: 0.6203, Test acc: 0.6262
Epoch 2/5 - Train loss: 0.6086, Train acc: 0.6595 | Test loss: 0.6054, Test acc: 0.6585
Epoch 3/5 - Train loss: 0.6011, Train acc: 0.6674 | Test loss: 0.5863, Test acc: 0.6832
Epoch 4/5 - Train loss: 0.6013, Train acc: 0.6625 | Test loss: 0.6054, Test acc: 0.6474




Epoch 5/5 - Train loss: 0.6051, Train acc: 0.6561 | Test loss: 0.6033, Test acc: 0.6426




In [None]:
from sklearn.neighbors import KNeighborsClassifier
from sklearn.metrics import accuracy_score
import torch
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from pathlib import Path
import numpy as np

# The working directory is expected to hold train/ and test/ image folders.
data_dir = Path(".")

# Every image is resized to 64x64 and converted to a tensor.
transform = transforms.Compose([transforms.Resize((64, 64)), transforms.ToTensor()])

# Same preprocessing for both splits; the train folder is opened first.
train_dataset, test_dataset = (
    datasets.ImageFolder(data_dir / split, transform=transform)
    for split in ("train", "test")
)

# Neither loader shuffles: the batches are only used for feature extraction.
train_loader = DataLoader(train_dataset, shuffle=False, batch_size=32)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=32)

# Use the GPU when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# "Encoder" with no parameters: it only flattens each image into a vector.
class FrozenEncoder(torch.nn.Module):
    """Parameter-free module that reshapes ``(batch, ...)`` input to ``(batch, -1)``."""

    def forward(self, x):
        """Return a flattened view of ``x`` that keeps the first dimension."""
        batch = x.shape[0]
        return x.view(batch, -1)

# Instantiate the encoder on the selected device and put it in evaluation mode.
# It has no parameters, so there is nothing to train (eval() returns the module).
encoder = FrozenEncoder().to(device).eval()

# extract features for train and test
def extract_features(loader):
    """Encode every batch of ``loader`` and collect the results as NumPy arrays.

    Args:
        loader: Iterable yielding ``(images, labels)`` tensor batches.

    Returns:
        Tuple ``(features, labels)``: the encoder outputs stacked row-wise and
        the labels concatenated into one array.
    """
    feature_chunks = []
    label_chunks = []

    # No gradients are needed: the encoder output is only used as input to KNN.
    with torch.no_grad():
        for imgs, lbls in loader:
            encoded = encoder(imgs.to(device))
            # Move back to the CPU before converting to NumPy.
            feature_chunks.append(encoded.cpu().numpy())
            label_chunks.append(lbls.numpy())

    return np.vstack(feature_chunks), np.hstack(label_chunks)

# Flattened-pixel features and labels for both splits.
X_train, y_train = extract_features(train_loader)
X_test, y_test = extract_features(test_loader)

# Fit a 3-nearest-neighbour classifier on the training features
# (fit returns the estimator itself).
knn = KNeighborsClassifier(n_neighbors=3).fit(X_train, y_train)

# Score the classifier on the held-out test features.
y_pred = knn.predict(X_test)
acc = accuracy_score(y_test, y_pred)
print(f"KNN (frozen encoder) accuracy: {acc:.4f}")


or set the environment variable OPENBLAS_NUM_THREADS to 64 or lower


In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from pathlib import Path
import torch.optim as optim
import mlflow
import mlflow.pytorch
import matplotlib.pyplot as plt

# dataset setup
# The working directory is expected to hold train/ and test/ image folders.
data_dir = Path(".")

# Every image is resized to 64x64 and converted to a tensor.
transform = transforms.Compose([transforms.Resize((64, 64)), transforms.ToTensor()])

# Same preprocessing for both splits; the train folder is opened first.
train_dataset, test_dataset = (
    datasets.ImageFolder(data_dir / split, transform=transform)
    for split in ("train", "test")
)

# Only the training loader shuffles.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=16)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=16)

num_classes = len(train_dataset.classes)
input_size = 3 * 64 * 64  # channels * height * width of one flattened image

# Use the GPU when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# models
class LinearModel(nn.Module):
    """A single linear layer applied to the flattened input.

    Args:
        input_size: Number of features per sample after flattening.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()
        self.fc = nn.Linear(input_size, num_classes)

    def forward(self, x):
        """Flatten every sample to one vector and return ``fc`` applied to it."""
        batch = x.shape[0]
        flat = x.view(batch, -1)
        logits = self.fc(flat)
        return logits

class SmallNN(nn.Module):
    """Two-layer network: Linear -> ReLU -> Linear on the flattened input.

    Args:
        input_size: Number of features per sample after flattening.
        num_classes: Number of output logits.
        hidden_size: Width of the hidden layer.
    """

    def __init__(self, input_size, num_classes, hidden_size=256):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Flatten each sample, apply the hidden layer with ReLU, return logits."""
        flat = x.view(x.shape[0], -1)
        hidden = self.fc1(flat)
        activated = F.relu(hidden)
        return self.fc2(activated)

class MLP4(nn.Module):
    """Four-layer MLP (1024 -> 512 -> 256 -> num_classes) with ReLU and dropout.

    Args:
        input_size: Number of features per sample after flattening.
        num_classes: Number of output logits.
        dropout_rate: Drop probability applied after each hidden activation.
    """

    def __init__(self, input_size, num_classes, dropout_rate=0.2):
        super().__init__()
        self.fc1 = nn.Linear(input_size, 1024)
        self.fc2 = nn.Linear(1024, 512)
        self.fc3 = nn.Linear(512, 256)
        self.fc4 = nn.Linear(256, num_classes)
        self.dropout_rate = dropout_rate

    def forward(self, x):
        """Flatten each sample and return the class logits.

        Dropout follows the module mode: it is active only while training.
        """
        hidden = x.view(x.shape[0], -1)
        for layer in (self.fc1, self.fc2, self.fc3):
            activated = F.relu(layer(hidden))
            hidden = F.dropout(activated, self.dropout_rate, training=self.training)
        return self.fc4(hidden)

# training helpers
def train_epoch(model, dataloader, criterion, optimizer, device):
    """Run one optimisation pass over ``dataloader``.

    Args:
        model: Module to train; it is switched to training mode.
        dataloader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar loss.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(loss, accuracy)``: the batch-size-weighted loss sum divided by
        the number of labels seen, and the fraction of correct predictions.
    """
    model.train()
    loss_sum = 0
    n_correct = 0
    n_seen = 0

    for batch_x, batch_y in dataloader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        optimizer.zero_grad()
        logits = model(batch_x)
        batch_loss = criterion(logits, batch_y)
        batch_loss.backward()
        optimizer.step()

        # Weight the batch loss by its sample count so that a smaller final
        # batch does not skew the average.
        loss_sum += batch_loss.item() * batch_x.size(0)
        preds = logits.max(1)[1]  # index of the largest logit per sample
        n_correct += (preds == batch_y).sum().item()
        n_seen += batch_y.size(0)

    return loss_sum / n_seen, n_correct / n_seen

def eval_model(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` without updating it.

    Args:
        model: Module to evaluate; it is switched to evaluation mode.
        dataloader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar loss.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(loss, accuracy)``: the batch-size-weighted loss sum divided by
        the number of labels seen, and the fraction of correct predictions.
    """
    model.eval()
    loss_sum = 0
    n_correct = 0
    n_seen = 0

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for batch_x, batch_y in dataloader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            logits = model(batch_x)
            batch_loss = criterion(logits, batch_y)

            loss_sum += batch_loss.item() * batch_x.size(0)
            preds = logits.max(1)[1]  # index of the largest logit per sample
            n_correct += (preds == batch_y).sum().item()
            n_seen += batch_y.size(0)

    return loss_sum / n_seen, n_correct / n_seen

# Setup: hyperparameters shared by every model trained below
epochs, learning_rate = 5, 1e-3
criterion = nn.CrossEntropyLoss()

# Run name -> freshly constructed model on the selected device, in training order
models = {
    label: arch(input_size, num_classes).to(device)
    for label, arch in (("Linear", LinearModel), ("SmallNN", SmallNN), ("MLP4", MLP4))
}

# Close any MLflow run that is still active (for example after an interrupted
# cell) before new runs are started.
if mlflow.active_run():
    mlflow.end_run()

# Per-model metric histories, keyed by run name; filled in by the loop below
# and read by the plotting code afterwards.
results = {}

# training loop: every model gets its own MLflow run; hyperparameters are
# logged once per run, metrics after every epoch.
for name, model in models.items():
    with mlflow.start_run(run_name=name):
        print(f"\nTraining {name}...")
        # A new Adam optimizer is created per model, so no optimizer state is
        # carried over from one run to the next.
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        mlflow.log_param("model_name", name)
        mlflow.log_param("learning_rate", learning_rate)
        mlflow.log_param("epochs", epochs)

        # One entry per epoch is appended to each of these lists.
        train_losses, test_losses = [], []
        train_accs, test_accs = [], []

        for epoch in range(epochs):
            train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
            # The test loader is evaluated after every training epoch.
            test_loss, test_acc = eval_model(model, test_loader, criterion, device)

            # Save metrics
            train_losses.append(train_loss)
            test_losses.append(test_loss)
            train_accs.append(train_acc)
            test_accs.append(test_acc)

            # Log to MLflow (step=epoch is zero-based; the printed epoch is one-based)
            mlflow.log_metric("train_loss", train_loss, step=epoch)
            mlflow.log_metric("train_accuracy", train_acc, step=epoch)
            mlflow.log_metric("test_loss", test_loss, step=epoch)
            mlflow.log_metric("test_accuracy", test_acc, step=epoch)

            print(
                f"Epoch {epoch+1}/{epochs} - "
                f"Train loss: {train_loss:.4f}, Train acc: {train_acc:.4f} | "
                f"Test loss: {test_loss:.4f}, Test acc: {test_acc:.4f}"
            )

        # NOTE(review): example_input is created but never passed to log_model
        # below; presumably it was meant as an input example. Confirm or remove.
        example_input = torch.randn(1, 3, 64, 64).to(device)
        # Log the trained model under the "models" artifact path of this run.
        mlflow.pytorch.log_model(model, artifact_path="models")

        # Save results for plotting later
        results[name] = {
            "train_loss": train_losses,
            "test_loss": test_losses,
            "train_acc": train_accs,
            "test_acc": test_accs
        }

# plot results
# Figure 1: train and test loss curves of every model on one set of axes.
plt.figure(figsize=(10,5))
for name, data in results.items():
    plt.plot(data["train_loss"], label=f"{name} Train Loss")
    plt.plot(data["test_loss"], label=f"{name} Test Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Loss per Epoch")
plt.legend()
plt.grid(True)
# The figure is written to disk and then closed, so it is not shown inline.
plt.savefig("loss_plot.png", dpi=300)
plt.close()

# Figure 2: train and test accuracy curves of every model on one set of axes.
plt.figure(figsize=(10,5))
for name, data in results.items():
    plt.plot(data["train_acc"], label=f"{name} Train Acc")
    plt.plot(data["test_acc"], label=f"{name} Test Acc")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.title("Accuracy per Epoch")
plt.legend()
plt.grid(True)
# The figure is written to disk and then closed, so it is not shown inline.
plt.savefig("accuracy_plot.png", dpi=300)
plt.close()

print("\nTraining complete. Plots saved as 'loss_plot.png' and 'accuracy_plot.png'.")



Training Linear...
Epoch 1/5 - Train loss: 0.8695, Train acc: 0.5955 | Test loss: 1.6715, Test acc: 0.5184
Epoch 2/5 - Train loss: 0.8710, Train acc: 0.6042 | Test loss: 1.1568, Test acc: 0.5459
Epoch 3/5 - Train loss: 0.8638, Train acc: 0.6077 | Test loss: 0.7346, Test acc: 0.6090
Epoch 4/5 - Train loss: 0.8761, Train acc: 0.6088 | Test loss: 0.6707, Test acc: 0.6509




Epoch 5/5 - Train loss: 0.8704, Train acc: 0.6075 | Test loss: 1.6863, Test acc: 0.5072





Training SmallNN...
Epoch 1/5 - Train loss: 0.6027, Train acc: 0.6665 | Test loss: 0.5645, Test acc: 0.7042
Epoch 2/5 - Train loss: 0.5605, Train acc: 0.7058 | Test loss: 0.5485, Test acc: 0.7188
Epoch 3/5 - Train loss: 0.5535, Train acc: 0.7127 | Test loss: 0.5447, Test acc: 0.7184
Epoch 4/5 - Train loss: 0.5476, Train acc: 0.7187 | Test loss: 0.5440, Test acc: 0.7149




Epoch 5/5 - Train loss: 0.5462, Train acc: 0.7201 | Test loss: 0.5333, Test acc: 0.7282





Training MLP4...
Epoch 1/5 - Train loss: 0.6540, Train acc: 0.6046 | Test loss: 0.6351, Test acc: 0.6240
Epoch 2/5 - Train loss: 0.6412, Train acc: 0.6266 | Test loss: 0.6169, Test acc: 0.6536
Epoch 3/5 - Train loss: 0.6431, Train acc: 0.6181 | Test loss: 0.6278, Test acc: 0.6371
Epoch 4/5 - Train loss: 0.6421, Train acc: 0.6213 | Test loss: 0.6425, Test acc: 0.6340




Epoch 5/5 - Train loss: 0.6446, Train acc: 0.6134 | Test loss: 0.6345, Test acc: 0.6239





✅ Training complete. Plots saved as 'loss_plot.png' and 'accuracy_plot.png'.


In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from torchvision import datasets, transforms
from torch.utils.data import DataLoader
from pathlib import Path
import torch.optim as optim
import mlflow
import mlflow.pytorch
import matplotlib.pyplot as plt

# dataset setup
# The working directory is expected to hold train/ and test/ image folders.
data_dir = Path(".")

# Every image is resized to 64x64 and converted to a tensor.
transform = transforms.Compose([transforms.Resize((64, 64)), transforms.ToTensor()])

# Same preprocessing for both splits; the train folder is opened first.
train_dataset, test_dataset = (
    datasets.ImageFolder(data_dir / split, transform=transform)
    for split in ("train", "test")
)

# Only the training loader shuffles.
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=16)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=16)

num_classes = len(train_dataset.classes)
input_size = 3 * 64 * 64  # channels * height * width of one flattened image

# Use the GPU when one is available, otherwise the CPU.
if torch.cuda.is_available():
    device = torch.device("cuda")
else:
    device = torch.device("cpu")

# models
class LinearModel(nn.Module):
    """A single linear layer applied to the flattened input.

    Args:
        input_size: Number of features per sample after flattening.
        num_classes: Number of output logits.
    """

    def __init__(self, input_size, num_classes):
        super().__init__()
        self.fc = nn.Linear(input_size, num_classes)

    def forward(self, x):
        """Flatten every sample to one vector and return ``fc`` applied to it."""
        batch = x.shape[0]
        flat = x.view(batch, -1)
        logits = self.fc(flat)
        return logits

class SmallNN(nn.Module):
    """Two-layer network: Linear -> ReLU -> Linear on the flattened input.

    Args:
        input_size: Number of features per sample after flattening.
        num_classes: Number of output logits.
        hidden_size: Width of the hidden layer.
    """

    def __init__(self, input_size, num_classes, hidden_size=256):
        super().__init__()
        self.fc1 = nn.Linear(input_size, hidden_size)
        self.fc2 = nn.Linear(hidden_size, num_classes)

    def forward(self, x):
        """Flatten each sample, apply the hidden layer with ReLU, return logits."""
        flat = x.view(x.shape[0], -1)
        hidden = self.fc1(flat)
        activated = F.relu(hidden)
        return self.fc2(activated)

class MLP4_BN(nn.Module):
    """Four-layer MLP with batch normalisation before each hidden ReLU.

    Each hidden stage is Linear -> BatchNorm1d -> ReLU -> Dropout, with widths
    1024, 512 and 256; the final linear layer produces ``num_classes`` logits.

    Args:
        input_size: Number of features per sample after flattening.
        num_classes: Number of output logits.
        dropout_rate: Drop probability of the shared dropout module.
    """

    def __init__(self, input_size, num_classes, dropout_rate=0.3):
        super().__init__()
        # Registration order is kept as fc1, bn1, fc2, bn2, fc3, bn3, fc4, dropout.
        self.fc1 = nn.Linear(input_size, 1024)
        self.bn1 = nn.BatchNorm1d(1024)
        self.fc2 = nn.Linear(1024, 512)
        self.bn2 = nn.BatchNorm1d(512)
        self.fc3 = nn.Linear(512, 256)
        self.bn3 = nn.BatchNorm1d(256)
        self.fc4 = nn.Linear(256, num_classes)
        self.dropout = nn.Dropout(dropout_rate)

    def forward(self, x):
        """Flatten each sample and return the class logits."""
        hidden = x.view(x.shape[0], -1)
        stages = ((self.fc1, self.bn1), (self.fc2, self.bn2), (self.fc3, self.bn3))
        for linear, norm in stages:
            normalised = norm(linear(hidden))
            hidden = self.dropout(F.relu(normalised))
        return self.fc4(hidden)

# training helpers
def train_epoch(model, dataloader, criterion, optimizer, device):
    """Run one optimisation pass over ``dataloader``.

    Args:
        model: Module to train; it is switched to training mode.
        dataloader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar loss.
        optimizer: Optimizer stepped once per batch.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(loss, accuracy)``: the batch-size-weighted loss sum divided by
        the number of labels seen, and the fraction of correct predictions.
    """
    model.train()
    loss_sum = 0
    n_correct = 0
    n_seen = 0

    for batch_x, batch_y in dataloader:
        batch_x = batch_x.to(device)
        batch_y = batch_y.to(device)

        optimizer.zero_grad()
        logits = model(batch_x)
        batch_loss = criterion(logits, batch_y)
        batch_loss.backward()
        optimizer.step()

        # Weight the batch loss by its sample count so that a smaller final
        # batch does not skew the average.
        loss_sum += batch_loss.item() * batch_x.size(0)
        preds = logits.max(1)[1]  # index of the largest logit per sample
        n_correct += (preds == batch_y).sum().item()
        n_seen += batch_y.size(0)

    return loss_sum / n_seen, n_correct / n_seen

def eval_model(model, dataloader, criterion, device):
    """Evaluate ``model`` on ``dataloader`` without updating it.

    Args:
        model: Module to evaluate; it is switched to evaluation mode.
        dataloader: Iterable yielding ``(inputs, labels)`` batches.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar loss.
        device: Device the batches are moved to.

    Returns:
        Tuple ``(loss, accuracy)``: the batch-size-weighted loss sum divided by
        the number of labels seen, and the fraction of correct predictions.
    """
    model.eval()
    loss_sum = 0
    n_correct = 0
    n_seen = 0

    # Gradients are not needed for evaluation.
    with torch.no_grad():
        for batch_x, batch_y in dataloader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)

            logits = model(batch_x)
            batch_loss = criterion(logits, batch_y)

            loss_sum += batch_loss.item() * batch_x.size(0)
            preds = logits.max(1)[1]  # index of the largest logit per sample
            n_correct += (preds == batch_y).sum().item()
            n_seen += batch_y.size(0)

    return loss_sum / n_seen, n_correct / n_seen

# Setup: hyperparameters shared by every model trained below
epochs, learning_rate = 5, 1e-3
criterion = nn.CrossEntropyLoss()

# Run name -> freshly constructed model on the selected device, in training order
models = {
    label: arch(input_size, num_classes).to(device)
    for label, arch in (("Linear", LinearModel), ("SmallNN", SmallNN), ("MLP4_BN", MLP4_BN))
}

# Close any MLflow run that is still active (for example after an interrupted
# cell) before new runs are started.
if mlflow.active_run():
    mlflow.end_run()

# Per-model metric histories, keyed by run name; filled in by the loop below
# and read by the plotting code afterwards.
results = {}

# training loop: every model gets its own MLflow run; hyperparameters are
# logged once per run, metrics after every epoch.
for name, model in models.items():
    with mlflow.start_run(run_name=name):
        print(f"\nTraining {name}...")
        # A new Adam optimizer is created per model, so no optimizer state is
        # carried over from one run to the next.
        optimizer = optim.Adam(model.parameters(), lr=learning_rate)

        mlflow.log_param("model_name", name)
        mlflow.log_param("learning_rate", learning_rate)
        mlflow.log_param("epochs", epochs)

        # One entry per epoch is appended to each of these lists.
        train_losses, test_losses = [], []
        train_accs, test_accs = [], []

        for epoch in range(epochs):
            train_loss, train_acc = train_epoch(model, train_loader, criterion, optimizer, device)
            # The test loader is evaluated after every training epoch.
            test_loss, test_acc = eval_model(model, test_loader, criterion, device)

            # Save metrics
            train_losses.append(train_loss)
            test_losses.append(test_loss)
            train_accs.append(train_acc)
            test_accs.append(test_acc)

            # Log to MLflow (step=epoch is zero-based; the printed epoch is one-based)
            mlflow.log_metric("train_loss", train_loss, step=epoch)
            mlflow.log_metric("train_accuracy", train_acc, step=epoch)
            mlflow.log_metric("test_loss", test_loss, step=epoch)
            mlflow.log_metric("test_accuracy", test_acc, step=epoch)

            print(
                f"Epoch {epoch+1}/{epochs} - "
                f"Train loss: {train_loss:.4f}, Train acc: {train_acc:.4f} | "
                f"Test loss: {test_loss:.4f}, Test acc: {test_acc:.4f}"
            )

        # NOTE(review): example_input is created but never passed to log_model
        # below; presumably it was meant as an input example. Confirm or remove.
        example_input = torch.randn(1, 3, 64, 64).to(device)
        # Log the trained model under the "models" artifact path of this run.
        mlflow.pytorch.log_model(model, artifact_path="models")

        # Save results for plotting later
        results[name] = {
            "train_loss": train_losses,
            "test_loss": test_losses,
            "train_acc": train_accs,
            "test_acc": test_accs
        }

# plot results
# Figure 1: train and test loss curves of every model on one set of axes.
plt.figure(figsize=(10,5))
for name, data in results.items():
    plt.plot(data["train_loss"], label=f"{name} Train Loss")
    plt.plot(data["test_loss"], label=f"{name} Test Loss")
plt.xlabel("Epoch")
plt.ylabel("Loss")
plt.title("Loss per Epoch")
plt.legend()
plt.grid(True)
# The figure is written to disk and then closed, so it is not shown inline.
plt.savefig("loss_plot.png", dpi=300)
plt.close()

# Figure 2: train and test accuracy curves of every model on one set of axes.
plt.figure(figsize=(10,5))
for name, data in results.items():
    plt.plot(data["train_acc"], label=f"{name} Train Acc")
    plt.plot(data["test_acc"], label=f"{name} Test Acc")
plt.xlabel("Epoch")
plt.ylabel("Accuracy")
plt.title("Accuracy per Epoch")
plt.legend()
plt.grid(True)
# The figure is written to disk and then closed, so it is not shown inline.
plt.savefig("accuracy_plot.png", dpi=300)
plt.close()

print("\nTraining complete. Plots saved as 'loss_plot.png' and 'accuracy_plot.png'.")



Training Linear...
Epoch 1/5 - Train loss: 0.8826, Train acc: 0.5966 | Test loss: 1.0929, Test acc: 0.5483
Epoch 2/5 - Train loss: 0.8762, Train acc: 0.6012 | Test loss: 0.7277, Test acc: 0.6072
Epoch 3/5 - Train loss: 0.8931, Train acc: 0.6056 | Test loss: 0.7036, Test acc: 0.6392
Epoch 4/5 - Train loss: 0.8650, Train acc: 0.6061 | Test loss: 1.8998, Test acc: 0.5086




Epoch 5/5 - Train loss: 0.8657, Train acc: 0.6088 | Test loss: 0.7520, Test acc: 0.6449





Training SmallNN...
Epoch 1/5 - Train loss: 0.6059, Train acc: 0.6672 | Test loss: 0.5437, Test acc: 0.7167
Epoch 2/5 - Train loss: 0.5535, Train acc: 0.7143 | Test loss: 0.5353, Test acc: 0.7238
Epoch 3/5 - Train loss: 0.5397, Train acc: 0.7248 | Test loss: 0.5242, Test acc: 0.7358
Epoch 4/5 - Train loss: 0.5303, Train acc: 0.7322 | Test loss: 0.5364, Test acc: 0.7179




Epoch 5/5 - Train loss: 0.5244, Train acc: 0.7352 | Test loss: 0.5406, Test acc: 0.7230





Training MLP4_BN...
Epoch 1/5 - Train loss: 0.5465, Train acc: 0.7223 | Test loss: 0.4705, Test acc: 0.7766
Epoch 2/5 - Train loss: 0.5004, Train acc: 0.7562 | Test loss: 0.4432, Test acc: 0.7972
Epoch 3/5 - Train loss: 0.4757, Train acc: 0.7752 | Test loss: 0.4157, Test acc: 0.8080
Epoch 4/5 - Train loss: 0.4574, Train acc: 0.7849 | Test loss: 0.4368, Test acc: 0.7927




Epoch 5/5 - Train loss: 0.4439, Train acc: 0.7928 | Test loss: 0.3962, Test acc: 0.8210





Training complete. Plots saved as 'loss_plot.png' and 'accuracy_plot.png'.
