In [None]:
from pathlib import Path

import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from tqdm.auto import tqdm

# Directory that holds the train/test .npy arrays, relative to the working directory.
data_dir = Path("data")

In [None]:
# Read the pre-split feature and label arrays from data_dir, one pair per split.
X_train, y_train = (
    np.load(data_dir / "train_data.npy"),
    np.load(data_dir / "train_label.npy"),
)
X_test, y_test = (
    np.load(data_dir / "test_data.npy"),
    np.load(data_dir / "test_label.npy"),
)

# Echo every array's shape, one per line, as a quick sanity check of the load.
print(
    f"{X_train.shape = }",
    f"{y_train.shape = }",
    f"{X_test.shape = }",
    f"{y_test.shape = }",
    sep="\n",
)

X_train.shape = (50000, 128)
y_train.shape = (50000, 1)
X_test.shape = (10000, 128)
y_test.shape = (10000, 1)


In [None]:
def get_available_device():
    """Return the preferred ``torch.device`` for this machine.

    Preference order is CUDA, then Apple's MPS backend, then CPU.

    ``torch.backends.mps`` is looked up with ``getattr`` because PyTorch
    builds that predate the MPS backend do not expose that attribute; on
    such builds a direct attribute access would raise ``AttributeError``
    instead of falling back to the CPU.

    Returns:
        torch.device: ``cuda``, ``mps`` or ``cpu``.
    """
    if torch.cuda.is_available():
        return torch.device("cuda")

    mps_backend = getattr(torch.backends, "mps", None)
    if mps_backend is not None and mps_backend.is_available():
        return torch.device("mps")

    return torch.device("cpu")

In [None]:
# Wrap the NumPy arrays as tensors: float32 features and int64 class labels.
# The label arrays carry a singleton axis, which squeeze() removes so that
# each label tensor is 1-D.
X_train_tensor, X_test_tensor = (
    torch.tensor(features, dtype=torch.float32) for features in (X_train, X_test)
)
y_train_tensor, y_test_tensor = (
    torch.tensor(labels, dtype=torch.long).squeeze() for labels in (y_train, y_test)
)

# Pair features with labels, then batch them. Only the training split is
# shuffled; the test split keeps its stored order.
batch_size = 64
train_dataset = TensorDataset(X_train_tensor, y_train_tensor)
test_dataset = TensorDataset(X_test_tensor, y_test_tensor)
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=batch_size)
test_loader = DataLoader(test_dataset, shuffle=False, batch_size=batch_size)

# Define MLP Model
class MLP(nn.Module):
    """Fully connected classifier.

    The network is a stack of ``Linear -> ReLU -> BatchNorm1d -> Dropout``
    blocks (one per entry in ``hidden_dims``) followed by a final ``Linear``
    layer that produces one raw score (logit) per class.

    Args:
        input_dim: Number of input features per sample.
        num_classes: Number of output logits.
        hidden_dims: Width of each hidden layer, in order. The default
            ``(256, 128)`` gives the same layers as the original fixed
            architecture.
        dropout: Drop probability used by every ``Dropout`` layer.
    """

    def __init__(self, input_dim=128, num_classes=10, hidden_dims=(256, 128), dropout=0.5):
        super().__init__()

        layers = []
        in_features = input_dim
        for width in hidden_dims:
            layers += [
                nn.Linear(in_features, width),
                nn.ReLU(),
                nn.BatchNorm1d(width),
                nn.Dropout(dropout),
            ]
            in_features = width
        layers.append(nn.Linear(in_features, num_classes))

        # Stored under the attribute name ``model`` so that state_dict keys
        # keep the form ``model.<index>.<param>``.
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Return the class logits for a batch ``x`` of shape ``(batch, input_dim)``."""
        return self.model(x)

# Initialize model, loss function, and optimizer
# Seed the global RNG first so that weight initialisation, dropout masks and
# batch shuffling are repeatable across "Restart & Run All" (to the extent the
# selected backend is deterministic).
seed = 0
torch.manual_seed(seed)

device = get_available_device()
model = MLP().to(device)
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=1e-4, weight_decay=1e-5)

# Training parameters
num_epochs = 200
log_interval = 10  # Evaluate on the test split and print every X epochs


def _run_epoch(model, loader, criterion, device, optimizer=None):
    """Run one full pass over ``loader`` and return ``(mean_loss, accuracy)``.

    When ``optimizer`` is given the model is put in training mode and its
    parameters are updated after every batch; otherwise the model is put in
    eval mode and gradients are disabled.

    The returned loss is averaged per sample, not per batch, so a smaller
    final batch does not get extra weight. This assumes ``criterion`` returns
    the mean loss over the batch, as ``nn.CrossEntropyLoss()`` does by default.

    Raises:
        ValueError: if ``loader`` yields no samples.
    """
    training = optimizer is not None
    model.train(training)
    loss_sum, n_correct, n_seen = 0.0, 0, 0

    with torch.set_grad_enabled(training):
        for X_batch, y_batch in loader:
            X_batch, y_batch = X_batch.to(device), y_batch.to(device)

            if training:
                optimizer.zero_grad()
            outputs = model(X_batch)
            loss = criterion(outputs, y_batch)
            if training:
                loss.backward()
                optimizer.step()

            n_batch = y_batch.size(0)
            # Undo the per-batch mean so batches of different size combine correctly.
            loss_sum += loss.item() * n_batch
            n_correct += (outputs.argmax(dim=1) == y_batch).sum().item()
            n_seen += n_batch

    if n_seen == 0:
        raise ValueError("loader yielded no samples")
    return loss_sum / n_seen, n_correct / n_seen


for epoch in tqdm(range(1, num_epochs + 1), desc="Training Progress"):
    # Training metrics are accumulated during the training pass itself, i.e.
    # with dropout active and while the weights are still being updated.
    train_loss, train_acc = _run_epoch(model, train_loader, criterion, device, optimizer)

    # Testing (only on log intervals)
    if epoch % log_interval == 0:
        test_loss, test_acc = _run_epoch(model, test_loader, criterion, device)

        print(f"Epoch {epoch}: Train Loss={train_loss:.4f}, Train Acc={train_acc:.4f}, "
              f"Test Loss={test_loss:.4f}, Test Acc={test_acc:.4f}")

Training Progress:   0%|          | 0/200 [00:00<?, ?it/s]

Epoch 10: Train Loss=1.6027, Train Acc=0.4318, Test Loss=1.4665, Test Acc=0.4843
Epoch 20: Train Loss=1.4817, Train Acc=0.4745, Test Loss=1.3613, Test Acc=0.5173
Epoch 30: Train Loss=1.4144, Train Acc=0.4977, Test Loss=1.3091, Test Acc=0.5346
Epoch 40: Train Loss=1.3664, Train Acc=0.5131, Test Loss=1.2753, Test Acc=0.5476
Epoch 50: Train Loss=1.3398, Train Acc=0.5221, Test Loss=1.2535, Test Acc=0.5547
Epoch 60: Train Loss=1.3070, Train Acc=0.5339, Test Loss=1.2386, Test Acc=0.5585
Epoch 70: Train Loss=1.2916, Train Acc=0.5385, Test Loss=1.2267, Test Acc=0.5640
Epoch 80: Train Loss=1.2716, Train Acc=0.5461, Test Loss=1.2186, Test Acc=0.5660
Epoch 90: Train Loss=1.2647, Train Acc=0.5486, Test Loss=1.2099, Test Acc=0.5682
Epoch 100: Train Loss=1.2506, Train Acc=0.5556, Test Loss=1.2082, Test Acc=0.5714
Epoch 110: Train Loss=1.2449, Train Acc=0.5560, Test Loss=1.2040, Test Acc=0.5672
Epoch 120: Train Loss=1.2358, Train Acc=0.5632, Test Loss=1.2048, Test Acc=0.5671
Epoch 130: Train Loss=1.2