In [10]:
import torch
import torch.nn as nn
from torch.utils.data import Dataset, DataLoader
import pandas as pd
from sklearn.model_selection import train_test_split
import numpy as np
from tqdm import tqdm

# Custom Dataset class
class MNISTSequenceDataset(Dataset):
    """In-memory dataset of normalized pixel rows paired with integer labels.

    Raw pixel values are divided by 255 and then standardized with
    ``(x - mean) / std``. The conversion to tensors happens once in the
    constructor, so ``__getitem__`` is a plain index lookup.

    Args:
        data: Array-like of raw pixel values, one sample per row (NumPy
            array, nested list, ...). Values are presumably in 0-255, since
            they are divided by 255 to reach [0, 1].
        labels: Array-like of integer class labels, one per sample.
        mean: Value subtracted from the scaled pixels.
        std: Value the centered pixels are divided by.

    Raises:
        ValueError: If ``data`` and ``labels`` do not contain the same
            number of samples.
    """

    def __init__(self, data, labels, mean=0.1307, std=0.3081):
        # np.asarray accepts lists and other array-likes, not only ndarrays.
        pixels = np.asarray(data, dtype=np.float32)
        targets = np.asarray(labels)

        # Fail here rather than with an IndexError in the middle of an epoch.
        if len(pixels) != len(targets):
            raise ValueError(
                "data and labels must have the same number of samples, "
                f"got {len(pixels)} and {len(targets)}"
            )

        # First scale to [0, 1], then apply mean/std normalization.
        pixels = pixels / 255.0
        pixels = (pixels - mean) / std

        # `pixels` is a fresh array at this point, so sharing its memory with
        # the tensor is safe; the labels are copied so the caller's array is
        # never aliased.
        self.data = torch.as_tensor(pixels, dtype=torch.float32)
        self.labels = torch.tensor(targets, dtype=torch.long)

    def __len__(self):
        """Return the number of samples."""
        return len(self.labels)

    def __getitem__(self, idx):
        """Return the ``(pixels, label)`` tensors stored at ``idx``."""
        return self.data[idx], self.labels[idx]

# MLP Model
class MNISTMLPClassifier(nn.Module):
    """Fully connected classifier.

    Each hidden layer is ``Linear -> ReLU -> Dropout``; a final ``Linear``
    maps the last hidden width to ``num_classes`` raw scores (no softmax is
    applied here).

    Args:
        input_size: Number of features per sample after flattening.
        hidden_size: Width of each hidden layer, in order. Either a sequence
            of ints or a single int for one hidden layer.
        num_classes: Number of output scores per sample.
        dropout: Dropout probability used after every hidden activation.
    """

    def __init__(self, input_size=784, hidden_size=(100, 50), num_classes=10,
                 dropout=0.25):
        super().__init__()
        if isinstance(hidden_size, int):
            hidden_size = (hidden_size,)
        self.input_size = input_size
        self.num_classes = num_classes

        # Layers are created in the same order as the fixed two-layer version,
        # so the default configuration keeps the same state_dict keys
        # (model.0, model.3, model.6).
        layers = []
        in_features = input_size
        for width in hidden_size:
            layers.append(nn.Linear(in_features, width))
            layers.append(nn.ReLU())
            layers.append(nn.Dropout(dropout))
            in_features = width
        layers.append(nn.Linear(in_features, num_classes))
        self.model = nn.Sequential(*layers)

    def forward(self, x):
        """Flatten ``x`` to rows of ``input_size`` features and return the scores."""
        # reshape (unlike view) also handles non-contiguous input.
        x = x.reshape(-1, self.input_size)
        return self.model(x)

# Training function
def train_model(model, train_loader, val_loader, criterion, optimizer, num_epochs, device):
    """Train ``model`` for ``num_epochs`` epochs, validating after each one.

    Args:
        model: Module called on batches reshaped to ``(batch, features, 1)``.
        train_loader: Iterable of ``(images, labels)`` training batches.
        val_loader: Iterable of ``(images, labels)`` validation batches.
        criterion: Callable ``criterion(outputs, labels)`` returning a scalar
            loss tensor.
        optimizer: Optimizer stepped once per training batch.
        num_epochs: Number of passes over ``train_loader``.
        device: Device every batch is moved to before the forward pass.

    Returns:
        ``(train_losses, val_losses, train_accs, val_accs)``, four lists with
        one entry per epoch. Each loss entry is the *sum* of the per-batch
        loss values over the epoch (not an average), so divide by the number
        of batches before comparing training and validation loss. Each
        accuracy entry is the fraction of correctly classified samples, or
        NaN if the loader yielded no samples.
    """
    train_losses, val_losses = [], []
    train_accs, val_accs = [], []

    for epoch in range(num_epochs):
        # ---- Training pass ----
        model.train()
        train_loss = 0.0
        train_correct = 0
        train_total = 0
        progress_bar = tqdm(train_loader, desc=f"Epoch {epoch + 1}/{num_epochs}")
        for images, labels in progress_bar:
            loss, correct, count = _forward_batch(model, images, labels, criterion, device)

            # Backward and optimize
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            batch_loss = loss.item()
            train_loss += batch_loss
            train_correct += correct
            train_total += count

            progress_bar.set_postfix({"Loss": batch_loss})

        train_losses.append(train_loss)
        train_accs.append(_accuracy(train_correct, train_total))

        # ---- Validation pass (no gradient tracking) ----
        model.eval()
        val_loss = 0.0
        val_correct = 0
        val_total = 0
        with torch.no_grad():
            for images, labels in val_loader:
                loss, correct, count = _forward_batch(model, images, labels, criterion, device)
                val_loss += loss.item()
                val_correct += correct
                val_total += count

        val_losses.append(val_loss)
        val_accs.append(_accuracy(val_correct, val_total))

    return train_losses, val_losses, train_accs, val_accs


def _forward_batch(model, images, labels, criterion, device):
    """Run one forward pass; return ``(loss tensor, correct count, batch size)``."""
    images = images.to(device)
    labels = labels.to(device)

    # Reshape to (batch_size, sequence_length, 1); the sequence length is
    # derived from the batch instead of being fixed at 784.
    images = images.view(images.size(0), -1, 1)

    outputs = model(images)
    loss = criterion(outputs, labels)

    # The predicted class is the index of the highest score in each row.
    predicted = outputs.argmax(dim=1)
    correct = (predicted == labels).sum().item()
    return loss, correct, labels.size(0)


def _accuracy(correct, total):
    """Return ``correct / total``, or NaN when no samples were seen."""
    return correct / total if total else float("nan")


# Run on the GPU when CUDA is available, otherwise fall back to the CPU
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Read the training CSV and separate the 'label' column from the other columns
df = pd.read_csv('./data/mnist/train.csv')
labels = df['label'].values
pixels = df.drop(columns='label').values

# Hold out 20% of the rows for validation; the fixed seed keeps the split repeatable
X_train, X_val, y_train, y_val = train_test_split(
    pixels,
    labels,
    test_size=0.2,
    random_state=42,
)

# Wrap each split in a dataset (normalization happens in its constructor)
train_dataset = MNISTSequenceDataset(X_train, y_train)
val_dataset = MNISTSequenceDataset(X_val, y_val)

# Batch both splits; only the training batches are shuffled
train_loader = DataLoader(train_dataset, shuffle=True, batch_size=128)
val_loader = DataLoader(val_dataset, shuffle=False, batch_size=256)

# Build the MLP with its default layer sizes and move it to the chosen device
model = MNISTMLPClassifier()
model = model.to(device)

# Cross-entropy loss, optimized with Adam
criterion = nn.CrossEntropyLoss()
optimizer = torch.optim.Adam(model.parameters(), lr=1e-3)

# Run training and keep the per-epoch loss/accuracy histories
num_epochs = 50
train_losses, val_losses, train_accs, val_accs = train_model(
    model=model,
    train_loader=train_loader,
    val_loader=val_loader,
    criterion=criterion,
    optimizer=optimizer,
    num_epochs=num_epochs,
    device=device,
)

Epoch 1/num_epochs: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 263/263 [00:00<00:00, 325.89it/s, Loss=0.181]
Epoch 2/num_epochs: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 263/263 [00:00<00:00, 318.26it/s, Loss=0.17]
Epoch 3/num_epochs: 100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 263/263 [00:00<00:00, 325.19it/s, Loss=0.14]
Epoch 4/num_epochs: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 263/263 [00:00<00:00, 324.45it/s, Loss=0.203]
Epoch 5/num_epochs: 100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████