In [9]:

import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler, MinMaxScaler, OneHotEncoder
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline
from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    precision_score,
    recall_score,
    f1_score,
)
from torch.utils.data import DataLoader, Dataset, random_split
import torchvision
from torchvision import datasets, transforms



In [10]:
# Read the MNIST training CSV and rescale the pixel columns to the 0-1 range.
df = pd.read_csv("mnist_train.csv")
print("Shape (raw):", df.shape)

# Column 0 carries the label; every other column is a pixel value.
label_col = df.columns[0]
y = df[label_col].astype(np.int64)
X = df.drop(columns=[label_col]).astype(np.float32)

# Divide by the maximum 8-bit intensity so pixels fall in [0, 1].
X_norm = X.div(255.0)

# Sanity check: shapes line up and the value range is as expected.
print("X_norm shape:", X_norm.shape, "y shape:", y.shape)
print(
    "Min/Max after norm:",
    float(X_norm.min().min()),
    float(X_norm.max().max()),
)


Shape (raw): (60000, 785)
X_norm shape: (60000, 784) y shape: (60000,)
Min/Max after norm: 0.0 1.0


In [11]:
# Stratified 60/20/20 train/val/test partition of the normalized data.
# The seed is fixed so that Restart & Run All reproduces the exact same
# partition; without `random_state` every run shuffles differently and
# downstream metrics cannot be compared across runs.
SPLIT_SEED = 42

# First, split off 20% test from the full dataset
X_temp, X_test, y_temp, y_test = train_test_split(
    X_norm, y, test_size=0.20, stratify=y, random_state=SPLIT_SEED
)

# From the remaining 80%, take 25% as validation (0.25 * 0.80 = 0.20 overall)
X_train, X_val, y_train, y_val = train_test_split(
    X_temp, y_temp, test_size=0.25, stratify=y_temp, random_state=SPLIT_SEED
)

# Every row must land in exactly one of the three splits.
assert len(X_train) + len(X_val) + len(X_test) == len(X_norm)
assert len(y_train) + len(y_val) + len(y_test) == len(y)

print(
    "Shapes ->",
    f"train: {X_train.shape}",
    f"val: {X_val.shape}",
    f"test: {X_test.shape}",
)

Shapes -> train: (36000, 784) val: (12000, 784) test: (12000, 784)


In [12]:
# Create PyTorch TensorDatasets and DataLoaders
import torch
from torch.utils.data import TensorDataset, DataLoader

BATCH_SIZE = 64

def make_loader(X, y, batch_size=BATCH_SIZE, shuffle=False, seed=None):
    """Wrap features and labels in a ``TensorDataset`` and return a ``DataLoader``.

    Parameters
    ----------
    X : array-like
        Feature matrix. Anything ``np.asarray`` can convert is accepted
        (pandas DataFrame, NumPy array, nested lists); it is copied into a
        ``float32`` tensor.
    y : array-like
        Labels, one per row of ``X``; copied into an ``int64`` (long) tensor.
    batch_size : int, optional
        Number of samples per batch. Defaults to ``BATCH_SIZE``.
    shuffle : bool, optional
        Whether the loader reshuffles the samples on every pass.
    seed : int or None, optional
        When given, the loader draws its randomness from a dedicated
        ``torch.Generator`` seeded with this value, so two loaders built with
        the same seed yield the same shuffled order. ``None`` (the default)
        keeps the previous behaviour of using the global RNG.

    Returns
    -------
    torch.utils.data.DataLoader
        Loader yielding ``(features, labels)`` batches.
    """
    # np.asarray handles pandas objects and plain arrays uniformly, whereas
    # `.values` exists only on pandas objects. torch.tensor copies the data,
    # so the loader never aliases the caller's arrays.
    tensor_X = torch.tensor(np.asarray(X), dtype=torch.float32)
    tensor_y = torch.tensor(np.asarray(y), dtype=torch.long)
    ds = TensorDataset(tensor_X, tensor_y)

    generator = None
    if seed is not None:
        # A private generator keeps shuffling independent of the global RNG.
        generator = torch.Generator()
        generator.manual_seed(seed)

    return DataLoader(
        ds, batch_size=batch_size, shuffle=shuffle, generator=generator
    )

# Only the training split is shuffled; validation and test keep their row
# order.
train_loader = make_loader(X_train, y_train, shuffle=True)
val_loader, test_loader = (
    make_loader(features, labels)
    for features, labels in ((X_val, y_val), (X_test, y_test))
)

# Report how many batches each loader yields per pass.
print(
    f"Num train batches: {len(train_loader)}",
    f"Num val batches: {len(val_loader)}",
    f"Num test batches: {len(test_loader)}",
    sep="\n",
)


Num train batches: 563
Num val batches: 188
Num test batches: 188
