# PyTorch MLP on Tabular Medical Data (Breast Cancer)

Goal:
- Load a tabular breast cancer dataset
- Preprocess and split data
- Build a neural network in PyTorch
- Train with a standard PyTorch training loop
- Evaluate performance
- Experiment with architecture and hyperparameters

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt

from sklearn.datasets import load_breast_cancer
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, roc_auc_score

import torch
import torch.nn as nn
from torch.utils.data import TensorDataset, DataLoader

In [None]:
# Seed NumPy and PyTorch with the same value so repeated runs of the
# notebook draw the same random numbers.
SEED = 42
np.random.seed(SEED)
torch.manual_seed(SEED)

# Use the GPU when PyTorch reports one as available; otherwise use the CPU.
device = torch.device("cuda") if torch.cuda.is_available() else torch.device("cpu")
print("Using device:", device)

### 1) Load breast cancer dataset

In [None]:
# as_frame=True makes scikit-learn return pandas objects instead of arrays.
data = load_breast_cancer(as_frame=True)
X, y = data.data, data.target  # y: 0/1 binary labels

print("Features shape:", X.shape)
print("Target shape:", y.shape)
print("Classes:", data.target_names.tolist())

In [None]:
# Preview the first rows of the features, then of the labels.
for preview in (X.head(), y.head()):
    display(preview)

### 2) Train/validation/test split
We use:
- train set: for fitting the model (about 64% of the samples)
- validation set: for monitoring/tuning (about 16% of the samples)
- test set: final evaluation (20% of the samples)

In [None]:
# Step 1: hold out 20% of all rows as the test set. Stratifying on the label
# keeps the class proportions the same in both parts.
X_trainval, X_test, y_trainval, y_test = train_test_split(
    X, y,
    test_size=0.2,
    stratify=y,
    random_state=SEED
)

# Step 2: split the remaining rows again. 20% of them (roughly 16% of the
# full dataset) become the validation set; the rest is the training set.
X_train, X_val, y_train, y_val = train_test_split(
    X_trainval, y_trainval,
    test_size=0.2,
    stratify=y_trainval,
    random_state=SEED
)

print("Train:", X_train.shape, y_train.shape)
print("Val:  ", X_val.shape, y_val.shape)
print("Test: ", X_test.shape, y_test.shape)

### 3) Feature scaling (important for neural networks)

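Fit the scaler on the training data only, then apply the same transformation to the validation and test sets, so that no statistics from the held-out data leak into training.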

In [None]:
# The scaler learns its statistics from the training split only; the
# validation and test splits are transformed with those same statistics.
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_val_scaled, X_test_scaled = (
    scaler.transform(split) for split in (X_val, X_test)
)

# Labels become float32 column vectors of shape (n_samples, 1).
y_train_np, y_val_np, y_test_np = (
    labels.to_numpy().astype(np.float32).reshape(-1, 1)
    for labels in (y_train, y_val, y_test)
)

### 4) Convert to PyTorch tensors + DataLoaders

In [None]:
# Features and labels are both stored as float32 tensors.
X_train_tensor, X_val_tensor, X_test_tensor = (
    torch.tensor(features, dtype=torch.float32)
    for features in (X_train_scaled, X_val_scaled, X_test_scaled)
)
y_train_tensor, y_val_tensor, y_test_tensor = (
    torch.tensor(labels, dtype=torch.float32)
    for labels in (y_train_np, y_val_np, y_test_np)
)

# Pair each feature row with its label.
train_ds = TensorDataset(X_train_tensor, y_train_tensor)
val_ds = TensorDataset(X_val_tensor, y_val_tensor)
test_ds = TensorDataset(X_test_tensor, y_test_tensor)

# Only the training loader shuffles; validation and test keep a fixed order.
train_loader = DataLoader(train_ds, shuffle=True, batch_size=32)
val_loader = DataLoader(val_ds, shuffle=False, batch_size=64)
test_loader = DataLoader(test_ds, shuffle=False, batch_size=64)

print("Train batches:", len(train_loader))
print("Val batches:", len(val_loader))

### 5) Define the MLP model

Participants can change:
- hidden_dims
- activation function
- dropout

In [None]:
class NeuralNetwork(nn.Module):
    """Fully connected network that ends in a single-unit linear layer.

    Every entry of ``hidden_dims`` contributes a ``Linear`` layer followed by
    the chosen activation and, when ``dropout > 0``, a ``Dropout`` layer. The
    last layer has one output and no activation, so the network returns a raw
    logit per input row.

    Args:
        input_dim: Number of input features of the first linear layer.
        hidden_dims: Widths of the hidden layers, in order. An empty sequence
            yields a single ``Linear(input_dim, 1)`` layer.
        activation: Case-insensitive name of the activation: ``"relu"``,
            ``"tanh"`` or ``"sigmoid"``. Any other name raises ``KeyError``.
        dropout: Dropout probability; dropout layers are added only when this
            is greater than zero.
    """

    def __init__(self, input_dim, hidden_dims=(32, 16), activation="relu", dropout=0.0):
        super().__init__()

        # Look up the activation class by name; a fresh instance is created
        # for every hidden layer below.
        activation_cls = {
            "relu": nn.ReLU,
            "tanh": nn.Tanh,
            "sigmoid": nn.Sigmoid,
        }[activation.lower()]

        # widths[i] -> widths[i + 1] describes the i-th hidden linear layer.
        widths = [input_dim, *hidden_dims]

        stack = []
        for fan_in, fan_out in zip(widths[:-1], widths[1:]):
            stack.append(nn.Linear(fan_in, fan_out))
            stack.append(activation_cls())
            if dropout > 0:
                stack.append(nn.Dropout(dropout))

        # Binary classification head: one logit, no activation.
        stack.append(nn.Linear(widths[-1], 1))

        self.network = nn.Sequential(*stack)

    def forward(self, x):
        """Return the logits produced by passing ``x`` through the layer stack."""
        return self.network(x)

In [None]:
# The input layer width equals the number of feature columns.
input_dim = X_train_tensor.size(1)

# Build the network with the hyperparameters to experiment with.
model = NeuralNetwork(
    input_dim=input_dim,
    hidden_dims=(32, 16),      # try (16,), (64, 32), (64, 32, 16), ...
    activation="relu",         # try "tanh"
    dropout=0.1                # try 0.0, 0.2, 0.5
)
# Move the parameters to the selected device.
model = model.to(device)

print(model)

### 6) Define loss and optimizer

We use:
- `BCEWithLogitsLoss` for binary classification
- `Adam` optimizer for stable training

In [None]:
# The model outputs raw logits, which is the input BCEWithLogitsLoss expects.
criterion = nn.BCEWithLogitsLoss()
optimizer = torch.optim.Adam(params=model.parameters(), lr=0.001)

### 7) Helper functions for training and evaluation

In [None]:
def train_one_epoch(model, loader, criterion, optimizer, device):
    """Run one optimisation pass over ``loader`` and report loss and accuracy.

    Args:
        model: Module mapping a feature batch to logits.
        loader: Iterable yielding ``(features, targets)`` tensor batches.
        criterion: Loss called as ``criterion(logits, targets)``. It is
            assumed to return the batch-mean loss as a scalar tensor, because
            each batch loss is re-weighted by the batch size.
        optimizer: Optimizer that is zeroed and stepped once per batch.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(epoch_loss, epoch_acc)``: the sample-weighted mean loss and
        the accuracy of predictions thresholded at probability 0.5. Both are
        computed from the outputs produced in training mode while the weights
        were being updated, not from a separate pass after the epoch.

    Raises:
        ValueError: If ``loader`` yields no samples.
    """
    model.train()

    running_loss = 0.0
    n_seen = 0
    all_logits = []
    all_targets = []

    for xb, yb in loader:
        xb = xb.to(device)
        yb = yb.to(device)

        optimizer.zero_grad()

        logits = model(xb)                  # shape: (batch, 1)
        loss = criterion(logits, yb)

        loss.backward()
        optimizer.step()

        batch_size = xb.size(0)
        running_loss += loss.item() * batch_size
        n_seen += batch_size
        all_logits.append(logits.detach().cpu())
        all_targets.append(yb.detach().cpu())

    if n_seen == 0:
        raise ValueError("loader produced no samples to train on")

    # Divide by the samples actually processed, so the average stays correct
    # when the loader yields fewer samples than len(loader.dataset).
    epoch_loss = running_loss / n_seen

    stacked_logits = torch.cat(all_logits)
    targets_np = torch.cat(all_targets).numpy()

    # torch.sigmoid is numerically stable, unlike 1 / (1 + exp(-x)), which
    # overflows in float32 for large-magnitude negative logits.
    probs = torch.sigmoid(stacked_logits).numpy()
    preds = (probs >= 0.5).astype(np.float32)

    epoch_acc = accuracy_score(targets_np, preds)

    return epoch_loss, epoch_acc


@torch.no_grad()
def evaluate(model, loader, criterion, device):
    """Score ``model`` on ``loader`` in eval mode, without gradient tracking.

    Args:
        model: Module mapping a feature batch to one logit per sample.
        loader: Iterable yielding ``(features, targets)`` tensor batches.
        criterion: Loss called as ``criterion(logits, targets)``. It is
            assumed to return the batch-mean loss as a scalar tensor, because
            each batch loss is re-weighted by the batch size.
        device: Device the batches are moved to before the forward pass.

    Returns:
        Tuple ``(epoch_loss, epoch_acc, epoch_auc, probs, preds, all_targets)``:
        the sample-weighted mean loss, the accuracy at a 0.5 probability
        threshold, the ROC-AUC of the predicted probabilities, and NumPy
        arrays of the probabilities, hard predictions and targets, in loader
        order.

    Raises:
        ValueError: If ``loader`` yields no samples.

    Note:
        ROC-AUC is undefined when the targets contain a single class; what
        happens then is decided by ``roc_auc_score``.
    """
    model.eval()

    running_loss = 0.0
    n_seen = 0
    all_logits = []
    all_targets = []

    for xb, yb in loader:
        xb = xb.to(device)
        yb = yb.to(device)

        logits = model(xb)
        loss = criterion(logits, yb)

        batch_size = xb.size(0)
        running_loss += loss.item() * batch_size
        n_seen += batch_size
        all_logits.append(logits.cpu())
        all_targets.append(yb.cpu())

    if n_seen == 0:
        raise ValueError("loader produced no samples to evaluate")

    # Divide by the samples actually processed, so the average stays correct
    # when the loader yields fewer samples than len(loader.dataset).
    epoch_loss = running_loss / n_seen

    stacked_logits = torch.cat(all_logits)
    all_targets = torch.cat(all_targets).numpy()

    # torch.sigmoid is numerically stable, unlike 1 / (1 + exp(-x)), which
    # overflows in float32 for large-magnitude negative logits.
    probs = torch.sigmoid(stacked_logits).numpy()
    preds = (probs >= 0.5).astype(np.float32)

    epoch_acc = accuracy_score(all_targets, preds)
    # AUC is computed on the probabilities, not the thresholded predictions.
    # Both arrays are flattened to 1-D for the metric.
    epoch_auc = roc_auc_score(all_targets.ravel(), probs.ravel())

    return epoch_loss, epoch_acc, epoch_auc, probs, preds, all_targets

### 8) Training loop (multiple epochs)

In [None]:
num_epochs = 50

# One list per tracked metric; each epoch appends one value to every list.
history = {
    metric: []
    for metric in ("train_loss", "train_acc", "val_loss", "val_acc", "val_auc")
}

# Keep a copy of the weights from the epoch with the lowest validation loss.
best_val_loss = float("inf")
best_state_dict = None

for epoch in range(1, num_epochs + 1):
    train_loss, train_acc = train_one_epoch(model, train_loader, criterion, optimizer, device)
    val_loss, val_acc, val_auc, _, _, _ = evaluate(model, val_loader, criterion, device)

    history["train_loss"].append(train_loss)
    history["train_acc"].append(train_acc)
    history["val_loss"].append(val_loss)
    history["val_acc"].append(val_acc)
    history["val_auc"].append(val_auc)

    # Snapshot on strict improvement only; the tensors are copied to the CPU
    # so later optimizer steps cannot modify the saved values.
    if val_loss < best_val_loss:
        best_val_loss = val_loss
        best_state_dict = {
            name: tensor.cpu().clone()
            for name, tensor in model.state_dict().items()
        }

    # Log the first epoch, every tenth epoch, and the last epoch.
    if epoch in (1, num_epochs) or epoch % 10 == 0:
        print(
            f"Epoch {epoch:03d}/{num_epochs} | "
            f"train_loss={train_loss:.4f}, train_acc={train_acc:.4f} | "
            f"val_loss={val_loss:.4f}, val_acc={val_acc:.4f}, val_auc={val_auc:.4f}"
        )

# Restore the best validation model, if any epoch produced a snapshot.
if best_state_dict is not None:
    model.load_state_dict(best_state_dict)

### 9) Plot training curves

In [None]:
# x-axis: epoch numbers starting at 1.
epochs = np.arange(1, num_epochs + 1)

# Draw one figure per metric, each comparing the training and validation curves.
for metric, axis_label in (("loss", "Loss"), ("acc", "Accuracy")):
    lowered = axis_label.lower()

    plt.figure(figsize=(7, 4))
    plt.plot(epochs, history[f"train_{metric}"], label=f"Train {lowered}")
    plt.plot(epochs, history[f"val_{metric}"], label=f"Validation {lowered}")
    plt.xlabel("Epoch")
    plt.ylabel(axis_label)
    plt.title(f"Training and validation {lowered}")
    plt.legend()
    plt.show()

### 10) Final evaluation on test set

In [None]:
# Score the model once on the held-out test loader.
test_results = evaluate(model, test_loader, criterion, device)
test_loss, test_acc, test_auc, test_probs, test_preds, test_targets = test_results

print(f"Test loss: {test_loss:.4f}")
print(f"Test accuracy: {test_acc:.4f}")
print(f"Test ROC-AUC: {test_auc:.4f}")

# Per-class precision/recall/F1, labelled with the dataset's class names.
print("\nClassification report:")
print(classification_report(test_targets, test_preds, target_names=data.target_names))
