# Intro to Neural Networks with PyTorch
## Task 1 – Dense Network on Tabular Data

**Objective:** Binary classification to predict whether income exceeds $50K/yr based on census data.

**Dataset + Split:** UCI Adult (Census Income) via `sklearn.datasets.fetch_openml("adult", version=2)`; stratified 70/15/15 train/validation/test split

**Model Idea:** MLP with optional dropout and/or BN

**Hyperparameters Chosen:** Depth=2, Width=256, Dropout=0.3, Learning_rate=1e-3, Weight_decay=1e-4

**Results:**

**Next Steps:**

In [104]:
# Initialize wandb run
import wandb

# Hyperparameters and run metadata attached to the wandb run
config = {
    "model": "mlp",
    "depth": 2,
    "width": 256,
    "dropout": 0.3,
    "batch_size": 512,
    "lr": 1e-3,
    "weight_decay": 1e-4,
    "seed": 42,
}

# Log in, then open a run that carries the config and tags above
wandb.login()
run = wandb.init(
    project="pytorch-bootcamp",
    tags=["task=adult", "model=mlp"],
    config=config,
)



### Import and Preprocess Dataset

In [144]:
import numpy as np, pandas as pd
from sklearn.datasets import fetch_openml

# Import dataset
adult = fetch_openml('adult', version=2, as_frame=True) # Fetch dataset as a pandas dataframe
df = adult.frame.rename(columns={"class": "target"}) # Renamed copy with the label column called "target"; adult.frame itself is not modified

# Remove stray spaces
# A Series has no .strip() method, so strings are stripped element-wise;
# non-string entries (e.g. missing values) are passed through unchanged.
for c in df.columns:
    if df[c].dtype == object: # If datatype is text
        df[c] = df[c].map(lambda v: v.strip() if isinstance(v, str) else v) # Remove any blank spaces before or after the text

# Separate feature and target variables
X = df.drop(columns=["target"])
y = (df["target"] == ">50K").astype(int) # Binary label encoding: ">50K" -> 1, every other value ("<=50K") -> 0


print("Shape:", df.shape)# Check df shape
print("Target positive rate:", y.mean().round(3)) # Fraction of people with >50k income
df.head(3) # First three rows

Shape: (48842, 15)
Target positive rate: 0.239


Unnamed: 0,age,workclass,fnlwgt,education,education-num,marital-status,occupation,relationship,race,sex,capital-gain,capital-loss,hours-per-week,native-country,target
0,25,Private,226802,11th,7,Never-married,Machine-op-inspct,Own-child,Black,Male,0,0,40,United-States,<=50K
1,38,Private,89814,HS-grad,9,Married-civ-spouse,Farming-fishing,Husband,White,Male,0,0,50,United-States,<=50K
2,28,Local-gov,336951,Assoc-acdm,12,Married-civ-spouse,Protective-serv,Husband,White,Male,0,0,40,United-States,>50K


In [171]:
# Split into train/test/val
from sklearn.model_selection import train_test_split
seed = 42 # Shared random_state so both splits are repeatable

# Stage 1: hold out 30% of the rows; stratifying keeps the class balance similar across splits
X_train, X_temp, y_train, y_temp = train_test_split(
    X, y, test_size=0.30, stratify=y, random_state=seed
)
# Stage 2: cut the hold-out in half -> validation and test
X_val, X_test, y_val, y_test = train_test_split(
    X_temp, y_temp, test_size=0.50, stratify=y_temp, random_state=seed
)


print("Train:", X_train.shape, "Val:", X_val.shape, "Test:", X_test.shape)
positive_rates = [round(labels.mean(), 3) for labels in (y_train, y_val, y_test)]
print("Positive rate (train/val/test):", *positive_rates)

Train: (34189, 14) Val: (7326, 14) Test: (7327, 14)
Positive rate (train/val/test): 0.239 0.239 0.239


In [172]:
# Numeric features are picked by dtype; every remaining column is treated as categorical
num_cols = list(X_train.select_dtypes(include=[np.number]).columns)
numeric_names = set(num_cols)
cat_cols = [name for name in X_train.columns if name not in numeric_names]

print("Numeric columns:", num_cols)
print("\nCategorical columns:", cat_cols)

Numeric columns: ['age', 'fnlwgt', 'education-num', 'capital-gain', 'capital-loss', 'hours-per-week']

Categorical columns: ['workclass', 'education', 'marital-status', 'occupation', 'relationship', 'race', 'sex', 'native-country']


In [173]:
# Build preprocessing pipeline
from sklearn.compose import ColumnTransformer
from sklearn.preprocessing import StandardScaler, OneHotEncoder

# One transformer per feature family:
#   - numeric columns are standardized
#   - categorical columns are one-hot encoded into a dense array
# Columns listed in neither group are dropped.
preprocess = ColumnTransformer(
    transformers=[
        ("num", StandardScaler(), num_cols),
        ("cat", OneHotEncoder(handle_unknown="ignore", sparse_output=False), cat_cols)
    ],
    remainder="drop"
)

# Fit on training data only, then apply the fitted transform to val/test
X_train_np = preprocess.fit_transform(X_train)
X_val_np = preprocess.transform(X_val)
X_test_np = preprocess.transform(X_test)


# Label order matches the order of the shapes printed after it
print("Train/Val/Test Preprocessed:\n", X_train_np.shape, X_val_np.shape, X_test_np.shape)

Train/Test/Val Preprocessed:
 (34189, 108) (7326, 108) (7327, 108)


In [174]:
# Convert to PyTorch tensors + DataLoaders
import torch
from torch.utils.data import TensorDataset, DataLoader

def make_loader(X_np, y_s, batch_size=512, shuffle=False):
    """Wrap features and labels in a DataLoader of float32 tensors.

    Args:
        X_np: Array-like of features, one row per sample (e.g. a NumPy array).
        y_s: Array-like of labels, one per row of X_np. A pandas Series,
            NumPy array or plain list are all accepted.
        batch_size: Number of samples per mini-batch.
        shuffle: Passed to DataLoader; whether samples are reshuffled each epoch.

    Returns:
        DataLoader yielding (features, labels) pairs of float32 tensors.
    """
    # np.array casts and copies in one step and, unlike `.values`,
    # works for any array-like input rather than only pandas objects.
    X_t = torch.from_numpy(np.array(X_np, dtype=np.float32))
    y_t = torch.from_numpy(np.array(y_s, dtype=np.float32))
    return DataLoader(TensorDataset(X_t, y_t), batch_size=batch_size, shuffle=shuffle)

# Take the batch size from the run config so the value recorded with the run is the one actually used
batch_size = config["batch_size"]

train_loader = make_loader(X_train_np, y_train, batch_size=batch_size, shuffle=True) # Only the training data is shuffled
val_loader = make_loader(X_val_np, y_val, batch_size=batch_size, shuffle=False)
test_loader = make_loader(X_test_np, y_test, batch_size=batch_size, shuffle=False)

# Sanity check: pull one batch and inspect its shapes
batch = next(iter(train_loader))
print("One batch X:", batch[0].shape, " y:", batch[1].shape)

One batch X: torch.Size([512, 108])  y: torch.Size([512])


### Model Definition

In [176]:
# Define the MLP
import torch.nn as nn

class MLP(nn.Module):
    """Fully connected network that outputs one logit per sample (binary classification).

    Each of the `depth` hidden blocks is Linear -> [BatchNorm1d] -> ReLU -> [Dropout];
    a final Linear layer maps the last hidden width to a single logit.

    Args:
        in_dim: Number of input features.
        width: Number of units in every hidden layer.
        depth: Number of hidden blocks.
        dropout: Dropout probability applied after each activation;
            0 or None disables dropout.
        batch_norm: If True, insert BatchNorm1d between each hidden Linear layer
            and its activation. Defaults to False (no normalization layers).
    """

    def __init__(self, in_dim, width=256, depth=2, dropout=0.2, batch_norm=False):
        super().__init__() # Initialization of parent module (nn.Module)
        layers = []
        d = in_dim # Current input size to each layer
        for _ in range(depth):
            layers.append(nn.Linear(d, width)) # Connects d inputs -> width neurons
            if batch_norm: # Optional normalization of the pre-activations
                layers.append(nn.BatchNorm1d(width))
            layers.append(nn.ReLU()) # Activation function (introduces non-linearity)
            if dropout and dropout > 0: # Dropout if present
                layers.append(nn.Dropout(dropout))
            d = width # Next block consumes this block's output
        layers.append(nn.Linear(d, 1)) # Final output layer => 1 logit for binary classification
        self.net = nn.Sequential(*layers) # Combines all layers into one block

    def forward(self, x):
        """Run x through the network and return logits with the trailing size-1 dimension removed."""
        return self.net(x).squeeze(1) # (N, 1) -> (N,) to match what the loss function expects

In [177]:
# Loss, Optimizer, Device
from sklearn.metrics import roc_auc_score, accuracy_score
# Seed torch's global RNG before anything random happens in this cell, so weight
# initialization (and later dropout / shuffling drawn from the same RNG) starts
# from a known state on every run.
torch.manual_seed(config["seed"])

device = torch.device("cuda" if torch.cuda.is_available() else "cpu")

input_dim = X_train_np.shape[1] # Number of columns
# Hyperparameters are read from the run config so the logged values are the ones actually used
model = MLP(
    in_dim=input_dim,
    width=config["width"],
    depth=config["depth"],
    dropout=config["dropout"],
).to(device) # Create model using device

criterion = nn.BCEWithLogitsLoss() # BCE Loss with built-in sigmoid

optimizer = torch.optim.AdamW(
    model.parameters(),
    lr=config["lr"],
    weight_decay=config["weight_decay"],
) # Method of optimization of weights during gradient descent

In [178]:
# Define evaluator for AUC and ACC
def evaluate(loader):
    """Score the module-level `model` on every batch of `loader`.

    Puts the model in eval mode, collects sigmoid probabilities without
    gradient tracking, and computes metrics over the whole loader at once.

    Returns:
        Tuple (acc, auc): accuracy at a 0.5 probability cut-off and ROC AUC
        computed from the predicted probabilities.
    """
    model.eval() # Evaluation mode (e.g. dropout is disabled)
    prob_chunks = []
    target_chunks = []
    with torch.no_grad(): # No backpropagation here, so skip gradient tracking
        for features, labels in loader:
            features = features.to(device)
            labels = labels.to(device)
            batch_probs = torch.sigmoid(model(features)) # logits -> probabilities
            prob_chunks.append(batch_probs.cpu()) # Back on CPU for NumPy conversion
            target_chunks.append(labels.cpu())

    # Stitch the per-batch pieces into single NumPy arrays for the metric functions
    probs = torch.cat(prob_chunks).numpy()
    targets = torch.cat(target_chunks).numpy()
    auc = roc_auc_score(targets, probs)
    predictions = (probs >= 0.5).astype(int)
    acc = accuracy_score(targets, predictions)
    return acc, auc

### Training, Validation and Testing

In [180]:
# Train and validate
import copy

best_state = None     # state_dict snapshot of the best epoch seen so far
best_val_auc = -1.0   # Starts below the valid AUC range, so any real AUC beats it

max_epochs = 10
for epoch in range(1, max_epochs + 1):
    # TRAINING
    model.train() # Back to training mode; evaluate() switches the model to eval mode
    loss_sum = 0.0 # Sum of per-sample losses over this epoch

    for xb, yb in train_loader: # Loop over training mini-batches
        xb = xb.to(device) # Batches are moved to whichever device the model lives on
        yb = yb.to(device)
        optimizer.zero_grad(set_to_none=True) # Drop gradients from the previous step
        logits = model(xb) # Forward pass
        loss = criterion(logits, yb) # BCEWithLogits loss (mean over the batch)
        loss.backward() # Gradients of the loss w.r.t. every trainable parameter
        optimizer.step() # AdamW weight update
        batch_n = xb.size(0)
        loss_sum += loss.item() * batch_n # Undo the batch mean so a short last batch is weighted correctly

    train_loss = loss_sum / len(train_loader.dataset) # Average loss per sample this epoch

    # VALIDATION
    val_acc, val_auc = evaluate(val_loader)

    # Checkpoint if val AUC improves
    improved = val_auc > best_val_auc
    if improved:
        best_val_auc = val_auc
        best_state = copy.deepcopy(model.state_dict()) # Deep copy so later updates don't overwrite the snapshot

    # Log progress
    print(f"Epoch {epoch:02d} | "
          f"train_loss={train_loss:.4f} | "
          f"val_acc={val_acc:.3f} | "
          f"val_auc={val_auc:.3f}")

# After training: restore the weights of the best validation epoch
if best_state is not None:
    model.load_state_dict(best_state)

# TESTING
test_acc, test_auc = evaluate(test_loader)
print("\nFinal Evaluation:")
print(f"Best Val AUC={best_val_auc:.3f} | "
      f"Test Acc={test_acc:.3f} | Test AUC={test_auc:.3f}")

Epoch 01 | train_loss=0.3926 | val_acc=0.859 | val_auc=0.913
Epoch 02 | train_loss=0.3172 | val_acc=0.861 | val_auc=0.915
Epoch 03 | train_loss=0.3097 | val_acc=0.862 | val_auc=0.917
Epoch 04 | train_loss=0.3067 | val_acc=0.862 | val_auc=0.917
Epoch 05 | train_loss=0.3035 | val_acc=0.862 | val_auc=0.917
Epoch 06 | train_loss=0.3029 | val_acc=0.861 | val_auc=0.918
Epoch 07 | train_loss=0.3020 | val_acc=0.863 | val_auc=0.918
Epoch 08 | train_loss=0.2997 | val_acc=0.864 | val_auc=0.917
Epoch 09 | train_loss=0.2971 | val_acc=0.863 | val_auc=0.918
Epoch 10 | train_loss=0.2962 | val_acc=0.864 | val_auc=0.918

Final Evaluation:
Best Val AUC=0.918 | Test Acc=0.858 | Test AUC=0.911
