# ARP Optimizer on MNIST (PyTorch)
This notebook trains a simple fully connected classifier on MNIST using the `ARP` optimizer from the `adaptive_dynamics` toolkit.

In [None]:
import torch, torch.nn as nn
from torch.utils.data import DataLoader
from torchvision import datasets, transforms
from adaptive_dynamics.arp.optimizers import ARP

# Run on the GPU when PyTorch can see one, otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = 'cuda'
else:
    device = 'cpu'

# Training hyperparameters.
batch_size, epochs = 128, 3

# Data: the MNIST train and test splits share the same root directory,
# transform and download flag, so those arguments are defined once.
transform = transforms.Compose([transforms.ToTensor()])
_mnist_args = dict(root='./data', transform=transform, download=True)
train_ds = datasets.MNIST(train=True, **_mnist_args)
test_ds = datasets.MNIST(train=False, **_mnist_args)

# Only the training loader shuffles; the test loader keeps dataset order.
train_loader = DataLoader(train_ds, batch_size=batch_size, shuffle=True)
test_loader = DataLoader(test_ds, batch_size=batch_size)

In [None]:
# Classifier: flatten each 28x28 input, one hidden layer of 256 ReLU units,
# then a 10-way linear output (one logit per class).
_layers = [
    nn.Flatten(),
    nn.Linear(28 * 28, 256),
    nn.ReLU(),
    nn.Linear(256, 10),
]
model = nn.Sequential(*_layers)
model = model.to(device)

# Loss on raw logits, and the ARP optimizer over all model parameters.
criterion = nn.CrossEntropyLoss()
opt = ARP(model.parameters(), lr=3e-3, alpha=0.01, mu=0.001)

def evaluate(net=None, loader=None, dev=None):
    """Return the top-1 accuracy of a classifier over a data loader.

    Called with no arguments it evaluates the notebook-level ``model`` on
    ``test_loader`` using ``device``, exactly as before.

    Args:
        net: Module to evaluate; defaults to the notebook-level ``model``.
        loader: Iterable of ``(inputs, labels)`` batches; defaults to the
            notebook-level ``test_loader``.
        dev: Device each batch is moved to; defaults to the notebook-level
            ``device``.

    Returns:
        Fraction of correctly classified samples as a float, or ``0.0`` when
        the loader yields no samples.

    Note:
        The module is left in eval mode; call ``.train()`` before resuming
        training.
    """
    net = model if net is None else net
    loader = test_loader if loader is None else loader
    dev = device if dev is None else dev

    net.eval()
    correct = 0
    total = 0
    with torch.no_grad():  # no gradients are needed for scoring
        for X, y in loader:
            X, y = X.to(dev), y.to(dev)
            pred = net(X).argmax(dim=1)
            correct += (pred == y).sum().item()
            total += y.numel()
    # An empty loader would otherwise raise ZeroDivisionError.
    return correct / total if total else 0.0

# Train for `epochs` passes over the training set and report test accuracy
# after each pass.
for epoch in range(1, epochs + 1):
    # evaluate() switches the model to eval mode, so re-enable training mode
    # at the start of every epoch.
    model.train()
    for X, y in train_loader:
        X, y = X.to(device), y.to(device)
        # Clear gradients *before* the backward pass: if a previous run of
        # this cell was interrupted between backward() and zero_grad(), stale
        # gradients would otherwise be accumulated into this step.
        opt.zero_grad()
        logits = model(X)
        loss = criterion(logits, y)
        loss.backward()
        opt.step()
    acc = evaluate()
    print(f'Epoch {epoch}: test acc={acc:.3f}')

Try adjusting `alpha` and `mu` to see how the ARP conductance state influences learning dynamics.