In [1]:
import torch
import torch.nn as nn
import torch.optim as optim
from sklearn.datasets import load_digits
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score

In [2]:
digits = load_digits()

In [3]:
X = digits.data
y = digits.target

In [6]:
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)

In [7]:
X_train, X_test, y_train, y_test = train_test_split(
    X_scaled, y, test_size=0.2, random_state=42, stratify=y
)

In [8]:
X_train = torch.tensor(X_train, dtype=torch.float32)
y_train = torch.tensor(y_train, dtype=torch.long)
X_test = torch.tensor(X_test, dtype=torch.float32)
y_test = torch.tensor(y_test, dtype=torch.long)

In [9]:
class DigitNet(nn.Module):
    def __init__(self, input_dim, num_classes):
        super().__init__()
        self.net = nn.Sequential(
            nn.Linear(input_dim, 128),
            nn.ReLU(),
            nn.Linear(128, 64),
            nn.ReLU(),
            nn.Linear(64, num_classes)
        )

    def forward(self, x):
        return self.net(x)

model = DigitNet(input_dim=64, num_classes=10)

In [10]:
criterion = nn.CrossEntropyLoss()
optimizer = optim.Adam(model.parameters(), lr=0.001)

In [11]:
epochs = 100

for epoch in range(epochs):
    outputs = model(X_train)
    loss = criterion(outputs, y_train)

    optimizer.zero_grad()
    loss.backward()
    optimizer.step()

    if (epoch + 1) % 10 == 0:
        print(f"Epoch [{epoch+1}/{epochs}], Loss: {loss.item():.4f}")

Epoch [10/100], Loss: 2.0395
Epoch [20/100], Loss: 1.6385
Epoch [30/100], Loss: 1.1325
Epoch [40/100], Loss: 0.6928
Epoch [50/100], Loss: 0.4190
Epoch [60/100], Loss: 0.2672
Epoch [70/100], Loss: 0.1831
Epoch [80/100], Loss: 0.1328
Epoch [90/100], Loss: 0.1008
Epoch [100/100], Loss: 0.0788


In [12]:
model.eval()
with torch.no_grad():
    preds = model(X_test)
    predicted_labels = torch.argmax(preds, dim=1)
    acc = accuracy_score(y_test, predicted_labels)
print(f"Accuracy: {acc*100:.2f}%")

Accuracy: 95.83%


In [14]:
for name, param in model.named_parameters():
    if param.requires_grad:
        print(f"{name}: grad mean = {param.grad.abs().mean():.6f}")

net.0.weight: grad mean = 0.000268
net.0.bias: grad mean = 0.000454
net.2.weight: grad mean = 0.000252
net.2.bias: grad mean = 0.000277
net.4.weight: grad mean = 0.001613
net.4.bias: grad mean = 0.000771
