In [2]:
#!/usr/bin/env python
from __future__ import print_function

import torch
from torch import nn

In [3]:
# ----------------------------
# Model definition
# ----------------------------
class NeuralNetwork(nn.Module):
    """
    Simple feedforward neural network:
    - Input: 28x28 image (flattened to 784 features)
    - Hidden layers: two fully connected layers with ReLU activations
    - Output: a single scalar (regression target)
    """
    def __init__(self):
        super().__init__()
        self.flatten = nn.Flatten()
        self.linear_relu_stack = nn.Sequential(
            nn.Linear(28 * 28, 512),  # first hidden layer
            nn.ReLU(),
            nn.Linear(512, 512),      # second hidden layer
            nn.ReLU(),
            nn.Linear(512, 1),        # final layer outputs 1 value
        )

    def forward(self, x):
        x = self.flatten(x)           # flatten input from [B, 1, 28, 28] -> [B, 784]
        return self.linear_relu_stack(x)

In [4]:

# ----------------------------
# Ground-truth function (synthetic regression target)
# ----------------------------
INPUT_DIM = 28 * 28
torch.manual_seed(0)  # for reproducibility

# Hidden "true" linear mapping: y = xW + b
W_target = torch.randn(INPUT_DIM, 1) * 0.5
b_target = torch.randn(1) * 0.5


def f(x_flat):
    """
    Ground-truth function for regression.
    Args:
        x_flat: tensor of shape [batch, 784]
    Returns:
        tensor of shape [batch, 1]
    """
    return x_flat.mm(W_target) + b_target


def make_dataset(n_samples=60000):
    """
    Generate a synthetic dataset of (x, y) pairs.
    - x: random "image-like" inputs of shape [N, 1, 28, 28]
    - y: regression targets computed from ground-truth function f
    """
    x = torch.randn(n_samples, 1, 28, 28)
    x_flat = x.view(n_samples, -1)
    y = f(x_flat)
    return x, y


def get_split_indices(n_samples, train_ratio=0.7, val_ratio=0.15):
    """
    Compute number of samples in train/val/test split.
    Args:
        n_samples: total number of samples
        train_ratio: fraction of training samples
        val_ratio: fraction of validation samples
    Returns:
        (n_train, n_val, n_test)
    """
    n_train = int(n_samples * train_ratio)
    n_val = int(n_samples * val_ratio)
    n_test = n_samples - n_train - n_val
    return n_train, n_val, n_test













In [5]:
# Select device: CUDA > MPS (Apple Silicon) > CPU
device = (
        torch.device("cuda")
        if torch.cuda.is_available()
        else torch.device("mps") if torch.backends.mps.is_available()
        else torch.device("cpu"))

# Generate dataset and split into train/val/test
N = 60000
X, Y = make_dataset(N)
n_train, n_val, n_test = get_split_indices(N)

X_train, Y_train = X[:n_train], Y[:n_train]
X_val, Y_val = X[n_train:n_train+n_val], Y[n_train:n_train+n_val]
X_test, Y_test = X[n_train+n_val:], Y[n_train+n_val:]

# Initialize model, loss, and optimizer
model = NeuralNetwork().to(device)
loss_fn = nn.MSELoss()  # regression loss
optimizer = torch.optim.SGD(model.parameters(), lr=1e-2)

# Training settings
BATCH_SIZE = 64
EPOCHS = 10

In [6]:
# ----------------------------
# Training loop
# ----------------------------
for epoch in range(1, EPOCHS+1):
        # Shuffle training set each epoch
        perm = torch.randperm(n_train)
        X_train = X_train[perm]
        Y_train = Y_train[perm]

        model.train()
        total_loss = 0.0

        # Mini-batch training
        for i in range(0, n_train, BATCH_SIZE):
            x_batch = X_train[i:i+BATCH_SIZE].to(device)
            y_batch = Y_train[i:i+BATCH_SIZE].to(device)

            # Forward pass
            pred = model(x_batch)
            loss = loss_fn(pred, y_batch)

            # Backward pass + parameter update
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()

            # Accumulate weighted batch loss
            total_loss += loss.item() * len(x_batch)

        # Average training loss
        avg_train_loss = total_loss / n_train

        # ----------------------------
        # Validation phase
        # ----------------------------
        model.eval()
        with torch.no_grad():
            pred_val = model(X_val.to(device))
            val_loss = loss_fn(pred_val, Y_val.to(device)).item()

        print(f"Epoch {epoch:02d}: train_loss={avg_train_loss:.6f}, val_loss={val_loss:.6f}")

# ----------------------------
# Final test evaluation
# ----------------------------
model.eval()
with torch.no_grad():
        pred_test = model(X_test.to(device))
        test_loss = loss_fn(pred_test, Y_test.to(device)).item()
print(f"Test MSE: {test_loss:.6f}")

Epoch 01: train_loss=48.364317, val_loss=17.835579
Epoch 02: train_loss=28.789821, val_loss=9.167013
Epoch 03: train_loss=16.698547, val_loss=39.731724
Epoch 04: train_loss=15.485734, val_loss=5.940180
Epoch 05: train_loss=10.399819, val_loss=8.941078
Epoch 06: train_loss=9.359571, val_loss=3.547918
Epoch 07: train_loss=5.446788, val_loss=5.587064
Epoch 08: train_loss=5.339022, val_loss=2.952600
Epoch 09: train_loss=4.955244, val_loss=3.221295
Epoch 10: train_loss=3.831637, val_loss=2.169223
Test MSE: 2.173250
