# Baseline CNN

In [None]:
import numpy as np
import torch
import torch.nn as nn
import torch.optim as optim
import random
import os

from torch.utils.data import DataLoader, TensorDataset, random_split

def load_all_data(data_files, label_files):
    """
    Load paired ``.npy`` data/label files and stack them along axis 0.

    data_files and label_files: equal-length sequences of filenames; the
    i-th data file is paired with the i-th label file.
    Returns:
      X: NumPy array holding every data file concatenated along axis 0
         (e.g. (N, H, W) or (N, H, W, 1) for image data)
      y: NumPy array holding every label file concatenated along axis 0
         (e.g. (N,))
    Raises:
      ValueError: if no files are given, if the two lists differ in length,
        or if a data file and its label file disagree on the sample count.
    """
    data_files = list(data_files)
    label_files = list(label_files)

    # zip() would silently drop the unpaired tail, so reject mismatches early.
    if len(data_files) != len(label_files):
        raise ValueError(
            f"Got {len(data_files)} data files but {len(label_files)} label files."
        )
    if not data_files:
        raise ValueError("At least one data/label file pair is required.")

    all_data = []
    all_labels = []
    for dfile, lfile in zip(data_files, label_files):
        print(f"Loading {dfile} and {lfile}...")
        data_part = np.load(dfile)   # expected shape: (n_samples, H, W)
        label_part = np.load(lfile)  # expected shape: (n_samples,)

        # Compare only the leading (sample) dimension; a mismatch here would
        # otherwise surface much later as misaligned inputs and targets.
        if data_part.shape[:1] != label_part.shape[:1]:
            raise ValueError(
                f"Sample count mismatch between {dfile} {data_part.shape} "
                f"and {lfile} {label_part.shape}."
            )
        all_data.append(data_part)
        all_labels.append(label_part)

    X = np.concatenate(all_data, axis=0)
    y = np.concatenate(all_labels, axis=0)
    return X, y

class SumOfDigitsCNN(nn.Module):
    """
    Small two-stage CNN that regresses a single scalar from an image.

    Architecture: (conv3x3 -> ReLU -> maxpool2x2) twice, then a 128-unit
    fully connected layer with dropout and a 1-unit output head.

    Args:
      in_channels: number of channels in the input images.
      input_size: (height, width) of the input images. Defaults to (40, 168),
        which yields a flattened feature size of 64 * 10 * 42 = 26880.
    Raises:
      ValueError: if the input is too small to survive both 2x2 poolings.
    """

    def __init__(self, in_channels=1, input_size=(40, 168)):
        super().__init__()
        height, width = input_size

        # The padded 3x3 convolutions keep the spatial size; each 2x2/stride-2
        # max-pool halves it (rounding down), so two poolings give size // 2 // 2.
        pooled_h = height // 2 // 2
        pooled_w = width // 2 // 2
        if pooled_h < 1 or pooled_w < 1:
            raise ValueError(
                f"input_size {tuple(input_size)} is too small for two 2x2 poolings."
            )

        self.conv1 = nn.Conv2d(in_channels, 32, kernel_size=3, padding=1)
        self.pool1 = nn.MaxPool2d(kernel_size=2, stride=2)

        self.conv2 = nn.Conv2d(32, 64, kernel_size=3, padding=1)
        self.pool2 = nn.MaxPool2d(kernel_size=2, stride=2)

        # For the default (40, 168) input: (64, 10, 42) -> 26880 features.
        self.fc1 = nn.Linear(64 * pooled_h * pooled_w, 128)
        self.drop = nn.Dropout(0.2)
        self.fc2 = nn.Linear(128, 1)  # single output for regression

    def forward(self, x):
        """Map a batch of shape (B, in_channels, H, W) to predictions of shape (B, 1)."""
        # Shape comments below assume the default 40x168 input.
        x = torch.relu(self.conv1(x))   # (B, 32, 40, 168)
        x = self.pool1(x)               # (B, 32, 20, 84)

        x = torch.relu(self.conv2(x))   # (B, 64, 20, 84)
        x = self.pool2(x)               # (B, 64, 10, 42)

        # flatten() also works on non-contiguous activations, where view() raises.
        x = torch.flatten(x, start_dim=1)  # (B, 26880)
        x = torch.relu(self.fc1(x))        # (B, 128)
        x = self.drop(x)
        x = self.fc2(x)                    # (B, 1)
        return x

def _evaluate(model, loader, criterion, device):
    """Return (per-sample mean loss, rounded exact-match accuracy) of model over loader."""
    model.eval()
    loss_sum = 0.0
    correct = 0
    seen = 0
    with torch.no_grad():
        for batch_x, batch_y in loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)
            preds = model(batch_x)

            # criterion returns the batch mean; re-weight by batch size so the
            # final (possibly smaller) batch does not skew the average.
            loss_sum += criterion(preds, batch_y).item() * batch_x.size(0)
            # A prediction counts as correct when it rounds to the target.
            correct += (torch.round(preds) == batch_y).sum().item()
            seen += batch_x.size(0)
    return loss_sum / seen, correct / seen


def main():
    """
    Train SumOfDigitsCNN on the data*.npy / lab*.npy files and report results.

    Steps: seed the RNGs, load and normalize the data, split off 10% for
    validation, train for 30 epochs with Adam + MSE while checkpointing the
    best validation loss to "best_sum_digits_model.pth", then reload that
    checkpoint and print its validation MSE, rounded-prediction accuracy and
    a few sample predictions.

    Raises:
      ValueError: if the dataset is too small to yield a validation sample.
    """
    SEED = 42
    random.seed(SEED)
    np.random.seed(SEED)
    torch.manual_seed(SEED)

    checkpoint_path = "best_sum_digits_model.pth"

    # -------------------------------------------------------
    # 1) Load data
    # -------------------------------------------------------
    data_files = ["data0.npy", "data1.npy", "data2.npy"]
    label_files = ["lab0.npy", "lab1.npy", "lab2.npy"]
    X, y = load_all_data(data_files, label_files)
    print("Data shape:", X.shape)
    print("Labels shape:", y.shape)

    # Add a trailing channel axis to (N, H, W) data, then move channels to
    # axis 1 as Conv2d expects: (N, H, W, C) -> (N, C, H, W).
    if X.ndim == 3:
        X = np.expand_dims(X, axis=-1)
    X = np.transpose(X, (0, 3, 1, 2))

    # NOTE(review): dividing by 255 assumes 8-bit pixel intensities — confirm.
    X = X.astype(np.float32) / 255.0

    X_tensor = torch.from_numpy(X)
    y_tensor = torch.from_numpy(y).float().view(-1, 1)

    # -------------------------------------------------------
    # 2) Create Dataset & Split
    # -------------------------------------------------------
    full_dataset = TensorDataset(X_tensor, y_tensor)
    total_samples = len(full_dataset)
    val_size = int(0.1 * total_samples)
    train_size = total_samples - val_size
    if val_size == 0:
        # Fail before training instead of dividing by zero after an epoch.
        raise ValueError(
            f"Need at least 10 samples to form a validation split, got {total_samples}."
        )
    train_dataset, val_dataset = random_split(full_dataset, [train_size, val_size])

    print(f"Training samples: {train_size}")
    print(f"Validation samples: {val_size}")

    train_loader = DataLoader(train_dataset, batch_size=32, shuffle=True)
    val_loader = DataLoader(val_dataset, batch_size=32, shuffle=False)

    # -------------------------------------------------------
    # 3) Model, Loss, Optimizer
    # -------------------------------------------------------
    device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
    print("Using device:", device)

    model = SumOfDigitsCNN(in_channels=1).to(device)
    criterion = nn.MSELoss()
    optimizer = optim.Adam(model.parameters(), lr=1e-3)

    # -------------------------------------------------------
    # 4) Training Loop
    # -------------------------------------------------------
    best_val_loss = float("inf")
    num_epochs = 30

    for epoch in range(num_epochs):
        model.train()
        train_loss_accum = 0.0

        for batch_x, batch_y in train_loader:
            batch_x = batch_x.to(device)
            batch_y = batch_y.to(device)
            optimizer.zero_grad()

            preds = model(batch_x)
            loss = criterion(preds, batch_y)

            loss.backward()
            optimizer.step()

            train_loss_accum += loss.item() * batch_x.size(0)

        train_loss_epoch = train_loss_accum / train_size

        val_loss_epoch, _ = _evaluate(model, val_loader, criterion, device)
        print(f"Epoch [{epoch+1}/{num_epochs}] "
              f"Train MSE: {train_loss_epoch:.4f} | Val MSE: {val_loss_epoch:.4f}")

        if val_loss_epoch < best_val_loss:
            best_val_loss = val_loss_epoch
            torch.save(model.state_dict(), checkpoint_path)
            print("  -> New best model saved!")

    # -------------------------------------------------------
    # 5) Evaluate best model
    # -------------------------------------------------------
    best_model = SumOfDigitsCNN(in_channels=1).to(device)
    # weights_only=True restricts unpickling to tensors/containers, which is
    # all a state_dict needs, and avoids torch.load's FutureWarning.
    best_model.load_state_dict(
        torch.load(checkpoint_path, map_location=device, weights_only=True)
    )

    final_val_loss, val_accuracy = _evaluate(best_model, val_loader, criterion, device)
    print(f"\nFinal Val MSE (best model): {final_val_loss:.4f}")

    # -------------------------------------------------------
    # 6) Report integer accuracy on validation set
    # -------------------------------------------------------
    print(f"Final Validation Accuracy: {val_accuracy * 100:.2f}%")

    example_x, example_y = next(iter(val_loader))
    # Inference only: no autograd graph is needed for the sample predictions.
    with torch.no_grad():
        example_preds = best_model(example_x.to(device))

    print("\nSample predictions:", example_preds[:5].cpu().numpy().flatten())
    print("Actual sums:", example_y[:5].numpy().flatten())

# Run the full load/train/evaluate pipeline only when executed as a script
# (or notebook cell), not when this module is imported.
if __name__ == "__main__":
    main()


Loading data0.npy and lab0.npy...
Loading data1.npy and lab1.npy...
Loading data2.npy and lab2.npy...
Data shape: (30000, 40, 168)
Labels shape: (30000,)
Training samples: 27000
Validation samples: 3000
Using device: cuda
Epoch [1/30] Train MSE: 31.4844 | Val MSE: 22.1904
  -> New best model saved!
Epoch [2/30] Train MSE: 17.7926 | Val MSE: 13.8352
  -> New best model saved!
Epoch [3/30] Train MSE: 13.9691 | Val MSE: 12.1919
  -> New best model saved!
Epoch [4/30] Train MSE: 12.0205 | Val MSE: 15.8894
Epoch [5/30] Train MSE: 10.9689 | Val MSE: 10.4995
  -> New best model saved!
Epoch [6/30] Train MSE: 9.9810 | Val MSE: 10.6911
Epoch [7/30] Train MSE: 9.2746 | Val MSE: 10.7359
Epoch [8/30] Train MSE: 8.6359 | Val MSE: 9.7961
  -> New best model saved!
Epoch [9/30] Train MSE: 8.2132 | Val MSE: 13.5956
Epoch [10/30] Train MSE: 7.8522 | Val MSE: 10.0874
Epoch [11/30] Train MSE: 7.2192 | Val MSE: 10.3959
Epoch [12/30] Train MSE: 6.9754 | Val MSE: 10.9692
Epoch [13/30] Train MSE: 6.6592 | Va

  best_model.load_state_dict(torch.load("best_sum_digits_model.pth", map_location=device))



Final Val MSE (best model): 9.7961
Final Validation Accuracy: 13.60%

Sample predictions: [16.290401 15.803567 21.477621 13.758346 25.975533]
Actual sums: [23. 14. 20. 15. 23.]
