# Example implementation of a model using CPNN

### Necessary import and environmental variable setting to ensure reproducibility

In [1]:
import os
os.environ['CUBLAS_WORKSPACE_CONFIG'] = ':16:8'

### Install libraries if needed

In [2]:
#!pip install torch
#!pip install optuna
#!pip install numpy

### Imports and seeding

In [3]:
import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim

from torchvision import datasets, transforms
from torch.utils.data import DataLoader, random_split

print(torch.cuda.is_available())
print(torch.cuda.get_device_name(0) if torch.cuda.is_available() else "No GPU found")

# Although random is not explicitly used in this file, certain functions do use it.
# Therefore, not seeding it is a source of randomness
import random
import numpy as np

# Imports for displaying
from sklearn.metrics import confusion_matrix, ConfusionMatrixDisplay
import matplotlib.pyplot as plt

# For bayesian hyperparameter search
import optuna

from BaseCPNN import BaseCPNN

# Seeding method
def set_all_seeds(seed=42):
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
    torch.backends.cudnn.deterministic = True
    torch.backends.cudnn.benchmark = False
    torch.use_deterministic_algorithms(True)
    random.seed(42)

# Seeding method specific for DataLoaders
def seed_worker(worker_id):
    worker_seed = 42
    np.random.seed(worker_seed)
    random.seed(worker_seed)

False
No GPU found


### We load the data and we preprocess it slightly by transforming it

In [4]:
# Define the transformation
transform=transforms.Compose([transforms.ToTensor(), # first, convert image to PyTorch tensor
transforms.Normalize((0.1307,), (0.3081,)) # normalize inputs
                              ])

full_train_dataset = datasets.FashionMNIST(root="./data", train=True, download=True, transform=transform)
test_dataset = datasets.FashionMNIST(root="./data", train=False, download=False, transform=transform)


# Print the number of training and test samples
print(f"Training samples: {len(full_train_dataset)}")
print(f"Testing samples: {len(test_dataset)}")

# Get a sample and inspect its shape
sample, label = full_train_dataset[0]
print("Sample image shape:", sample.shape)
# If flattening, input size is:
input_size = sample.numel()  # This will give 3072 for CIFAR-10
print("Input size (flattened):", input_size)

# Output size (number of classes)
output_size = len(full_train_dataset.classes)
print("Output size (number of classes):", output_size)

# Set device
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")





100%|█████████████████████████████████████████████████████████████████| 5.15k/5.15k [00:00<00:00, 796kB/s]

Training samples: 60000
Testing samples: 10000
Sample image shape: torch.Size([1, 28, 28])
Input size (flattened): 784
Output size (number of classes): 10





### Finally, we train the network. Afterwards, we retrain with the best hyperparameters and create the confussion matrix to display its performance.

In [5]:
# @title
# Train the model

def objective(trial):

    set_all_seeds(42)

    g = torch.Generator()
    g.manual_seed(42)

    # Create DataLoaders
    train_loader = DataLoader(full_train_dataset, batch_size=256, shuffle=True, num_workers=8, pin_memory=True,  worker_init_fn=seed_worker, generator=g)
    test_loader = DataLoader(test_dataset, batch_size=256, shuffle=False, num_workers=8, pin_memory=True, worker_init_fn=seed_worker, generator=g)


    # Hyperparameters to tune
    kohonen_lr = trial.suggest_float("kohonen_lr", 0.6, 0.9, log=True)
    grossberg_lr = trial.suggest_float("grossberg_lr", 0.15, 0.3, log=True)
    neighborhood_size = trial.suggest_int("neighborhood_size", 16, 18, step=2)
    neighborhood_function = trial.suggest_categorical("neighborhood_function", ['gaussian', 'rectangular', 'triangular'])
    hidden_size = trial.suggest_int("hidden_size", 250, 250, step=250)


    # Create the model with the current hyperparameters.
    model = BaseCPNN(input_size, hidden_size, output_size, neighborhood_function, neighborhood_size).to(device)

    # Train the model using the current hyperparameters.
    model.fit(device,
              train_loader=test_loader,
              val_loader=test_loader,
              epochs=5,
              kohonen_lr=kohonen_lr,
              grossberg_lr=grossberg_lr,
              early_stopping=True,
              patience=10)

    # Evaluate the model on the test set
    val_loss = model.evaluate(test_loader, return_loss=True)
    print(f"Validation loss: {val_loss}")
    return val_loss

# Create and run the Optuna study.
study = optuna.create_study(direction="minimize", pruner=optuna.pruners.MedianPruner(), sampler=optuna.samplers.TPESampler(seed=42))
study.optimize(objective, n_trials=5)

print("Best trial:")
trial = study.best_trial
print("  Accuracy: ", trial.value)
print("  Params: ")
for key, value in trial.params.items():
    print(f"    {key}: {value}")

[I 2025-04-17 13:07:37,574] A new study created in memory with name: no-name-5342657e-d86c-4932-8704-a4766a555ca3



[Epoch 1] Starting training...
Accuracy: 39.82%
[Epoch 1] Validation loss: 17184715212779946.0000

[Epoch 2] Starting training...
Accuracy: 45.94%
[Epoch 2] Validation loss: 18260111235176838246210033680384.0000

[Epoch 3] Starting training...
Accuracy: 10.00%
[Epoch 3] Validation loss: nan

[Epoch 4] Starting training...
Accuracy: 10.00%
[Epoch 4] Validation loss: nan

[Epoch 5] Starting training...
Accuracy: 10.00%
[Epoch 5] Validation loss: nan


[W 2025-04-17 13:07:54,957] Trial 0 failed with parameters: {'kohonen_lr': 0.698400419368405, 'grossberg_lr': 0.28992441036839267, 'neighborhood_size': 18, 'neighborhood_function': 'gaussian', 'hidden_size': 250} because of the following error: The value nan is not acceptable.
[W 2025-04-17 13:07:54,958] Trial 0 failed with value nan.


Accuracy: 10.00%
Validation loss: nan

[Epoch 1] Starting training...
Accuracy: 57.77%
[Epoch 1] Validation loss: 1.9034

[Epoch 2] Starting training...
Accuracy: 59.07%
[Epoch 2] Validation loss: 1.8862

[Epoch 3] Starting training...
Accuracy: 64.16%
[Epoch 3] Validation loss: 1.8438

[Epoch 4] Starting training...
Accuracy: 70.24%
[Epoch 4] Validation loss: 1.7874

[Epoch 5] Starting training...
Accuracy: 76.54%
[Epoch 5] Validation loss: 1.7504


[I 2025-04-17 13:08:12,396] Trial 1 finished with value: 1.7504054286956787 and parameters: {'kohonen_lr': 0.6142982339719593, 'grossberg_lr': 0.27342376581350497, 'neighborhood_size': 18, 'neighborhood_function': 'triangular', 'hidden_size': 250}. Best is trial 1 with value: 1.7504054286956787.


Accuracy: 76.54%
Validation loss: 1.7504054286956787

[Epoch 1] Starting training...
Accuracy: 61.45%
[Epoch 1] Validation loss: 1.8821

[Epoch 2] Starting training...
Accuracy: 62.62%
[Epoch 2] Validation loss: 1.8655

[Epoch 3] Starting training...
Accuracy: 68.03%
[Epoch 3] Validation loss: 1.8317

[Epoch 4] Starting training...
Accuracy: 69.25%
[Epoch 4] Validation loss: 1.8067

[Epoch 5] Starting training...
Accuracy: 77.39%
[Epoch 5] Validation loss: 1.7388


[I 2025-04-17 13:08:29,651] Trial 2 finished with value: 1.7387690605163575 and parameters: {'kohonen_lr': 0.8408860022624141, 'grossberg_lr': 0.17378476486519037, 'neighborhood_size': 16, 'neighborhood_function': 'triangular', 'hidden_size': 250}. Best is trial 2 with value: 1.7387690605163575.


Accuracy: 77.39%
Validation loss: 1.7387690605163575

[Epoch 1] Starting training...
Accuracy: 60.11%
[Epoch 1] Validation loss: 1.8974

[Epoch 2] Starting training...
Accuracy: 61.96%
[Epoch 2] Validation loss: 1.8803

[Epoch 3] Starting training...
Accuracy: 65.30%
[Epoch 3] Validation loss: 1.8501

[Epoch 4] Starting training...
Accuracy: 72.04%
[Epoch 4] Validation loss: 1.7922

[Epoch 5] Starting training...
Accuracy: 78.24%
[Epoch 5] Validation loss: 1.7379


[I 2025-04-17 13:08:47,155] Trial 3 finished with value: 1.73791587600708 and parameters: {'kohonen_lr': 0.7148468249446474, 'grossberg_lr': 0.18355235708688308, 'neighborhood_size': 18, 'neighborhood_function': 'triangular', 'hidden_size': 250}. Best is trial 3 with value: 1.73791587600708.


Accuracy: 78.24%
Validation loss: 1.73791587600708

[Epoch 1] Starting training...
Accuracy: 50.13%
[Epoch 1] Validation loss: 2.0022

[Epoch 2] Starting training...
Accuracy: 60.53%
[Epoch 2] Validation loss: 1.8766

[Epoch 3] Starting training...
Accuracy: 66.63%
[Epoch 3] Validation loss: 1.8458

[Epoch 4] Starting training...
Accuracy: 67.67%
[Epoch 4] Validation loss: 1.8176

[Epoch 5] Starting training...
Accuracy: 72.93%
[Epoch 5] Validation loss: 1.7677


[I 2025-04-17 13:09:04,572] Trial 4 finished with value: 1.767697046470642 and parameters: {'kohonen_lr': 0.7218736479015538, 'grossberg_lr': 0.2584953739333517, 'neighborhood_size': 16, 'neighborhood_function': 'rectangular', 'hidden_size': 250}. Best is trial 3 with value: 1.73791587600708.


Accuracy: 72.93%
Validation loss: 1.767697046470642
Best trial:
  Accuracy:  1.73791587600708
  Params: 
    kohonen_lr: 0.7148468249446474
    grossberg_lr: 0.18355235708688308
    neighborhood_size: 18
    neighborhood_function: triangular
    hidden_size: 250


In [None]:
# Load the best model
best_params = study.best_trial.params

# Reset all seeds
set_all_seeds(42)

best_model = BaseCPNN(input_size,
                      best_params['hidden_size'],
                      output_size,
                      neighborhood_function=best_params['neighborhood_function'],
                      neighborhood_size=best_params['neighborhood_size']).to(device)


# Reload the generator to ensure that the loader passes data in the same order
g = torch.Generator()
g.manual_seed(42)

train_loader = DataLoader(full_train_dataset, batch_size=256, shuffle=True, num_workers=8, pin_memory=True,  worker_init_fn=seed_worker, generator=g)
test_loader = DataLoader(test_dataset, batch_size=256, shuffle=False, num_workers=8, pin_memory=True, worker_init_fn=seed_worker, generator=g)


# Retrain
best_model.fit(device,
               train_loader=test_loader,
               val_loader=test_loader,
               epochs=5,
               kohonen_lr=best_params['kohonen_lr'],
               grossberg_lr=best_params['grossberg_lr'],
               early_stopping=True,
               patience=10)

# Get predictions for the test set
true_labels = []
pred_labels = []

best_model.eval()
with torch.no_grad():
    for batch_x, batch_y in test_loader:
        batch_x = batch_x.to(device)
        outputs, _ = best_model(batch_x)
        preds = outputs.argmax(dim=1).cpu()
        pred_labels.extend(preds.tolist())
        true_labels.extend(batch_y.tolist())

# Create confusion matrix
cm = confusion_matrix(true_labels, pred_labels)
disp = ConfusionMatrixDisplay(confusion_matrix=cm, display_labels=list(range(output_size)))
disp.plot(cmap=plt.cm.Blues)
plt.title("Confusion Matrix - Best Trial")
plt.show()


[Epoch 1] Starting training...
Accuracy: 60.11%
[Epoch 1] Validation loss: 1.8974

[Epoch 2] Starting training...
