In [None]:
import numpy as np

from sklearn.datasets import load_digits
from sklearn.model_selection import StratifiedKFold
from sklearn.cluster import KMeans
from sklearn.preprocessing import LabelEncoder

from tensorflow.keras.utils import to_categorical

from python_som import SOM
import math 

from scipy.spatial.distance import cdist
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score


In [None]:
digits = load_digits()
raw_X, raw_y = digits.data, digits.target
wanted_classes = [1,3,5]
mask = np.isin(raw_y, wanted_classes)
X = raw_X[mask]
y = raw_y[mask]

# Remaping Labels 
encoder = LabelEncoder()
y_encoded = encoder.fit_transform(y)

skf= StratifiedKFold(n_splits=3)
X_train = [] 
y_train = []
X_test = []
y_test = []

for train_idx, test_idx in skf.split(X, y_encoded):
    X_train.append(X[train_idx].astype('float32'))
    X_test.append(X[test_idx].astype('float32'))

    # Apply one-hot encoding to the labels
    y_train.append(to_categorical(y_encoded[train_idx], num_classes=3))
    y_test.append(to_categorical(y_encoded[test_idx], num_classes=3))

X_train = [x / 16.0 for x in X_train]
X_test = [x / 16.0 for x in X_test]

print(f"Features Shape: {X_train[0].shape}")
print(f"Targets Shape:  {y_train[0].shape}")

Features Shape: (364, 64)
Targets Shape:  (364, 3)


In [None]:
# Formula from slides
m = 5 * math.ceil(math.sqrt(X.shape[0]))

# Eigenvalues
cov_matrix = np.cov(X, rowvar=False)
eigenvalues = np.linalg.eigvals(cov_matrix)

sorted_evals = np.sort(eigenvalues)[::-1]

lambda_1 = sorted_evals[0]
lambda_2 = sorted_evals[1]

# Grid Ratio
ratio = np.sqrt(lambda_1 / lambda_2)

grid_y = math.ceil(math.sqrt(m / ratio))
grid_x = math.ceil(ratio * grid_y)

print(f"Target Neurons (m): {m}")
print(f"Top 2 Eigenvalues: {lambda_1:.4f}, {lambda_2:.4f}")
print(f"Calculated Ratio: {ratio:.4f}")
print(f"Suggested Grid Dimensions: {grid_x} x {grid_y} (Total: {grid_x*grid_y})")

Target Neurons (m): 120
Top 2 Eigenvalues: 257.7981, 203.2308
Calculated Ratio: 1.1263
Suggested Grid Dimensions: 13 x 11 (Total: 143)


In [None]:
# From project statement
r_neighborhood_function = math.ceil(grid_y / 2)

In [48]:
fold_accuracies = []
fold_precisions = []
fold_recalls = []
fold_f1s = []

best_f1 = 0

In [None]:

for k in range(3):
    # create SOM
    som = SOM(x = grid_x, y = grid_y,
    input_len = X_train[k].shape[1], learning_rate = 0.1,
    neighborhood_radius = r_neighborhood_function,
    neighborhood_function = 'gaussian',
    cyclic_x = True, cyclic_y = True)

    # weights 
    som.weight_initialization(mode='linear', data=X_train[k])

    # train
    som.train(data=X_train[k], n_iteration=50*X_train[k].shape[0], mode='random', verbose=False)

    # grid to fill with class labels
    grid_class_map = np.zeros((grid_x, grid_y, 3))

    # populate grid 
    for sample, label_one_hot in zip(X_train[k], y_train[k]):
        w = som.winner(sample) # Get BMU
        true_class = np.argmax(label_one_hot)
        grid_class_map[w] += label_one_hot # Add vote

    # Coords with at least one vote
    filled_coords = np.argwhere(grid_class_map.sum(axis=2) > 0)
    
    # Create final label grid 
    neuron_labels = np.full((grid_x, grid_y), -1)

    # Assign majority class
    for i, j in filled_coords:
        neuron_labels[i, j] = np.argmax(grid_class_map[i, j])
        
    # empty neurons: find the nearest filled neuron
    all_coords = np.ndindex((grid_x, grid_y))

    for r, c in all_coords:
        if neuron_labels[r, c] == -1:
            dists = cdist([[r, c]], filled_coords, metric='euclidean')
            # Find nearest filled neuron
            nearest_idx = np.argmin(dists)
            nearest_coord = filled_coords[nearest_idx]
            # Label from nearest filled neuron
            neuron_labels[r, c] = neuron_labels[nearest_coord[0], nearest_coord[1]]

    # Predicting on test set
    y_pred = []
    y_true = []

    for sample, label_one_hot in zip(X_test[k], y_test[k]):
        w = som.winner(sample)
        predicted_class = neuron_labels[w]
        
        y_pred.append(predicted_class)
        y_true.append(np.argmax(label_one_hot))


# Calculate Metrics
    acc = accuracy_score(y_true, y_pred)
    prec = precision_score(y_true, y_pred, average='weighted')
    rec = recall_score(y_true, y_pred, average='weighted')
    f1 = f1_score(y_true, y_pred, average='weighted')

    fold_accuracies.append(acc)
    fold_precisions.append(prec)
    fold_recalls.append(rec)
    fold_f1s.append(f1)

    print(f"\n Fold {k+1} Test Results:")
    print(f"Accuracy:  {acc:.4f}")
    print(f"Precision: {prec:.4f}")
    print(f"Recall:    {rec:.4f}")
    print(f"F1-Score:  {f1:.4f}")
    
    # Save best SOM
    if f1 > best_f1:
        best_f1 = f1
        best_som = som
        best_neuron_labels = neuron_labels.copy()
        best_fold = k # to use for visualization, in project statement 

  


 Fold 1 Test Results:
Accuracy:  0.9781
Precision: 0.9795
Recall:    0.9781
F1-Score:  0.9783

 Fold 2 Test Results:
Accuracy:  0.9725
Precision: 0.9725
Recall:    0.9725
F1-Score:  0.9725

 Fold 3 Test Results:
Accuracy:  0.9780
Precision: 0.9785
Recall:    0.9780
F1-Score:  0.9781


In [50]:
# Average Metrics
print("\n Average Test Results")
print(f"Accuracy:  {np.mean(fold_accuracies):.4f}")
print(f"Precision: {np.mean(fold_precisions):.4f}")
print(f"Recall:    {np.mean(fold_recalls):.4f}")
print(f"F1-Score:  {np.mean(fold_f1s):.4f}")



 Average Test Results
Accuracy:  0.9470
Precision: 0.9511
Recall:    0.9470
F1-Score:  0.9467
