#### 1. Loading Processed Data

##### 1.1 Setup Imports

In [1]:
import csv, os, datetime
import pickle
import joblib
import numpy as np
# Model related imports
import tensorflow as tf
from tensorflow.keras import layers, models
from tensorflow.keras.utils import to_categorical
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from tensorflow.keras.optimizers import Adam
# Visualization imports
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.model_selection import train_test_split
from sklearn.metrics import (
    accuracy_score, precision_score, recall_score, f1_score, 
    roc_auc_score, confusion_matrix, classification_report
)

# Seed NumPy's and TensorFlow's global random generators with the same fixed value.
np.random.seed(42)
tf.random.set_seed(42)

##### 1.2 Initialise functions

In [2]:
def save_run_metrics(run, accuracy, precision, recall, f1, FAR, roc_area, auc, csv_path="model_results.csv"):
    """Append one timestamped row of evaluation metrics to a CSV log.

    The column header is written first whenever the target file does not
    exist yet or exists but is empty, so the log always starts with a header.

    Args:
        run: Identifier of the run; written to the "run" column.
        accuracy, precision, recall, f1: Metric values, written as given.
        FAR: Value for the "false_acceptance_rate" column.
        roc_area: Value for the "roc_area" column.
        auc: Value for the "auc" column.
        csv_path: CSV file to append to (created if missing).
    """
    header = [
        "timestamp", "run",
        "accuracy", "precision", "recall",
        "f1", "false_acceptance_rate",
        "roc_area", "auc",
    ]

    # A zero-byte file (e.g. left behind by an interrupted run) still needs
    # the header; checking existence alone would produce a header-less log.
    needs_header = (not os.path.isfile(csv_path)) or os.path.getsize(csv_path) == 0

    # newline="" lets the csv module control line endings itself.
    with open(csv_path, mode="a", newline="") as f:
        writer = csv.writer(f)

        if needs_header:
            writer.writerow(header)

        writer.writerow([
            datetime.datetime.now().strftime("%Y-%m-%d %H:%M:%S"),
            run,
            accuracy,
            precision,
            recall,
            f1,
            FAR,
            roc_area,
            auc,
        ])


In [None]:
import numpy as np
import os

# Processed arrays live in a folder addressed relative to the notebook directory.
base_path = "../data/processed"

# Load the four arrays in one pass; the file order matches the target names.
X_train, X_test, y_train, y_test = (
    np.load(os.path.join(base_path, file_name))
    for file_name in (
        "X_train_processed.npy",
        "X_test_processed.npy",
        "y_train_encoded.npy",
        "y_test_encoded.npy",
    )
)

print(X_train.shape, y_train.shape)

##### 1.3 Setup root directory and run/results folder

In [None]:
# Root directory containing all runs.
# Built with os.path.join so the separator is right on every OS: a raw
# r"..\data" string only resolves on Windows, elsewhere it names a folder
# literally called "..\data" inside the current directory.
root_dir = os.path.join("..", "data")
# Alternative single-filter runs (select one by index to evaluate it):
# run_folders = [
#     "JustGaussianBlur",
#     "JustOtsu",
#     "JustRobert",
#     "JustGrayscale",
#     "JustPrewitt",
#     "JustSobel"
# ]
# run = run_folders[0]
run = "processed"
# Reset Keras' global state before a model is built for this run.
tf.keras.backend.clear_session()
run_path = os.path.join(root_dir, run)
# Every artefact of this run (checkpoint, reports, plots) goes under <run>/results.
results_dir = os.path.join(run_path, "results")
os.makedirs(results_dir, exist_ok=True)

##### 1.4 Import input data

In [None]:
# Training features and integer-encoded labels for the selected run.
# The run's X_test_processed.npy / y_test_encoded.npy files are not loaded here (not used).
X_train_proc, y_train_enc = (
    np.load(os.path.join(run_path, file_name))
    for file_name in ("X_train_processed.npy", "y_train_encoded.npy")
)
# NOTE: joblib.load unpickles the file, so only load encoders from a trusted source.
label_encoder = joblib.load(os.path.join(run_path, "label_encoder.pkl"))

#### 2. Initial Setup

##### 2.1 Split training/validation sets

In [None]:
# Two-stage stratified split: 70% train, 15% validation, 15% test.
# Stage 1 - hold out 30% of the samples.
X_train, X_temp, y_train, y_temp = train_test_split(
    X_train_proc,
    y_train_enc,
    stratify=y_train_enc,
    test_size=0.30,
    random_state=42,
)
# Stage 2 - cut the held-out part in half: validation and test.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    stratify=y_temp,
    test_size=0.50,
    random_state=42,
)
print(len(X_train), len(X_val), len(X_test))

##### 2.2 One hot encode labels

In [None]:
# Turn each split's integer labels into a one-hot (binary) matrix with one
# column per class known to the label encoder.
num_classes = len(label_encoder.classes_)
y_train, y_val, y_test = (
    to_categorical(split_labels, num_classes)
    for split_labels in (y_train, y_val, y_test)
)

##### 2.2 Building CNN model

In [None]:
# Build model: three Conv2D + MaxPooling stages, then a dense classifier head.
# The input shape is declared with an explicit Input layer instead of the
# `input_shape=` argument on the first Conv2D; recent Keras versions warn
# about the latter in Sequential models.
model = models.Sequential([
    layers.Input(shape=(64, 64, 1)),
    layers.Conv2D(32, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(64, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Conv2D(128, (3, 3), activation='relu'),
    layers.MaxPooling2D((2, 2)),
    layers.Flatten(),
    layers.Dense(128, activation='relu'),
    layers.Dropout(0.4),
    # One softmax output per class in the label encoder.
    layers.Dense(num_classes, activation='softmax')
])

  super().__init__(activity_regularizer=activity_regularizer, **kwargs)


#### 2.3 Compile with optimiser

In [None]:
# Adam optimiser; categorical cross-entropy pairs with the one-hot targets.
optimizer = Adam(learning_rate=0.001)
model.compile(
    loss="categorical_crossentropy",
    optimizer=optimizer,
    metrics=["accuracy"],
)

#### 2.4 Setup Early Stopping and Model Checkpoints

In [None]:
# Note the two callbacks watch different quantities: early stopping follows
# val_loss (patience of 3 epochs, best weights restored), while the checkpoint
# file is only overwritten when val_accuracy improves.
early_stop = EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)
checkpoint = ModelCheckpoint(
    filepath=os.path.join(results_dir, f"{run}.keras"),
    monitor='val_accuracy',
    save_best_only=True,
)

#### 3. Training Model

In [None]:
# Train for at most 15 epochs in batches of 64, validating on the explicit
# validation split; the callbacks handle early stopping and checkpointing.
history = model.fit(
    X_train,
    y_train,
    batch_size=64,
    epochs=15,
    validation_data=(X_val, y_val),
    callbacks=[early_stop, checkpoint],
    verbose=1,
)

##### 3.2 Fitting model

In [None]:
# `validation_split` expects a float fraction of the training data; an explicit
# (features, labels) validation set has to be passed as `validation_data`.
# NOTE: if the model was already fitted in an earlier cell, this call continues
# from those weights and replaces `history` with the curves of this call only.
history = model.fit(
    X_train, y_train,
    validation_data=(X_val, y_val),
    epochs=15,
    batch_size=64,
    callbacks=[early_stop, checkpoint],
    verbose=1
)

### 4. Evaluation
###### Includes: Accuracy, DR/FAR, Precision/Recall/F1, ROC Area, Confusion Matrix

##### 4.1 Calculate metrics and loss values between input and target

In [None]:
# Loss and accuracy on the test split.
test_loss, test_acc = model.evaluate(X_test, y_test, verbose=1)

# Per-class probabilities -> predicted class index;
# one-hot targets -> true class index.
y_prob = model.predict(X_test, verbose=0)
y_pred = y_prob.argmax(axis=1)
y_test_true_int = y_test.argmax(axis=1)

print(f" Test Accuracy: {test_acc:.4f}")
print(f" Test Loss: {test_loss:.4f}")

##### 4.2 Convert integer indices back to class names

In [None]:
# Map integer class indices back to the original class names.
y_test_true_labels, y_pred_labels = (
    label_encoder.inverse_transform(class_indices)
    for class_indices in (y_test_true_int, y_pred)
)

##### 4.3 Print and save classification report

In [None]:
# zero_division=0 mirrors the setting used for the macro precision/recall/F1
# of this run: a class that is never predicted scores 0 instead of raising
# an undefined-metric warning.
report = classification_report(y_test_true_labels, y_pred_labels, zero_division=0)
print("\nClassification Report:")
print(report)
report_path = os.path.join(results_dir, "classification_report.txt")
# Explicit encoding so the file is written the same way on every platform.
with open(report_path, "w", encoding="utf-8") as f:
    f.write("Classification Report\n\n")
    f.write(report)
print(f"Saved classification report: {report_path}")

##### 4.4 Calculate and save metrics

In [None]:
# Macro-averaged scores; a class without predictions contributes 0.
_macro_kwargs = dict(average="macro", zero_division=0)
accuracy = accuracy_score(y_test_true_int, y_pred)
precision = precision_score(y_test_true_int, y_pred, **_macro_kwargs)
recall = recall_score(y_test_true_int, y_pred, **_macro_kwargs)
f1 = f1_score(y_test_true_int, y_pred, **_macro_kwargs)

# AUC is best-effort: on failure the error is printed and NaN is recorded.
try:
    auc = roc_auc_score(y_test, y_prob, multi_class="ovr")
except Exception as e:
    print(f"AUC failed: {e}")
    auc = np.nan
roc_area = auc

# Per-class counts derived from the confusion matrix (rows = true, columns = predicted).
cm = confusion_matrix(y_test_true_int, y_pred)
TP = np.diag(cm)
FP = cm.sum(axis=0) - TP
FN = cm.sum(axis=1) - TP
TN = cm.sum() - (FP + FN + TP)
# Mean per-class false-acceptance rate; the epsilon guards against a zero denominator.
FAR = np.mean(FP / (FP + TN + 1e-12))

# Save: one row in the shared CSV log, plus a per-run text summary.
save_run_metrics(run, accuracy, precision, recall, f1, FAR, roc_area, auc)
print(f"Saved metrics for {run}")
metrics_lines = [
    f"Accuracy: {accuracy:.4f}\n",
    f"Precision: {precision:.4f}\n",
    f"Recall: {recall:.4f}\n",
    f"F1: {f1:.4f}\n",
    f"FAR: {FAR:.6f}\n",
    f"AUC: {auc if not np.isnan(auc) else 'NaN'}\n",
]
with open(os.path.join(results_dir, "metrics.txt"), "w") as f:
    f.writelines(metrics_lines)


##### 4.5 Plot confusion matrix and training curves

In [None]:
# Plot confusion matrix
# Use class names as tick labels only when they line up with the matrix size
# (confusion_matrix only has rows/columns for labels that actually occur).
class_names = list(label_encoder.classes_)
tick_labels = class_names if len(class_names) == cm.shape[0] else "auto"

fig, ax = plt.subplots(figsize=(10, 8))
# fmt="d" prints the integer counts as-is; the default format switches to
# scientific notation for counts of 100 or more.
sns.heatmap(
    cm, annot=True, fmt="d", cmap="viridis",
    xticklabels=tick_labels, yticklabels=tick_labels, ax=ax,
)
ax.set(
    title=f"Confusion Matrix - {run}",
    xlabel="Predicted label",
    ylabel="True label",
)
# No grid here: grid lines run through the cell centres and hide the annotations.
fig.savefig(os.path.join(results_dir, f"{run}_confusion_matrix.png"), dpi=300)
plt.close(fig)

# Plot training curves (one figure per metric, train vs validation)
for metric_key, short_label, long_label in (
    ("accuracy", "Acc", "Accuracy"),
    ("loss", "Loss", "Loss"),
):
    fig, ax = plt.subplots()
    ax.plot(history.history[metric_key], label=f"Train {short_label}")
    ax.plot(history.history[f"val_{metric_key}"], label=f"Val {short_label}")
    ax.set(
        title=f"Training Vs Validation {long_label} - {run}",
        xlabel="Epoch",
        ylabel=long_label,
    )
    ax.legend()
    ax.grid(True)
    fig.savefig(os.path.join(results_dir, f"{run}_{metric_key}_curve.png"))
    plt.close(fig)

##### 4.6 Save output data

In [None]:
# Persist the integer labels/predictions, plus the one-hot targets and
# predicted probabilities needed for ROC calculations.
for output_name, output_array in (
    ("y_test_int.npy", y_test_true_int),
    ("y_pred_int.npy", y_pred),
    ("y_test_onehot.npy", y_test),
    ("y_pred_proba.npy", y_prob),
):
    np.save(os.path.join(results_dir, output_name), output_array)

In [None]:
# Save separately in the model folder.
# Create the folder first so saving does not depend on it already existing
# (same approach as for results_dir).
models_dir = os.path.join("..", "models")
os.makedirs(models_dir, exist_ok=True)
model.save(os.path.join(models_dir, f"{run}_asl_cnn_model.h5"))