In [None]:
import pandas as pd
import numpy as np
import torch
from torchvision import transforms
from sklearn.model_selection import train_test_split
from torch.utils.data import DataLoader
import os
from PIL import Image
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, f1_score
import pickle
import cv2
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import StratifiedKFold
import matplotlib.pyplot as plt
from sklearn.metrics import classification_report

In [None]:
# Set the root file path for the project.
# The root can be overridden without editing the notebook by setting the
# COMP30027_ROOT environment variable; when it is unset, the original
# machine-specific location is used so existing runs keep working.
file_path = os.environ.get(
    "COMP30027_ROOT",
    r"C:\Users\xiluo\Desktop\UoM 2025 S1\ML\COMP30027 asmt2",
)

In [None]:
# Read the training metadata and the three pre-computed feature tables,
# then attach every feature table to the metadata.
train_dir = os.path.join(file_path, "data", "train")
train_feature_dir = os.path.join(train_dir, "Features")

train_metadata = pd.read_csv(os.path.join(train_dir, "train_metadata.csv"))
train_color_features = pd.read_csv(os.path.join(train_feature_dir, "color_histogram.csv"))
train_pca_features = pd.read_csv(os.path.join(train_feature_dir, "hog_pca.csv"))
train_additional_features = pd.read_csv(os.path.join(train_feature_dir, "additional_features.csv"))

# Left joins keyed on image_path keep every metadata row, in the original merge order.
for train_feature_table in (train_color_features, train_pca_features, train_additional_features):
    train_metadata = train_metadata.merge(train_feature_table, on="image_path", how="left")

In [None]:
# Read the test metadata and the three pre-computed feature tables,
# then attach every feature table to the metadata.
test_dir = os.path.join(file_path, "data", "test")
test_feature_dir = os.path.join(test_dir, "Features")

test_metadata = pd.read_csv(os.path.join(test_dir, "test_metadata.csv"))
test_color_features = pd.read_csv(os.path.join(test_feature_dir, "color_histogram.csv"))
test_pca_features = pd.read_csv(os.path.join(test_feature_dir, "hog_pca.csv"))
test_additional_features = pd.read_csv(os.path.join(test_feature_dir, "additional_features.csv"))

# Left joins keyed on image_path keep every metadata row, in the original merge order.
for test_feature_table in (test_color_features, test_pca_features, test_additional_features):
    test_metadata = test_metadata.merge(test_feature_table, on="image_path", how="left")

In [None]:
# Preview the first 20 merged training rows with no column truncation.
train_preview = train_metadata.head(20)
with pd.option_context('display.max_columns', None):
    display(train_preview)

In [None]:
# Every merged column is a model feature except the identifiers and the label.
non_feature_columns = ("image_path", "id", "ClassId")
features = [col for col in train_metadata.columns if col not in non_feature_columns]

# First split: 80% for cross-validated stacking, 20% kept aside as a holdout set
# (stratified on the class label, fixed seed).
train_meta, holdout_meta = train_test_split(
    train_metadata,
    test_size=0.2,
    stratify=train_metadata['ClassId'],
    random_state=42,
)

# Only the 80% training portion is used from here on; X holds image paths, y the labels.
X = train_meta['image_path'].values
y = train_meta['ClassId'].values

# Test-set feature matrix, plus a placeholder label column on the test metadata.
X_test = test_metadata[features]
test_metadata['ClassId'] = -1

In [None]:
# Run the saved per-fold base models (CNN, XGB, SVM) to collect:
#   * out-of-fold probabilities on the 80% training portion (meta-model input),
#   * per-fold probabilities on the holdout set and on the test set.

# import the CNN modules
import sys
sys.path.append('../utils')
from GTRSB_CNN import SimpleCNN
from transform import transform

# setup for 5 folds cross validation
n_folds = 5
# n_splits follows n_folds so the splitter and the result arrays can never disagree
skf = StratifiedKFold(n_splits=n_folds, shuffle=True, random_state=42)
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')


def _load_image_tensors(image_paths, split):
    """Load images from data/<split>/ and return them as one stacked tensor on `device`.

    Args:
        image_paths: iterable of image paths relative to the split directory.
        split: sub-directory of data/ that holds the images ("train" or "test").

    Returns:
        A tensor built by stacking `transform(image)` for every path, moved to `device`.
    """
    tensors = []
    for rel_path in image_paths:
        # The context manager releases the file handle as soon as the image is transformed.
        with Image.open(os.path.join(file_path, "data", split, rel_path)) as img:
            tensors.append(transform(img.convert('RGB')))
    return torch.stack(tensors).to(device)


def _cnn_predict_proba(model, image_tensors):
    """Return the model's softmax class probabilities for `image_tensors` as a NumPy array."""
    with torch.no_grad():
        logits = model(image_tensors)
        # use softmax layer to obtain the predicted probs for each class
        return torch.softmax(logits, dim=1).cpu().numpy()


# initialise arrays to store cv validation, holdout set and test results
n_classes = 43
n_samples = len(train_meta)
n_test = len(test_metadata)

cnn_val_probs = np.zeros((n_samples, n_classes))
xgb_val_probs = np.zeros((n_samples, n_classes))
svm_val_probs = np.zeros((n_samples, n_classes))

cnn_test_probs_folds = np.zeros((n_test, n_classes, n_folds))
xgb_test_probs_folds = np.zeros((n_test, n_classes, n_folds))
svm_test_probs_folds = np.zeros((n_test, n_classes, n_folds))

cnn_holdout_preds_folds = np.zeros((len(holdout_meta), n_classes, n_folds))
xgb_holdout_preds_folds = np.zeros((len(holdout_meta), n_classes, n_folds))
svm_holdout_preds_folds = np.zeros((len(holdout_meta), n_classes, n_folds))

# The pickled model lists do not change between folds, so read each file once.
# NOTE(security): pickle.load executes arbitrary code from the file; only load
# model files that were produced by this project.
with open(os.path.join(file_path, "models", "xgb_models.pkl"), "rb") as f:
    xgb_models = pickle.load(f)
with open(os.path.join(file_path, "models", "svm_models.pkl"), "rb") as f:
    svm_models = pickle.load(f)

# Fold-invariant inputs: the test and holdout sets are the same for every fold,
# so their images and feature tables are prepared once instead of once per fold.
# NOTE(review): assumes `transform` is a deterministic inference transform - confirm.
test_tensors = _load_image_tensors(test_metadata['image_path'].values, "test")
holdout_tensors = _load_image_tensors(holdout_meta['image_path'].values, "train")
train_feats = train_meta[features]
test_feats = test_metadata[features]
holdout_feats = holdout_meta[features]

# Start Inference!
for fold, (train_idx, val_idx) in enumerate(skf.split(X, y)):
    val_feats = train_feats.iloc[val_idx]

    # 1. CNN
    cnn_model = SimpleCNN(num_classes=n_classes)
    cnn_model.load_state_dict(torch.load(os.path.join(file_path, "models", "cnn_models", f'cnn_fold{fold+1}_best.pth'), map_location=device))
    cnn_model.eval()
    cnn_model.to(device)
    # inference on validation fold
    val_tensors = _load_image_tensors(train_meta.iloc[val_idx]['image_path'].values, "train")
    cnn_val_probs[val_idx] = _cnn_predict_proba(cnn_model, val_tensors)
    # inference on whole test set
    cnn_test_probs_folds[:, :, fold] = _cnn_predict_proba(cnn_model, test_tensors)
    # inference on whole holdout set
    cnn_holdout_preds_folds[:, :, fold] = _cnn_predict_proba(cnn_model, holdout_tensors)

    # 2. XGB
    xgb_model = xgb_models[fold]
    # inference on validation fold
    xgb_val_probs[val_idx] = xgb_model.predict_proba(val_feats)
    # inference on whole test set
    xgb_test_probs_folds[:, :, fold] = xgb_model.predict_proba(test_feats)
    # inference on whole holdout set
    xgb_holdout_preds_folds[:, :, fold] = xgb_model.predict_proba(holdout_feats)

    # 3. SVM
    # scaler fit on this fold's training rows only, then applied to val, test and holdout
    scaler = StandardScaler()
    scaler.fit(train_feats.iloc[train_idx])
    svm_model = svm_models[fold]
    # inference on validation fold
    svm_val_probs[val_idx] = svm_model.predict_proba(scaler.transform(val_feats))
    # inference on whole test set
    svm_test_probs_folds[:, :, fold] = svm_model.predict_proba(scaler.transform(test_feats))
    # inference on whole holdout set
    svm_holdout_preds_folds[:, :, fold] = svm_model.predict_proba(scaler.transform(holdout_feats))

In [None]:
# Meta-model input: the out-of-fold class probabilities of the three base models,
# placed side by side (CNN | XGB | SVM) for every training sample.
base_model_val_probs = [cnn_val_probs, xgb_val_probs, svm_val_probs]
val_stack = np.hstack(base_model_val_probs)
y_val = y  # labels of the 80% training portion

# Fit the logistic-regression stacking model on the stacked probabilities.
meta_model = LogisticRegression(max_iter=1000)
meta_model.fit(val_stack, y_val)

In [None]:
# Evaluate on holdout set
# Average the holdout probabilities of the 5 fold models for each base learner
cnn_holdout_probs = np.mean(cnn_holdout_preds_folds, axis=2) # output shape: (n_holdout, n_classes)
xgb_holdout_probs = np.mean(xgb_holdout_preds_folds, axis=2) # output shape: (n_holdout, n_classes)
svm_holdout_probs = np.mean(svm_holdout_preds_folds, axis=2) # output shape: (n_holdout, n_classes)

# Same column order (CNN | XGB | SVM) as the matrix the meta model was trained on
holdout_stack = np.concatenate([cnn_holdout_probs, xgb_holdout_probs, svm_holdout_probs], axis=1)

# Get the true labels in holdout set
y_holdout = holdout_meta['ClassId'].values

# Evaluate
ensemble_holdout_preds = meta_model.predict(holdout_stack)
print('Ensemble holdout acc:', accuracy_score(y_holdout, ensemble_holdout_preds))
print('Ensemble holdout f1:', f1_score(y_holdout, ensemble_holdout_preds, average='macro'))

# Store stacking ensemble validation result for visualisation
save_dir = os.path.join(file_path, "results", "sankey_data")
# np.save does not create missing directories, so make sure the target exists first
os.makedirs(save_dir, exist_ok=True)
np.save(os.path.join(save_dir, "holdout_y.npy"), y_holdout)
np.save(os.path.join(save_dir, "ensemble_holdout_preds.npy"), ensemble_holdout_preds)

In [None]:
# Recorded output of the holdout evaluation from a previous run:
#   Ensemble holdout acc: 0.9899817850637522
#   Ensemble holdout f1: 0.9898522457678046

In [None]:
# Error visualisation

# Positions in the holdout set where the ensemble prediction differs from the true label
wrong_idx = np.flatnonzero(ensemble_holdout_preds != y_holdout)

# Matching holdout rows, with the wrong prediction attached next to the true label
wrong_samples = holdout_meta.iloc[wrong_idx].copy()
wrong_samples['pred'] = ensemble_holdout_preds[wrong_idx]

# Show at most the first 12 misclassified images on a 3 x 4 grid
N = min(12, len(wrong_samples))
shown_samples = wrong_samples.head(N)
plt.figure(figsize=(15, 8))
for slot, (rel_path, true_class, pred_class) in enumerate(
    zip(shown_samples['image_path'], shown_samples['ClassId'], shown_samples['pred']),
    start=1,
):
    plt.subplot(3, 4, slot)
    plt.imshow(Image.open(os.path.join(file_path, "data", "train", rel_path)))
    plt.title(f"True: {true_class}\nPred: {pred_class}")
    plt.axis('off')
plt.tight_layout()
plt.show()

In [None]:
# Per-class precision / recall / F1 of the stacking model on the holdout set
report_dict = classification_report(y_holdout, ensemble_holdout_preds, output_dict=True)
report_df = pd.DataFrame(report_dict).transpose()
print(report_df)

# Drop the trailing three summary rows (the averages read below are among them),
# leaving one row per class that appears in the report
class_rows = report_df.iloc[:-3, :]

# Derive the x positions and tick labels from the rows actually present instead of
# assuming exactly 43 classes; a class missing from the holdout report would
# otherwise make the x and y lengths disagree and break the plot.
x = list(range(len(class_rows)))
class_labels = list(class_rows.index)

plt.figure(figsize=(14, 6))
plt.plot(x, class_rows['precision'], marker='o', label='Precision', color='#1f77b4')
plt.plot(x, class_rows['recall'], marker='o', label='Recall', color='#2ca02c')
plt.plot(x, class_rows['f1-score'], marker='o', label='F1-score', color='#ff7f0e')

# Horizontal reference lines for the two averaged F1 scores
plt.axhline(report_df.loc['macro avg', 'f1-score'], color='gray', linestyle='--', label='Macro F1')
plt.axhline(report_df.loc['weighted avg', 'f1-score'], color='orange', linestyle='--', label='Weighted F1')

plt.xlabel('Class', fontsize=13)
plt.ylabel('Score', fontsize=13)
plt.title('Stacking Model Per-Class Precision, Recall, F1-score (with Macro/Weighted F1)', fontsize=15, pad=12)
plt.ylim(0, 1.05)
plt.xticks(x, class_labels, fontsize=11, rotation=0)
plt.yticks(fontsize=11)
plt.grid(axis='y', linestyle='--', alpha=0.5)
plt.legend(loc='lower left', fontsize=11, framealpha=0.85)
plt.tight_layout()
plt.show()

In [None]:
# Average the test probabilities of the 5 fold models for each base learner
cnn_test_probs = np.mean(cnn_test_probs_folds, axis=2)  # output shape: (n_test, n_classes)
svm_test_probs = np.mean(svm_test_probs_folds, axis=2)  # output shape: (n_test, n_classes)
xgb_test_probs = np.mean(xgb_test_probs_folds, axis=2)  # output shape: (n_test, n_classes)

# Same column order (CNN | XGB | SVM) as the matrix the meta model was trained on
test_stack = np.concatenate([cnn_test_probs, xgb_test_probs, svm_test_probs], axis=1)

# use the stacking model to predict the test set
final_preds = meta_model.predict(test_stack)

# save the final ensemble model prediction
results_dir = os.path.join(file_path, "results")
# to_csv does not create missing directories, so make sure the target exists first
os.makedirs(results_dir, exist_ok=True)
test_metadata["ClassId"] = final_preds
test_metadata[["id", "ClassId"]].to_csv(os.path.join(results_dir, "submission_ensemble.csv"), index=False)