In [None]:
import pandas as pd
import numpy as np
import sklearn
import xgboost as xgb
import pickle
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score
import gc
import os 
import cv2

from sklearn.svm import SVC
from sklearn.model_selection import GridSearchCV

from sklearn.preprocessing import StandardScaler
import matplotlib.pyplot as plt

from sklearn.model_selection import StratifiedKFold

import shap
from sklearn.metrics import confusion_matrix, classification_report

In [None]:
# Smoke-test whether this XGBoost build can train on the GPU.
# A tiny random binary problem is enough: we only care whether `gpu_hist`
# raises, not about the fitted booster.
X = np.random.rand(100, 10)
y = np.random.randint(0, 2, 100)
dtrain = xgb.DMatrix(X, label=y)

params = {
    "tree_method": "gpu_hist",
    "objective": "binary:logistic",
    "verbosity": 1
}

try:
    bst = xgb.train(params, dtrain, num_boost_round=10)
    print("XGBoost GPU is available and working!")
except xgb.core.XGBoostError as e:
    # The CPU histogram algorithm is named "hist"; "cpu_hist" is not a valid tree_method.
    print("XGBoost GPU is NOT available! Change tree_method to 'hist' in the XGB Model training cell")
    print(e)

In [None]:
# Project root that contains the `data/`, `results/` and `models/` folders.
# Set the COMP30027_DIR environment variable to point at your own copy;
# the original author's location is kept as the fallback.
file_path = os.environ.get("COMP30027_DIR", r"C:\Users\xiluo\Desktop\UoM 2025 S1\ML\COMP30027 asmt2")

## Data Preprocessing

In [None]:
# Load the training metadata and every pre-computed feature table
train_dir = os.path.join(file_path, "data", "train")
train_feature_dir = os.path.join(train_dir, "Features")

train_metadata = pd.read_csv(os.path.join(train_dir, "train_metadata.csv"))
train_color_features = pd.read_csv(os.path.join(train_feature_dir, "color_histogram.csv"))
train_pca_features = pd.read_csv(os.path.join(train_feature_dir, "hog_pca.csv"))
train_additional_features = pd.read_csv(os.path.join(train_feature_dir, "additional_features.csv"))

# Left-join each feature table onto the metadata by image path, in the same
# order as before (colour histogram, HOG-PCA, additional features)
for feature_table in (train_color_features, train_pca_features, train_additional_features):
    train_metadata = train_metadata.merge(feature_table, on="image_path", how="left")

In [None]:
# Load the test metadata and every pre-computed feature table
test_dir = os.path.join(file_path, "data", "test")
test_feature_dir = os.path.join(test_dir, "Features")

test_metadata = pd.read_csv(os.path.join(test_dir, "test_metadata.csv"))
test_color_features = pd.read_csv(os.path.join(test_feature_dir, "color_histogram.csv"))
test_pca_features = pd.read_csv(os.path.join(test_feature_dir, "hog_pca.csv"))
test_additional_features = pd.read_csv(os.path.join(test_feature_dir, "additional_features.csv"))

# Left-join each feature table onto the metadata by image path, mirroring the
# order used for the training tables
for feature_table in (test_color_features, test_pca_features, test_additional_features):
    test_metadata = test_metadata.merge(feature_table, on="image_path", how="left")

In [None]:
# Preview the merged training table (metadata plus the joined feature columns)
train_metadata.head()

In [None]:
# Every column except the identifiers and the label is used as a model input
non_feature_cols = {"image_path", "id", "ClassId"}
features = [col for col in train_metadata.columns if col not in non_feature_cols]
print(len(features))

# Stratified 80/20 split: `trainset` feeds cross-validation, `holdout_set`
# is kept aside for the final evaluation cells
trainset, holdout_set = train_test_split(
    train_metadata,
    test_size=0.2,
    stratify=train_metadata["ClassId"],
    shuffle=True,
    random_state=42,
)

X_train, y_train = trainset[features], trainset["ClassId"]
X_test = test_metadata[features]

## XGB Model

In [None]:
# 5-fold stratified cross-validation for training
n_splits = 5
n_classes = 43
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

# Make sure the output folders exist before the (long) training run, so the
# saves at the end of this cell cannot fail on a missing directory
save_dir = os.path.join(file_path, "results", "sankey_data")
models_dir = os.path.join(file_path, "models")
os.makedirs(save_dir, exist_ok=True)
os.makedirs(models_dir, exist_ok=True)

# Initialise arrays and list to save training results.
# OOF labels are stored as ints (same as the SVM cell) so that
# classification_report produces keys like "0", "1", ... rather than "0.0"
xgb_val_preds = np.zeros(X_train.shape[0], dtype=int)
xgb_test_preds = np.zeros((X_test.shape[0], n_classes, n_splits))
xgb_models = []

# Hyper-parameters are identical for every fold, so build them once
xgb_params = {
    # "multi:logloss" is not an XGBoost objective; the multiclass probability
    # objective is "multi:softprob" (mlogloss is the *metric*, set below)
    'objective': 'multi:softprob',
    'num_class': n_classes,
    'eval_metric': 'mlogloss',
    'max_depth': 5,
    'learning_rate': 0.01,
    'n_estimators': 1000,
    'subsample': 0.7,
    'colsample_bytree': 0.7,
    'min_child_weight': 3,
    'alpha': 0.1,
    'lambda': 0.1,
    'random_state': 42,
    'verbosity': 0,
    'tree_method': 'gpu_hist',  # switch to 'hist' if the GPU check cell reported no GPU
    'early_stopping_rounds': 50,
}

for fold, (train_idx, val_idx) in enumerate(skf.split(X_train, y_train)):
    # get the sub training set and val set in each loop
    X_tr, X_val = X_train.iloc[train_idx], X_train.iloc[val_idx]
    y_tr, y_val = y_train.iloc[train_idx], y_train.iloc[val_idx]

    model = xgb.XGBClassifier(**xgb_params)
    # The validation fold doubles as the early-stopping set
    model.fit(X_tr, y_tr, eval_set=[(X_val, y_val)], verbose=100)

    # Get the OOF prediction as validation result and for error analysis
    xgb_val_preds[val_idx] = model.predict(X_val)

    # Get the predicted probabilities on test set in each loop and aggregate later for robustness
    xgb_test_preds[:, :, fold] = model.predict_proba(X_test)

    # Save the model
    xgb_models.append(model)

val_acc = accuracy_score(y_train, xgb_val_preds)
print(f"Validation Accuracy: {val_acc:.5f}")

macro_f1 = f1_score(y_train, xgb_val_preds, average='macro')
print(f"Macro F1 Score: {macro_f1:.5f}")

# save validation results for visualisation
np.save(os.path.join(save_dir, "y_train.npy"), y_train)
np.save(os.path.join(save_dir, "xgb_val_pred_labels.npy"), xgb_val_preds)

# Save the 5 xgb models
with open(os.path.join(models_dir, "xgb_models.pkl"), "wb") as f:
    pickle.dump(xgb_models, f)

In [None]:
# Ensemble the fold models: mean class probability across the fold axis
xgb_test_pred_probs = np.mean(xgb_test_preds, axis=2)  # shape: (n_test, n_classes)

# The predicted label is the most probable class
xgb_test_pred_labels = np.argmax(xgb_test_pred_probs, axis=1)  # shape: (n_test,)

# Write the XGB-only submission (note: this overwrites test_metadata["ClassId"])
test_metadata["ClassId"] = xgb_test_pred_labels.astype(int)
xgb_submission_path = os.path.join(file_path, "results", "submission_xgb.csv")
test_metadata[["id", "ClassId"]].to_csv(xgb_submission_path, index=False)

### Evaluation of XGBoost

In [None]:
# Per-class metrics computed from the out-of-fold XGBoost predictions
report_dict = classification_report(y_train, xgb_val_preds, output_dict=True)
report_df = pd.DataFrame(report_dict).transpose()
print(report_df)

# Keep only the per-class rows (the last three rows are summary rows)
class_rows = report_df.iloc[:-3, :]

x = list(range(43)) # The class labels
plt.figure(figsize=(14, 6))

# One line per metric
for metric, legend_name, line_color in (
    ('precision', 'Precision', '#1f77b4'),
    ('recall', 'Recall', '#2ca02c'),
    ('f1-score', 'F1-score', '#ff7f0e'),
):
    plt.plot(x, class_rows[metric], marker='o', label=legend_name, color=line_color)

# Horizontal reference lines for the aggregate F1 scores
for avg_row, legend_name, line_color in (
    ('macro avg', 'Macro F1', 'gray'),
    ('weighted avg', 'Weighted F1', 'orange'),
):
    plt.axhline(report_df.loc[avg_row, 'f1-score'], color=line_color, linestyle='--', label=legend_name)

plt.title('XGBoost Per-Class Precision, Recall, F1-score (with Macro/Weighted F1)', fontsize=15, pad=12)
plt.xlabel('Class', fontsize=13)
plt.ylabel('Score', fontsize=13)
plt.xticks(x, x, fontsize=11, rotation=0)
plt.yticks(fontsize=11)
plt.ylim(0, 1.05)
plt.grid(axis='y', linestyle='--', alpha=0.5)
plt.legend(loc='lower left', fontsize=11, framealpha=0.85)
plt.tight_layout()
plt.show()

In [None]:
def plot_shape_based_misclassifications(y_true, y_pred, classes, class_title, figsize=(12, 8)):
    """
    Draw a stacked bar chart of misclassifications for selected true classes.

    Each bar is one true class from `classes`; its segments count the wrong
    predictions grouped by the shape/colour group of the *predicted* class.
    The bar is annotated with the total number of errors and the number of
    distinct wrong classes.

    Parameters
    ----------
    y_true : array-like
        True class ids.
    y_pred : array-like
        Predicted class ids, positionally aligned with `y_true`.
    classes : iterable of int or None
        True classes to plot. None means `range(number of unique true labels)`.
    class_title : str
        Suffix appended to the figure title (separated by a single space).
    figsize : tuple
        Figure size forwarded to `plt.figure`.

    Returns
    -------
    None. The figure is created on the pyplot state machine and left open.
    """

    # classify shapes and colors
    shape_classification = {
        'red_white_circular': [0, 1, 2, 3, 4, 5, 7, 8, 9, 10, 15, 16],
        'red_white_triangular': [11, 18, 19, 20, 21, 22, 23, 24, 25, 26, 27, 28, 29, 30, 31],
        'black_white_circular': [6, 32, 41, 42],
        'other': [12, 13, 14, 17],
        'blue_arrow_circular': [33, 34, 35, 36, 37, 38, 39, 40],
    }

    shape_colors = {
        'red_white_circular': '#FF0000',
        'red_white_triangular': '#FF9999',
        'black_white_circular': '#808080',
        'other': '#2ECC71',
        'blue_arrow_circular': '#3498DB'
    }

    def get_shape_category(class_id):
        class_id = int(class_id)
        # `members` (not `classes`) to avoid shadowing the outer argument
        for shape, members in shape_classification.items():
            if class_id in members:
                return shape
        return 'unknown'

    # Plain arrays make the boolean masks below purely positional, whatever
    # index a pandas Series argument may carry
    y_true = np.asarray(y_true)
    y_pred = np.asarray(y_pred)

    if classes is None:
        classes = range(len(np.unique(y_true)))
    # Materialise once: the sequence is iterated and indexed several times
    classes = list(classes)

    # gather misclassified info: {true class: {wrong predicted class: count}}
    error_data = {}
    for cls in classes:
        mask = y_true == cls
        predictions = y_pred[mask]
        unique_preds, pred_counts = np.unique(predictions, return_counts=True)
        error_dict = {pred: count for pred, count in zip(unique_preds, pred_counts) if pred != cls}
        error_data[cls] = error_dict

    # count and classify in terms of shape
    shape_errors = {shape: np.zeros(len(classes)) for shape in shape_classification.keys()}
    for i, cls in enumerate(classes):
        for pred_cls, count in error_data[cls].items():
            pred_shape = get_shape_category(pred_cls)
            # predictions outside every known group ('unknown') are not drawn
            if pred_shape in shape_errors:
                shape_errors[pred_shape][i] += count
    unique_class_count = {cls: len(error_data[cls].keys()) for cls in classes}

    plt.figure(figsize=figsize)
    bottom = np.zeros(len(classes))

    # draw stacked barplots
    for shape in shape_classification.keys():
        if np.any(shape_errors[shape]):
            plt.bar(range(len(classes)), shape_errors[shape],
                    bottom=bottom, label=shape, color=shape_colors[shape])
            bottom += shape_errors[shape]

    # add values on bars
    for i, total in enumerate(bottom):
        if total > 0:
            cls = classes[i]
            plt.text(i, total * 1.02,
                    f'{int(total)}\n({unique_class_count[cls]} classes)',
                    ha='center', va='bottom')

    plt.xlabel('Original Class', fontsize=12)
    plt.ylabel('Number of Misclassifications', fontsize=12)
    title = 'Distribution of Shape-based Misclassifications'
    if class_title:
        # a separating space is required, otherwise the title reads
        # "...Misclassificationsfor ..."
        title = f'{title} {class_title.lstrip()}'
    plt.title(title, fontsize=14)
    x_labels = [f'Class {cls}\n({get_shape_category(cls)})' for cls in classes]
    plt.xticks(range(len(classes)), x_labels, rotation=45, ha='right')
    plt.legend(bbox_to_anchor=(1.05, 1), loc='upper left')
    # Only rescale when there is something to show; ylim(0, 0) is degenerate
    peak = bottom.max() if bottom.size else 0
    if peak > 0:
        plt.ylim(0, peak * 1.1)
    plt.tight_layout()

In [None]:
# Find the 10 classes with the lowest out-of-fold F1-score and plot how they
# are misclassified. `classification_report` is already imported in the first
# cell, so it is not re-imported here.
report = classification_report(y_train, xgb_val_preds, output_dict=True)
f1_scores = [report[str(i)]['f1-score'] for i in range(43)]
lowest_10_classes = np.argsort(f1_scores)[:10]
plot_shape_based_misclassifications(y_train, xgb_val_preds, lowest_10_classes, 'for 10 lowest F1-score classes', figsize=(12, 8))

In [None]:
# Same misclassification breakdown for class ids 32-42 (labelled "uncommon classes" in the title)
plot_shape_based_misclassifications(y_train, xgb_val_preds, range(32,43), 'for uncommon classes', figsize=(12, 8))

In [None]:
def feature_stats(val_df, y_val, y_pred, cls_A, cls_B, features):
    """
    Compute the mean and (sample) std of each feature within three groups:
    - cls_A correctly predicted as cls_A
    - cls_B correctly predicted as cls_B
    - cls_A misclassified as cls_B

    Parameters
    ----------
    val_df : pd.DataFrame
        Feature table; rows are selected positionally (`.iloc`).
    y_val, y_pred : array-like
        True and predicted labels, positionally aligned with `val_df`.
    cls_A, cls_B : class ids defining the three groups.
    features : iterable of column names of `val_df` to summarise.

    Returns
    -------
    pd.DataFrame with one row per feature and the columns
    'feature', '<A>-><A> mean/std', '<B>-><B> mean/std', '<A>-><B> mean/std'.
    Empty groups yield NaN means; groups with fewer than two rows yield NaN std.
    """
    idx_AA = np.where((y_val == cls_A) & (y_pred == cls_A))[0]
    idx_BB = np.where((y_val == cls_B) & (y_pred == cls_B))[0]
    idx_AB = np.where((y_val == cls_A) & (y_pred == cls_B))[0]

    # The row subsets do not depend on the feature, so select them once
    # instead of re-slicing the frame six times per feature
    rows_AA = val_df.iloc[idx_AA]
    rows_BB = val_df.iloc[idx_BB]
    rows_AB = val_df.iloc[idx_AB]

    stats = []
    for feat in features:
        stats.append({
            'feature': feat,
            f'{cls_A}->{cls_A} mean': rows_AA[feat].mean(),
            f'{cls_A}->{cls_A} std': rows_AA[feat].std(),
            f'{cls_B}->{cls_B} mean': rows_BB[feat].mean(),
            f'{cls_B}->{cls_B} std': rows_BB[feat].std(),
            f'{cls_A}->{cls_B} mean': rows_AB[feat].mean(),
            f'{cls_A}->{cls_B} std': rows_AB[feat].std(),
        })
    return pd.DataFrame(stats)

def plot_top_features(stats_df, title):
    """
    Horizontal bar chart of the 20 rows of `stats_df` with the largest
    'diff_ratio', labelled by their 'feature' value and annotated with the
    ratio rounded to two decimals.
    """
    ranked = stats_df.nlargest(20, 'diff_ratio')

    fig, ax = plt.subplots(figsize=(15, 8))
    bars = ax.barh(ranked['feature'], ranked['diff_ratio'])

    # Write each bar's value just past its right end, vertically centred
    for bar in bars:
        bar_len = bar.get_width()
        bar_mid = bar.get_y() + bar.get_height()/2
        ax.text(bar_len, bar_mid, f'{bar_len:.2f}',
                ha='left', va='center', fontsize=10)

    ax.set_title(title)
    ax.set_xlabel('Difference Ratio')
    ax.set_ylabel('Feature Name')
    fig.tight_layout()
    plt.show()

# Per-class F1 from the out-of-fold XGBoost predictions
report = classification_report(y_train, xgb_val_preds, output_dict=True)
f1_scores = [report[label]['f1-score'] for label in map(str, range(43))]

# Weakest and strongest class by F1-score
worst_cls = np.argmin(f1_scores)
best_cls = np.argmax(f1_scores)

print(f"Class with lowest F1-score: {worst_cls} (F1 = {f1_scores[worst_cls]:.4f})")
print(f"Class with highest F1-score: {best_cls} (F1 = {f1_scores[best_cls]:.4f})")

# Row `worst_cls` of the confusion matrix tells us what the worst class gets
# predicted as; zero the diagonal entry so only errors compete for the argmax
cm = confusion_matrix(y_train, xgb_val_preds, labels=np.arange(43))
pred_counts = cm[worst_cls].copy()
pred_counts[worst_cls] = 0
most_confused = np.argmax(pred_counts)
print(f"\nClass {worst_cls} is most often predicted as: {most_confused}")
print(f"Number of misclassifications: {pred_counts[most_confused]}")

# Compare feature statistics of the worst class against its most confused class
features_to_analyze = list(X_train.columns)

print("\nAnalyzing worst performing class vs its most confused class:")
stats_df_worst = feature_stats(X_train, y_train, xgb_val_preds, worst_cls, most_confused, features_to_analyze)

# diff_ratio = |mean gap between the two correctly-classified groups|
#              divided by the average of their standard deviations
worst_key = f'{worst_cls}->{worst_cls}'
confused_key = f'{most_confused}->{most_confused}'
mean_gap = (stats_df_worst[f'{worst_key} mean'] - stats_df_worst[f'{confused_key} mean']).abs()
avg_std = (stats_df_worst[f'{worst_key} std'] + stats_df_worst[f'{confused_key} std']) / 2
stats_df_worst['diff_ratio'] = mean_gap / avg_std

# Plot results
plot_top_features(stats_df_worst, f'Top 20 Most Discriminative Features Between Class {worst_cls} and {most_confused} (Most Confused)')

In [None]:
def show_class_samples(class_ids):
    """
    Show one randomly chosen training image for each of the given classes,
    side by side in a single row.

    Parameters
    ----------
    class_ids : sequence of class ids to display (one subplot per id).

    Raises
    ------
    FileNotFoundError
        If a selected image cannot be read from disk.
    """
    n_classes = len(class_ids)
    # squeeze=False always returns a 2-D array of Axes, so indexing also works
    # when a single class is requested
    fig, axes = plt.subplots(1, n_classes, figsize=(8, 4), squeeze=False)
    axes = axes.ravel()

    for i, class_id in enumerate(class_ids):
        # get all image paths of the class
        class_samples = train_metadata[train_metadata['ClassId'] == class_id]['image_path'].values

        # randomly select one sample of the class
        selected_sample = np.random.choice(class_samples, 1, replace=False)[0]

        # read and show the image
        img_path = os.path.join(file_path, "data", "train", selected_sample)
        img = cv2.imread(img_path)
        if img is None:
            # cv2.imread signals a missing/unreadable file by returning None
            raise FileNotFoundError(f"Could not read image: {img_path}")
        img = cv2.cvtColor(img, cv2.COLOR_BGR2RGB)  # OpenCV loads BGR; matplotlib expects RGB
        axes[i].imshow(img)
        axes[i].axis('off')
        axes[i].set_title(f'Class {class_id}', pad=10)

    plt.tight_layout()
    plt.show()

# show the samples of class 0 and 1
show_class_samples([0, 1])

## Feature importance

In [None]:
# Calculate the shap values for each model and average them over samples and classes.
# The evaluation sample is the same for every model (fixed seed), so draw it once;
# cap n at the number of rows so small training sets do not raise.
shap_sample = X_train.sample(n=min(1000, len(X_train)), random_state=42)

shap_importances = []
for model in xgb_models:
    explainer = shap.TreeExplainer(model)
    shap_values = explainer.shap_values(shap_sample)
    # Older shap releases return a list with one (n_samples, n_features) array
    # per class; newer ones return a single (n_samples, n_features, n_classes)
    # array. Normalise to the latter so the axes below mean the same thing.
    if isinstance(shap_values, list):
        shap_values = np.stack(shap_values, axis=-1)
    # Average the shap values over samples (axis 0) and classes (axis 2)
    mean_shap = np.abs(shap_values).mean(axis=(0, 2))
    shap_importances.append(mean_shap)

# Average the shap values over 5 folds
mean_shap_importance = np.mean(shap_importances, axis=0)
feature_importance = pd.Series(mean_shap_importance, index=X_train.columns).sort_values(ascending=False)
print(feature_importance.head(30))

topn = 30
# Reverse so the most important feature ends up at the top of the barh chart
top_features = feature_importance.head(topn)[::-1]
plt.figure(figsize=(10, 8))
bars = plt.barh(top_features.index, top_features.values, color="#4682b4")
plt.xlabel("Mean(|SHAP value|)", fontsize=13)
plt.ylabel("Feature", fontsize=13)
plt.title("Top 30 Feature Importances (Mean SHAP, 5-fold XGBoost)", fontsize=16)
plt.tight_layout()
plt.show()

## SVM Model

In [None]:
# 5-fold stratified cross-validation for the SVM (same folds as the XGB cell:
# identical n_splits, shuffle and random_state)
n_splits = 5
skf = StratifiedKFold(n_splits=n_splits, shuffle=True, random_state=42)

n_classes = 43
svm_val_preds = np.zeros(X_train.shape[0], dtype=int)
svm_val_probs = np.zeros((X_train.shape[0], n_classes))
svm_test_preds = np.zeros((X_test.shape[0], n_classes, n_splits))
svm_models = []
scalers = []

for fold, (train_idx, val_idx) in enumerate(skf.split(X_train, y_train)):
    X_tr, X_val = X_train.iloc[train_idx], X_train.iloc[val_idx]
    y_tr, y_val = y_train.iloc[train_idx], y_train.iloc[val_idx]

    # Fit the scaler on the training part of the fold only, then apply it to
    # the validation fold and the test set
    scaler = StandardScaler()
    X_tr_scaled = scaler.fit_transform(X_tr)
    X_val_scaled = scaler.transform(X_val)
    X_test_scaled = scaler.transform(X_test)

    model = SVC(probability=True, random_state=42, C=10, gamma='scale', kernel='rbf')
    model.fit(X_tr_scaled, y_tr)

    # OOF prediction
    val_pred = model.predict(X_val_scaled)
    val_prob = model.predict_proba(X_val_scaled)
    svm_val_preds[val_idx] = val_pred
    svm_val_probs[val_idx] = val_prob

    # Test prediction
    svm_test_preds[:, :, fold] = model.predict_proba(X_test_scaled)

    svm_models.append(model)
    scalers.append(scaler)

# validation accuracy
val_acc = accuracy_score(y_train, svm_val_preds)
print(f"Validation Accuracy: {val_acc:.5f}")

# save the model and validation results
# (`save_dir` is defined in the XGB training cell)
models_dir = os.path.join(file_path, "models")
os.makedirs(save_dir, exist_ok=True)
os.makedirs(models_dir, exist_ok=True)
np.save(os.path.join(save_dir, "svm_val_pred_labels.npy"), svm_val_preds)
with open(os.path.join(models_dir, "svm_models.pkl"), "wb") as f:
    pickle.dump(svm_models, f)
# Each SVM only works on inputs scaled by its own fold scaler, so persist the
# scalers next to the models (same fold order)
with open(os.path.join(models_dir, "svm_scalers.pkl"), "wb") as f:
    pickle.dump(scalers, f)

### Evaluation of SVM

In [None]:
# Per-class metrics computed from the out-of-fold SVM predictions
report_dict = classification_report(y_train, svm_val_preds, output_dict=True)
report_df = pd.DataFrame(report_dict).transpose()
print(report_df)

# Only keep the class rows (remove accuracy, macro avg, weighted avg)
class_rows = report_df.iloc[:-3, :]

x = list(range(43))  # The class labels
plt.figure(figsize=(14, 6))

# One line per metric
for metric, legend_name, line_color in (
    ('precision', 'Precision', '#1f77b4'),
    ('recall', 'Recall', '#2ca02c'),
    ('f1-score', 'F1-score', '#ff7f0e'),
):
    plt.plot(x, class_rows[metric], marker='o', label=legend_name, color=line_color)

# Horizontal reference lines for the aggregate F1 scores
for avg_row, legend_name, line_color in (
    ('macro avg', 'Macro F1', 'gray'),
    ('weighted avg', 'Weighted F1', 'orange'),
):
    plt.axhline(report_df.loc[avg_row, 'f1-score'], color=line_color, linestyle='--', label=legend_name)

plt.title('SVM Per-Class Precision, Recall, F1-score (with Macro/Weighted F1)', fontsize=15, pad=12)
plt.xlabel('Class', fontsize=13)
plt.ylabel('Score', fontsize=13)
plt.xticks(x, x, fontsize=11, rotation=0)
plt.yticks(fontsize=11)
plt.ylim(0, 1.05)
plt.grid(axis='y', linestyle='--', alpha=0.5)
plt.legend(loc='lower left', fontsize=11, framealpha=0.85)
plt.tight_layout()
plt.show()

In [None]:
# Rank classes by out-of-fold SVM F1-score and keep the 10 weakest
report = classification_report(y_train, svm_val_preds, output_dict=True)
f1_scores = [report[label]['f1-score'] for label in map(str, range(n_classes))]
lowest_10_classes = np.argsort(f1_scores)[:10]
print("Lowest 10 F1-score classes:", lowest_10_classes)

# Break their misclassifications down by the shape group of the predicted class
plot_shape_based_misclassifications(
    y_train,
    svm_val_preds,
    lowest_10_classes,
    'for 10 lowest F1-score classes',
    figsize=(12, 8),
)

In [None]:
# Same misclassification breakdown for class ids 32-42 (labelled "uncommon classes" in the title)
plot_shape_based_misclassifications(y_train, svm_val_preds, range(32,43), 'for uncommon classes', figsize=(12, 8))

In [None]:
# numpy (np) and matplotlib.pyplot (plt) are already imported in the first
# cell, so they are not re-imported here.
n_classes = 43
# Kernel width taken from the first fold's fitted SVC via its private `_gamma`
# attribute. NOTE(review): presumably the numeric value resolved from
# gamma='scale' — confirm against the installed scikit-learn version.
gamma = svm_models[0]._gamma

# initialize the storage of three similarities (one list per class)
class_correct_to_sv = [[] for _ in range(n_classes)]      # correct samples to class support vectors
class_wrong_to_sv = [[] for _ in range(n_classes)]        # wrong samples to class support vectors
class_wrong_to_correct = [[] for _ in range(n_classes)]   # wrong samples to correct samples

def rbf_kernel(X, Y, gamma):
    """
    Pairwise RBF kernel matrix K[i, j] = exp(-gamma * ||X[i] - Y[j]||^2).

    Parameters
    ----------
    X : array-like of shape (n, d)
    Y : array-like of shape (m, d)
    gamma : float kernel width.

    Returns
    -------
    np.ndarray of shape (n, m).
    """
    X = np.asarray(X, dtype=float)
    Y = np.asarray(Y, dtype=float)
    # ||x - y||^2 = ||x||^2 + ||y||^2 - 2 x.y ; this needs only an (n, m)
    # array instead of materialising the full (n, m, d) difference tensor
    dists = (
        np.sum(X ** 2, axis=1)[:, None]
        + np.sum(Y ** 2, axis=1)[None, :]
        - 2.0 * (X @ Y.T)
    )
    # Round-off can push tiny distances slightly below zero; clip so the
    # kernel never exceeds 1
    np.maximum(dists, 0.0, out=dists)
    return np.exp(-gamma * dists)

def mean_without_outliers(arr, lower=0.05, upper=0.95):
    """
    Mean of the values lying between the `lower` and `upper` quantiles
    (both bounds inclusive).

    Returns NaN for empty input, and falls back to the median when the
    quantile filter leaves nothing.
    """
    values = np.asarray(arr)
    if len(values) == 0:
        return np.nan

    q_low = np.quantile(values, lower)
    q_high = np.quantile(values, upper)
    inside = (values >= q_low) & (values <= q_high)
    trimmed = values[inside]

    if len(trimmed) > 0:
        return trimmed.mean()
    return np.median(values)

# Re-create the CV folds (same seed as training) and, per class, collect the
# maximum RBF similarity of validation samples to that class's support vectors
for fold, (train_idx, val_idx) in enumerate(skf.split(X_train, y_train)):
    model = svm_models[fold]
    scaler = scalers[fold]
    # Use this fold's own kernel width: with gamma='scale' every fitted model
    # can resolve to a slightly different value
    gamma = model._gamma
    X_val = X_train.iloc[val_idx]
    y_val = y_train.iloc[val_idx].values
    X_val_scaled = scaler.transform(X_val)
    y_val_pred = model.predict(X_val_scaled)

    # get the class label of support vectors.
    # `support_` holds the row indices (into the data passed to fit) of the
    # support vectors, so the true labels come straight from the fold's
    # training labels; predicting them would mislabel support vectors that
    # the model itself misclassifies.
    sv = model.support_vectors_
    sv_labels = y_train.iloc[train_idx].values[model.support_]

    for cls in range(n_classes):
        mask_cls = (y_val == cls)
        if not np.any(mask_cls):
            continue

        # get correct and wrong samples
        mask_correct = mask_cls & (y_val == y_val_pred)
        mask_wrong = mask_cls & (y_val != y_val_pred)
        X_correct = X_val_scaled[mask_correct]
        X_wrong = X_val_scaled[mask_wrong]

        # get the support vectors of current class
        sv_cls_mask = (sv_labels == cls)
        sv_cls = sv[sv_cls_mask]

        if len(sv_cls) == 0:
            continue

        if len(X_correct) > 0:
            # calculate the similarity between correct samples and class support vectors
            K_correct_sv = rbf_kernel(X_correct, sv_cls, gamma)
            class_correct_to_sv[cls].extend(K_correct_sv.max(axis=1))

        if len(X_wrong) > 0:
            # calculate the similarity between wrong samples and class support vectors
            K_wrong_sv = rbf_kernel(X_wrong, sv_cls, gamma)
            class_wrong_to_sv[cls].extend(K_wrong_sv.max(axis=1))

            # calculate the similarity between wrong samples and correct samples
            if len(X_correct) > 0:
                K_wrong_correct = rbf_kernel(X_wrong, X_correct, gamma)
                class_wrong_to_correct[cls].extend(K_wrong_correct.max(axis=1))

# calculate the average similarity (classes with no entries become NaN)
class_correct_sv_mean = [mean_without_outliers(v) for v in class_correct_to_sv]
class_wrong_sv_mean = [mean_without_outliers(v) for v in class_wrong_to_sv]
class_wrong_correct_mean = [mean_without_outliers(v) for v in class_wrong_to_correct]

# visualize
x = np.arange(n_classes)
plt.figure(figsize=(14,6))
plt.plot(x, class_correct_sv_mean, marker='o', label='Correct to Class SV', color='green')
plt.plot(x, class_wrong_sv_mean, marker='x', label='Wrong to Class SV', color='red')
plt.plot(x, class_wrong_correct_mean, marker='^', label='Wrong to Correct', color='blue')
plt.xlabel('Class')
plt.ylabel('Mean Max RBF Kernel Value')
plt.title('Sample Similarities Comparison (Using Class-Specific Support Vectors)')
plt.legend()
plt.grid(axis='y', linestyle='--', alpha=0.5)
plt.show()

In [None]:
# Ensemble the fold models: mean class probability across the fold axis
svm_test_pred_probs = np.mean(svm_test_preds, axis=2)  # shape: (n_test, n_classes)

# The predicted label is the most probable class
svm_test_pred_labels = np.argmax(svm_test_pred_probs, axis=1)  # shape: (n_test,)

# Write the SVM-only submission (note: this overwrites test_metadata["ClassId"])
test_metadata["ClassId"] = svm_test_pred_labels.astype(int)
svm_submission_path = os.path.join(file_path, "results", "submission_svm.csv")
test_metadata[["id", "ClassId"]].to_csv(svm_submission_path, index=False)

## Evaluate on Holdout set

In [None]:
# XGBoost Holdout Set Inference & Evaluation
n_holdout = len(holdout_set)
n_classes = 43
# Size the buffer from the models actually trained instead of a hard-coded 5
n_folds = len(xgb_models)
xgb_holdout_probs_folds = np.zeros((n_holdout, n_classes, n_folds))

# The holdout features are the same for every fold model, so slice them once
holdout_xgb_feats = holdout_set[features]

for fold, xgb_model in enumerate(xgb_models):
    xgb_holdout_probs = xgb_model.predict_proba(holdout_xgb_feats)
    xgb_holdout_probs_folds[:, :, fold] = xgb_holdout_probs

# average the fold models' probabilities, then take the most probable class
xgb_holdout_probs_mean = np.mean(xgb_holdout_probs_folds, axis=2)
xgb_holdout_preds = np.argmax(xgb_holdout_probs_mean, axis=1)
np.save(os.path.join(save_dir, "xgb_holdout_pred_labels.npy"), xgb_holdout_preds)

# evaluate
y_true = holdout_set['ClassId']
report_dict = classification_report(y_true, xgb_holdout_preds, output_dict=True)
report_df = pd.DataFrame(report_dict).transpose()
print(report_df)

# visualise (drop the three trailing summary rows to keep per-class rows only)
class_rows = report_df.iloc[:-3, :]
x = list(range(n_classes))
plt.figure(figsize=(14, 6))
plt.plot(x, class_rows['precision'], marker='o', label='Precision', color='#1f77b4')
plt.plot(x, class_rows['recall'], marker='o', label='Recall', color='#2ca02c')
plt.plot(x, class_rows['f1-score'], marker='o', label='F1-score', color='#ff7f0e')
plt.axhline(report_df.loc['macro avg', 'f1-score'], color='gray', linestyle='--', label='Macro F1')
plt.axhline(report_df.loc['weighted avg', 'f1-score'], color='orange', linestyle='--', label='Weighted F1')
plt.xlabel('Class')
plt.ylabel('Score')
plt.title('XGBoost Per-Class Precision, Recall, F1-score (with Macro/Weighted F1)')
plt.ylim(0, 1.05)
plt.xticks(x, x)
plt.grid(axis='y', linestyle='--', alpha=0.5)
plt.legend(loc='lower left')
plt.tight_layout()
plt.show()

In [None]:
# SVM Holdout Set Inference & Evaluation
n_holdout = len(holdout_set)
n_classes = 43
n_folds = len(svm_models)
svm_holdout_probs_folds = np.zeros((n_holdout, n_classes, n_folds))

# The holdout features are the same for every fold model, so slice them once
holdout_svm_feats = holdout_set[features]

# Each SVM must see inputs scaled by the scaler fitted alongside it during
# training. Re-fitting a scaler on folds of the full `train_metadata` would
# use different rows than the model was trained on and would include the
# holdout rows themselves, so reuse the stored fold scalers instead.
for fold, (svm_model, scaler) in enumerate(zip(svm_models, scalers)):
    holdout_svm_feats_scaled = scaler.transform(holdout_svm_feats)
    svm_holdout_probs = svm_model.predict_proba(holdout_svm_feats_scaled)
    svm_holdout_probs_folds[:, :, fold] = svm_holdout_probs

# average the fold models' probabilities, then take the most probable class
svm_holdout_probs_mean = np.mean(svm_holdout_probs_folds, axis=2)
svm_holdout_preds = np.argmax(svm_holdout_probs_mean, axis=1)
np.save(os.path.join(save_dir, "svm_holdout_pred_labels.npy"), svm_holdout_preds)

# evaluate
y_true = holdout_set['ClassId']
report_dict = classification_report(y_true, svm_holdout_preds, output_dict=True)
report_df = pd.DataFrame(report_dict).transpose()
print(report_df)

# visualise results (drop the three trailing summary rows to keep per-class rows only)
class_rows = report_df.iloc[:-3, :]
x = list(range(n_classes))
plt.figure(figsize=(14, 6))
plt.plot(x, class_rows['precision'], marker='o', label='Precision', color='#1f77b4')
plt.plot(x, class_rows['recall'], marker='o', label='Recall', color='#2ca02c')
plt.plot(x, class_rows['f1-score'], marker='o', label='F1-score', color='#ff7f0e')
plt.axhline(report_df.loc['macro avg', 'f1-score'], color='gray', linestyle='--', label='Macro F1')
plt.axhline(report_df.loc['weighted avg', 'f1-score'], color='orange', linestyle='--', label='Weighted F1')
plt.xlabel('Class')
plt.ylabel('Score')
plt.title('SVM Per-Class Precision, Recall, F1-score (with Macro/Weighted F1)')
plt.ylim(0, 1.05)
plt.xticks(x, x)
plt.grid(axis='y', linestyle='--', alpha=0.5)
plt.legend(loc='lower left')
plt.tight_layout()
plt.show()