# In [None]:

import os
import numpy as np
import pandas as pd
import joblib
import matplotlib.pyplot as plt

from sklearn.metrics import (
    accuracy_score,
    classification_report,
    confusion_matrix,
    ConfusionMatrixDisplay,
    f1_score,
    precision_score,
    recall_score
)

# Load full dataset (features + labels)
X_features = np.load(FEATURES_PATH)
y_labels = np.load(LABELS_PATH)

print("Dataset loaded:", X_features.shape, y_labels.shape)

# Load the best model
# os.listdir() returns entries in arbitrary order, so the candidates are
# sorted to make the selected file deterministic when several files in
# "models" start with "best_model".
model_files = sorted(f for f in os.listdir("models") if f.startswith("best_model"))
if not model_files:
    # Explicit raise instead of `assert`: asserts are stripped under `python -O`.
    raise FileNotFoundError("No trained model found in 'models/'")
model_path = os.path.join("models", model_files[0])
# NOTE: joblib.load unpickles the file, which can execute arbitrary code;
# only load model files that come from a trusted source.
best_model = joblib.load(model_path)

print("Loaded best model: ", model_path)

from sklearn.model_selection import train_test_split

# Two-stage stratified split with a fixed seed: 70% train, then the remaining
# 30% is halved into validation and test (about 15% each).
# NOTE(review): presumably this mirrors the split used when the model was
# trained, so the test samples are unseen - confirm against the training code.

# Stage 1: hold out 30% of the samples, stratified on the class labels.
X_train, X_temp, y_train, y_temp = train_test_split(
    X_features,
    y_labels,
    test_size=0.30,
    random_state=42,
    stratify=y_labels,
)

# Stage 2: split the held-out part 50/50 into validation and test.
X_val, X_test, y_val, y_test = train_test_split(
    X_temp,
    y_temp,
    test_size=0.50,
    random_state=42,
    stratify=y_temp,
)

print("Test set size:", X_test.shape[0])

# Predictions
y_test_pred = best_model.predict(X_test)

# Accuracy
acc_test = accuracy_score(y_test, y_test_pred)
print("Test Accuracy:", acc_test)

# Detailed classification report.
# `labels=GENRES` pins each report row to the matching entry of
# `target_names`. Without it, scikit-learn attaches the names to the sorted
# unique labels found in the data, which mislabels rows when GENRES is not in
# sorted order and breaks when a class is missing from the test split.
# This also matches the other metric calls in this file, which already pass
# `labels=GENRES`.
# NOTE(review): assumes the entries of GENRES are the label values stored in
# y_labels (the confusion-matrix call relies on the same assumption).
report = classification_report(
    y_test,
    y_test_pred,
    labels=GENRES,
    target_names=GENRES,
    digits=3,
)
print(report)

# Confusion matrix on the test set, rows/columns ordered as in GENRES.
fig, ax = plt.subplots(figsize=(8, 8))

cm = confusion_matrix(y_test, y_test_pred, labels=GENRES)
disp = ConfusionMatrixDisplay(
    confusion_matrix=cm,
    display_labels=GENRES,
)

# Draw onto the prepared axes; the colorbar is disabled.
disp.plot(
    ax=ax,
    cmap="Blues",
    xticks_rotation=45,
    colorbar=False,
)
ax.set_title("Confusion Matrix - Test Set")

plt.tight_layout()
plt.show()

# Per-class precision / recall / F1 (average=None returns one score per
# label, in the order given by GENRES).
precision, recall, f1 = (
    scorer(y_test, y_test_pred, average=None, labels=GENRES)
    for scorer in (precision_score, recall_score, f1_score)
)

# One row per genre, one column per metric.
df_metrics = pd.DataFrame(
    {
        "Precision": precision,
        "Recall": recall,
        "F1-score": f1,
    },
    index=pd.Index(GENRES, name="Genre"),
)

print(df_metrics)

# Plot per-class metrics as grouped bars on a fixed 0-1 score axis.
metrics_ax = df_metrics.plot.bar(figsize=(10, 6))
metrics_ax.set_title("Per-class metrics on Test Set")
metrics_ax.set_ylabel("Score")
metrics_ax.set_ylim(0, 1)
plt.xticks(rotation=45)
metrics_ax.legend(loc="lower right")
plt.tight_layout()
plt.show()