In [None]:
# L 4-30-25
# notebooks/classify_new_song.ipynb

In [None]:
# notebooks/classify_new_song.ipynb

from sklearn.metrics import classification_report, confusion_matrix, precision_recall_fscore_support
import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import librosa.display
import joblib

# Paths
# SONG_DIR is the input root scanned for songs, RF_MODEL_PATH is the
# serialized classifier, and OUTPUT_DIR receives every artifact written below.
SONG_DIR = "../data/Songs/"
RF_MODEL_PATH = "../models/rf_model.pkl"
OUTPUT_DIR = "../reports/4_Classify_New_Song/RF/"

# Create the report folder up front; exist_ok makes re-running the cell safe.
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Load Random Forest Model
# NOTE(review): joblib.load unpickles the file, so only point RF_MODEL_PATH
# at model files from a trusted source.
rf_model = joblib.load(RF_MODEL_PATH)

# Helpers


def extract_mfcc(path):
    """Return a fixed-length MFCC summary vector for one audio file.

    At most the first 30 seconds of the file are loaded, at the file's own
    sample rate (``sr=None`` disables resampling). Twenty MFCCs are computed
    per frame and then summarised across frames.

    Args:
        path: Location of the audio file to analyse.

    Returns:
        A 1-D array of 40 values: the per-coefficient means followed by the
        per-coefficient standard deviations.
    """
    signal, sample_rate = librosa.load(path, sr=None, duration=30)
    coeffs = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=20)

    # axis=1 collapses the frame axis, leaving one statistic per coefficient.
    coeff_means = coeffs.mean(axis=1)
    coeff_stds = coeffs.std(axis=1)
    return np.concatenate((coeff_means, coeff_stds))


# Inference
# Walk SONG_DIR/<genre>/<file>.mp3, classify every song with the Random Forest
# model, and collect one row per successfully processed file. The sub-folder
# name is recorded as the song's true genre.
results = []

# os.listdir returns entries in arbitrary order; sorting keeps the row order
# of the predictions (and therefore the saved CSV) reproducible across runs.
for genre_dir in sorted(os.listdir(SONG_DIR)):
    genre_path = os.path.join(SONG_DIR, genre_dir)
    if not os.path.isdir(genre_path):
        continue

    for fname in sorted(os.listdir(genre_path)):
        # Compare the extension case-insensitively so "track.MP3" is not
        # silently skipped.
        if not fname.lower().endswith(".mp3"):
            continue
        fpath = os.path.join(genre_path, fname)

        # Best-effort: a file that cannot be decoded or classified is
        # reported and skipped so one bad file does not abort the whole run.
        try:
            mfcc_vector = extract_mfcc(fpath).reshape(1, -1)
            rf_pred = rf_model.predict(mfcc_vector)[0]
        except Exception as e:
            print(f"Error processing {fname}: {e}")
            continue

        results.append({
            "file": fname,
            "true_genre": genre_dir,
            "RF": rf_pred
        })

# Save predictions
# Name the columns explicitly so the table (and the CSV header row) keeps its
# schema even when no file was classified and `results` is empty.
df = pd.DataFrame(results, columns=["file", "true_genre", "RF"])
df.to_csv(os.path.join(OUTPUT_DIR, "rf_model_predictions.csv"), index=False)

# Evaluation
# Score the predictions against the folder-derived genres and write a text
# report, a confusion-matrix image and one bar chart per metric to OUTPUT_DIR.

# With no predictions there is nothing to score; fail with a clear message
# instead of an opaque KeyError / scikit-learn error further down.
if df.empty:
    raise RuntimeError(
        "No songs were classified, so no evaluation reports can be built. "
        f"Check that {SONG_DIR!r} contains <genre>/<file>.mp3 entries."
    )

true_labels = df["true_genre"]
predictions = df["RF"]
# Union of both sides so a genre that was only predicted (or only present as
# ground truth) still gets a row/column in every report.
# NOTE(review): assumes the model emits genre names comparable with the
# folder names (same type and spelling) — confirm against the training code.
labels_sorted = sorted(set(true_labels) | set(predictions))

# Classification report
report = classification_report(
    true_labels, predictions, labels=labels_sorted, zero_division=0)
report_path = os.path.join(OUTPUT_DIR, "rf_classification_report.txt")
# Explicit encoding so genre names with non-ASCII characters are written the
# same way on every platform.
with open(report_path, "w", encoding="utf-8") as f:
    f.write(report)

# Confusion matrix (rows: true genre, columns: predicted genre)
cm = confusion_matrix(true_labels, predictions, labels=labels_sorted)
plt.figure(figsize=(10, 8))
plt.imshow(cm, interpolation='nearest', cmap='Blues')
plt.title("Confusion Matrix - RF")
plt.colorbar()
plt.xticks(np.arange(len(labels_sorted)),
           labels_sorted, rotation=45, ha='right')
plt.yticks(np.arange(len(labels_sorted)), labels_sorted)
plt.xlabel("Predicted")
plt.ylabel("True")
plt.tight_layout()
plt.savefig(os.path.join(OUTPUT_DIR, "rf_confusion_matrix.png"))
plt.close()

# Precision, Recall, F1-Score, Support
# Each returned array is aligned with labels_sorted (one value per genre).
precision, recall, f1, support = precision_recall_fscore_support(
    true_labels, predictions, labels=labels_sorted, zero_division=0)
metrics = {"Precision": precision, "Recall": recall,
           "F1-Score": f1, "Support": support}

# One bar chart per metric, saved as rf_<metric>_bar.png.
for metric_name, values in metrics.items():
    plt.figure(figsize=(10, 5))
    plt.bar(labels_sorted, values)
    plt.title(f"{metric_name} per Genre - RF")
    plt.ylabel(metric_name)
    plt.xticks(rotation=45, ha='right')
    plt.tight_layout()
    fname = f"rf_{metric_name.lower().replace('-', '_')}_bar.png"
    plt.savefig(os.path.join(OUTPUT_DIR, fname))
    plt.close()

