In [None]:
# L 4-30-25
# notebooks/classify_new_song.ipynb

In [None]:
# Set the run mode before the upstream notebooks execute.
# NOTE(review): presumably 3.1 / 3.2 read RUN_MODE to switch to the new-song
# data set — confirm in those notebooks.
RUN_MODE = "songs"
# IPython magics (not plain Python): execute the two upstream notebooks named
# below, in this order, before the rest of this cell runs.
%run ./3.1_Generate_Spectrograms.ipynb
%run ./3.2_CNN_Spectrogram_Classifier.ipynb

import os
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import joblib
from sklearn.metrics import classification_report, confusion_matrix, precision_recall_fscore_support

# Locations used throughout this notebook (all relative to the notebook folder):
# song audio grouped by genre, per-song spectrogram PNGs, and one report
# folder per model.
SONG_DIR = "../data/Songs/"
SPECTRO_DIR = "../reports/4_Classify_New_Song/spectrograms/"
RF_OUTPUT_DIR = "../reports/4_Classify_New_Song/RF/"
CNN_OUTPUT_DIR = "../reports/4_Classify_New_Song/CNN/"

# Both report folders must exist before anything is written into them.
for _report_dir in (RF_OUTPUT_DIR, CNN_OUTPUT_DIR):
    os.makedirs(_report_dir, exist_ok=True)

# Deserialize the previously saved Random Forest classifier.
rf_model = joblib.load("../models/rf_model.pkl")

# Helper for MFCC extraction
def extract_mfcc(path, *, duration=30, n_mfcc=20):
    """Summarise an audio file as a fixed-length MFCC feature vector.

    Args:
        path: Audio file to analyse; passed straight to ``librosa.load``.
        duration: Maximum number of seconds to read from the start of the
            file. Defaults to 30, the value that used to be hard-coded.
        n_mfcc: Number of MFCC coefficients to compute. Defaults to 20, the
            value that used to be hard-coded.

    Returns:
        A 1-D array holding the mean of every coefficient followed by the
        standard deviation of every coefficient (``2 * n_mfcc`` values).

    Note:
        Changing ``n_mfcc`` changes the length of the returned vector, so it
        has to agree with whatever consumes the vector (for example a model
        trained on a specific feature count).
    """
    # sr=None keeps the file's native sampling rate instead of resampling.
    signal, sample_rate = librosa.load(path, sr=None, duration=duration)
    mfcc = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=n_mfcc)
    # Reducing over axis 1 leaves one statistic per coefficient row.
    return np.concatenate([np.mean(mfcc, axis=1), np.std(mfcc, axis=1)])

# Load CNN predictions from 3.2
cnn_df = pd.read_csv(os.path.join(CNN_OUTPUT_DIR, "cnn_song_predictions.csv"))

# Index the CNN predictions by file name once, instead of scanning the whole
# column for every song. keep="first" preserves the "first matching row wins"
# behaviour of a positional lookup.
# NOTE(review): the match is on the bare file name only, so identically named
# files in different genre folders share one CNN prediction — confirm that
# file names are unique across genres.
_cnn_first_rows = cnn_df.drop_duplicates(subset="file", keep="first")
cnn_by_file = dict(zip(_cnn_first_rows["file"], _cnn_first_rows["CNN"]))

# Inference
results = []
# os.listdir returns entries in arbitrary order; sorting makes the row order of
# the resulting table reproducible between runs and machines.
for genre_dir in sorted(os.listdir(SONG_DIR)):
    genre_path = os.path.join(SONG_DIR, genre_dir)
    if not os.path.isdir(genre_path):
        continue

    for fname in sorted(os.listdir(genre_path)):
        if not fname.endswith(".mp3"):
            continue

        base = os.path.splitext(fname)[0]
        fpath = os.path.join(genre_path, fname)
        spectro_path = os.path.join(SPECTRO_DIR, genre_dir, f"{base}.png")

        # Only feature extraction and prediction are expected to fail for an
        # individual file, so only they are guarded. This stays a best-effort
        # boundary: a bad file is reported and skipped, the batch continues.
        try:
            mfcc_vector = extract_mfcc(fpath).reshape(1, -1)
            rf_pred = rf_model.predict(mfcc_vector)[0]
        except Exception as e:
            print(f"Error processing {fname}: {e}")
            continue

        results.append({
            "file": fname,
            "true_genre": genre_dir,  # the folder name is the ground-truth label
            "RF": rf_pred,
            # "N/A" marks songs that have no CNN prediction
            "CNN": cnn_by_file.get(fname, "N/A"),
            "spectrogram_path": spectro_path,
        })

# Collect every per-song record into one table and persist it as CSV
# (written into the RF report folder, without the index column).
df = pd.DataFrame(results)
combined_csv_path = os.path.join(RF_OUTPUT_DIR, "all_model_predictions.csv")
df.to_csv(combined_csv_path, index=False)

# Evaluation (for RF and CNN)
# Each model gets a text report, a confusion-matrix image and one bar chart per
# metric, all written into that model's own report folder.
for model in ["RF", "CNN"]:
    if model not in df.columns:
        continue

    # Resolve the per-model output folder and file-name prefix once.
    out_dir = RF_OUTPUT_DIR if model == "RF" else CNN_OUTPUT_DIR
    tag = model.lower()

    y_true = df["true_genre"]
    y_pred = df[model]
    # Every label seen on either side, so predicted-only classes still show up.
    labels_sorted = sorted(set(y_true) | set(y_pred))
    tick_positions = np.arange(len(labels_sorted))

    # --- Text classification report ---
    report = classification_report(y_true, y_pred, labels=labels_sorted, zero_division=0)
    report_path = os.path.join(out_dir, f"{tag}_classification_report.txt")
    with open(report_path, "w") as f:
        f.write(report)

    # --- Confusion matrix heat map (rows: true label, columns: predicted) ---
    cm = confusion_matrix(y_true, y_pred, labels=labels_sorted)
    plt.figure(figsize=(10, 8))
    plt.imshow(cm, cmap='Blues', interpolation='nearest')
    plt.title(f"Confusion Matrix - {model}")
    plt.colorbar()
    plt.xticks(tick_positions, labels_sorted, rotation=45, ha='right')
    plt.yticks(tick_positions, labels_sorted)
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.tight_layout()
    plt.savefig(os.path.join(out_dir, f"{tag}_confusion_matrix.png"))
    plt.close()

    # --- Per-genre precision / recall / F1 bar charts ---
    precision, recall, f1, _ = precision_recall_fscore_support(
        y_true, y_pred, labels=labels_sorted, zero_division=0
    )
    metrics = {"Precision": precision, "Recall": recall, "F1-Score": f1}

    for metric_name in metrics:
        values = metrics[metric_name]
        plt.figure(figsize=(10, 5))
        plt.bar(labels_sorted, values)
        plt.title(f"{metric_name} per Genre - {model}")
        plt.ylabel(metric_name)
        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()
        # e.g. "F1-Score" -> "f1_score" in the file name
        fname = f"{tag}_{metric_name.lower().replace('-', '_')}_bar.png"
        plt.savefig(os.path.join(out_dir, fname))
        plt.close()
