In [None]:
# L 4-30-25
# notebooks/classify_new_song.ipynb

In [None]:
# notebooks/classify_new_song.ipynb

# Modular inference pipeline setup for song classification using RF + CNN

import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import joblib
import runpy

import step_3_1_Gen_Spectro
import step_3_2_CNN_Spectro
import importlib

# Put the current working directory on the import path.
# NOTE(review): the step_3_* modules are imported above, before this append
# runs, so it cannot influence whether those imports succeeded.
sys.path.append(os.getcwd())

# Re-execute both pipeline modules so that the objects used below come from a
# fresh load rather than from whatever this session imported earlier.
for _pipeline_module in (step_3_1_Gen_Spectro, step_3_2_CNN_Spectro):
    importlib.reload(_pipeline_module)


RUN_MODE = "songs"

# Input: audio files to classify.
SONG_DIR = "../data/Songs/"

# Model artifacts.
CNN_BUNDLE_PATH = "../models/cnn_inference_bundle.pth"
CNN_MODEL_PATH = "../models/cnn_model.pth"

# Outputs: per-model report folders, generated spectrograms, combined CSV.
RF_OUTPUT_DIR = "../reports/4_Classify_New_Song/RF/"
CNN_OUTPUT_DIR = "../reports/4_Classify_New_Song/CNN/"
SPECTRO_DIR = "../reports/4_Classify_New_Song/spectrograms/"
COMBINED_OUTPUT_PATH = "../reports/4_Classify_New_Song/all_model_predictions.csv"

# Create every output folder up front; exist_ok makes re-runs harmless.
for _output_dir in (RF_OUTPUT_DIR, CNN_OUTPUT_DIR, SPECTRO_DIR):
    os.makedirs(_output_dir, exist_ok=True)

# Step 1: Generate spectrograms (training set first if it is missing, then
# the new songs in SONG_DIR)
DEFAULT_TRAIN_SPECTRO_DIR = "../spectrograms/"

# The training folder counts as ready only if it exists AND has entries;
# os.listdir is evaluated only when the path exists.
_train_spectros_ready = (
    os.path.exists(DEFAULT_TRAIN_SPECTRO_DIR)
    and bool(os.listdir(DEFAULT_TRAIN_SPECTRO_DIR))
)
if not _train_spectros_ready:
    print("[INFO] Training spectrograms missing — generating using default config.")
    # Called without arguments; presumably it falls back to the training
    # paths configured inside step_3_1_Gen_Spectro -- verify there.
    step_3_1_Gen_Spectro.generate_all()

# Always (re)generate spectrograms for the songs to classify.
step_3_1_Gen_Spectro.generate_spectrograms_from_folder(
    song_dir=SONG_DIR,
    output_dir=SPECTRO_DIR,
    img_size=128,
)

# Step 2: Run CNN model to regenerate predictions
# Both artifacts must be present; if either is missing the CNN pipeline's
# run() is invoked first.
_cnn_artifacts = (CNN_BUNDLE_PATH, CNN_MODEL_PATH)
if not all(os.path.exists(_artifact) for _artifact in _cnn_artifacts):
    print("[INFO] CNN models not found. Training from FMA spectrograms...")
    step_3_2_CNN_Spectro.run()

cnn_df = step_3_2_CNN_Spectro.cnn_predict_folder(
    bundle_path=CNN_BUNDLE_PATH,
    image_dir=SPECTRO_DIR,
)

# Persist the CNN predictions next to the other CNN reports.
_cnn_predictions_csv = os.path.join(CNN_OUTPUT_DIR, "cnn_song_predictions.csv")
cnn_df.to_csv(_cnn_predictions_csv, index=False)


# Step 3: Load Random Forest model
# NOTE: joblib.load unpickles the file, so only point it at model files you
# trust.
rf_model = joblib.load("../models/rf_model.pkl")

# Step 4: Helper for MFCC extraction


def extract_mfcc(path):
    """Summarise an audio file as a vector of MFCC statistics.

    Loads at most 30 seconds of ``path`` with ``librosa.load`` (``sr=None``,
    i.e. no target sampling rate is requested), computes 20 MFCCs and
    reduces them along axis 1.

    Args:
        path: Audio file handed to ``librosa.load``.

    Returns:
        A NumPy array holding the per-coefficient means followed by the
        per-coefficient standard deviations.
    """
    signal, sample_rate = librosa.load(path, sr=None, duration=30)
    coeffs = librosa.feature.mfcc(y=signal, sr=sample_rate, n_mfcc=20)
    # Means first, then standard deviations, in one flat vector.
    stats = (coeffs.mean(axis=1), coeffs.std(axis=1))
    return np.concatenate(stats)


# Step 5: Load CNN predictions from 3.2 output
cnn_df = pd.read_csv(os.path.join(CNN_OUTPUT_DIR, "cnn_song_predictions.csv"))

# Index the CNN predictions by file name once, instead of scanning the whole
# DataFrame twice for every song inside the loop. setdefault keeps the first
# row when a file name appears more than once, matching the previous
# `.values[0]` behaviour.
# A missing 'file' or 'CNN' column now raises KeyError here, once, rather than
# printing the same error for every song.
cnn_by_file = {}
for _cnn_file, _cnn_label in zip(cnn_df["file"], cnn_df["CNN"]):
    cnn_by_file.setdefault(_cnn_file, _cnn_label)

# Step 6: Inference loop
# Walks SONG_DIR/<genre>/<song>.mp3; the sub-folder name is recorded as the
# true genre of every song inside it.
results = []
for genre_dir in os.listdir(SONG_DIR):
    genre_path = os.path.join(SONG_DIR, genre_dir)
    if not os.path.isdir(genre_path):
        continue

    for fname in os.listdir(genre_path):
        if not fname.endswith(".mp3"):
            continue

        base = os.path.splitext(fname)[0]
        fpath = os.path.join(genre_path, fname)
        spectro_path = os.path.join(SPECTRO_DIR, genre_dir, f"{base}.png")

        # Best effort per song: a file that cannot be loaded or scored is
        # reported and skipped so the rest of the batch still runs. Only the
        # two calls that can fail per file sit inside the try.
        try:
            mfcc_vector = extract_mfcc(fpath).reshape(1, -1)
            rf_pred = rf_model.predict(mfcc_vector)[0]
        except Exception as e:
            print(f"Error processing {fname}: {e}")
            continue

        results.append({
            "file": fname,
            "true_genre": genre_dir,
            "RF": rf_pred,
            # "N/A" marks songs the CNN output has no row for.
            "CNN": cnn_by_file.get(fname, "N/A"),
            "spectrogram_path": spectro_path,
        })

# Step 7: Save combined predictions
df = pd.DataFrame(results)
df.to_csv(COMBINED_OUTPUT_PATH, index=False)

# Step 8: Evaluation (RF and CNN)
# For each model: a text classification report, a confusion-matrix image and
# one bar chart per metric, all written to that model's output folder.
# NOTE(review): classification_report, confusion_matrix and
# precision_recall_fscore_support are not imported anywhere in the visible
# source; they look like sklearn.metrics functions -- confirm they are in
# scope before this runs, otherwise the first call raises NameError.
for model in ["RF", "CNN"]:
    # An empty results list yields a frame without these columns.
    if model not in df.columns:
        continue

    out_dir = RF_OUTPUT_DIR if model == "RF" else CNN_OUTPUT_DIR
    prefix = model.lower()

    y_true, y_pred = df["true_genre"], df[model]
    # Union of true and predicted labels, so a label that only ever shows up
    # as a prediction still gets its own row/column.
    labels_sorted = sorted(set(y_true).union(y_pred))
    tick_positions = np.arange(len(labels_sorted))

    # --- text report ---
    report_text = classification_report(
        y_true, y_pred, labels=labels_sorted, zero_division=0)
    report_path = os.path.join(out_dir, f"{prefix}_classification_report.txt")
    with open(report_path, "w") as report_file:
        report_file.write(report_text)

    # --- confusion matrix heat map ---
    conf_matrix = confusion_matrix(y_true, y_pred, labels=labels_sorted)
    plt.figure(figsize=(10, 8))
    plt.imshow(conf_matrix, cmap='Blues', interpolation='nearest')
    plt.title(f"Confusion Matrix - {model}")
    plt.colorbar()
    plt.xticks(tick_positions, labels_sorted, rotation=45, ha='right')
    plt.yticks(tick_positions, labels_sorted)
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.tight_layout()
    plt.savefig(os.path.join(out_dir, f"{prefix}_confusion_matrix.png"))
    plt.close()

    # --- per-genre precision / recall / F1 bar charts ---
    # The call returns four arrays; zip stops after the three named metrics,
    # so the trailing one is ignored.
    per_class_scores = precision_recall_fscore_support(
        y_true, y_pred, labels=labels_sorted, zero_division=0)
    metric_names = ("Precision", "Recall", "F1-Score")

    for metric_name, values in zip(metric_names, per_class_scores):
        metric_slug = metric_name.lower().replace('-', '_')
        plt.figure(figsize=(10, 5))
        plt.bar(labels_sorted, values)
        plt.title(f"{metric_name} per Genre - {model}")
        plt.ylabel(metric_name)
        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()
        plt.savefig(os.path.join(out_dir, f"{prefix}_{metric_slug}_bar.png"))
        plt.close()

[INFO] Generating spectrograms from folder: ../data/Songs/
[INFO] Training CNN on: c:\Users\ranuL\Documents\GitHub\GenreWave\notebooks\../spectrograms
[INFO] Epoch 1, Loss: 1.8442
[INFO] Epoch 2, Loss: 1.6268
[INFO] Epoch 3, Loss: 1.5312
[INFO] Epoch 4, Loss: 1.4699
[INFO] Epoch 5, Loss: 1.4171
               precision    recall  f1-score   support

   Electronic       0.46      0.56      0.50       207
 Experimental       0.46      0.27      0.34       218
         Folk       0.41      0.68      0.51       188
      Hip-Hop       0.45      0.68      0.54       203
 Instrumental       0.55      0.42      0.48       198
International       0.45      0.48      0.46       193
          Pop       0.20      0.06      0.09       209
         Rock       0.60      0.60      0.60       183

     accuracy                           0.46      1599
    macro avg       0.45      0.47      0.44      1599
 weighted avg       0.44      0.46      0.44      1599



FileNotFoundError: [Errno 2] No such file or directory: '../reports/4_Classify_New_Song/CNN/cnn_song_predictions.csv'