In [None]:
# L 4-30-25

In [25]:
# notebooks/step_final_classify.ipynb

# Modular inference pipeline for song genre classification using an MFCC-based Random Forest (RF), a spectrogram CNN, and a VGGish-embedding RF

import os
import sys
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import librosa
import joblib
import runpy

from sklearn.metrics import classification_report, confusion_matrix, precision_recall_fscore_support

import importlib

# The working directory has to be on sys.path *before* the step_* pipeline
# modules are imported; appending it afterwards cannot help an import that
# has already executed. The membership check stops the same entry from being
# appended again every time this cell is re-run.
_cwd = os.getcwd()
if _cwd not in sys.path:
    sys.path.append(_cwd)

import step_3_1_Gen_Spectro
import step_3_2_CNN_Spectro
import step_4_1_Gen_vggish_embed
import step_4_2_Vggish_Class

# A repeated `import` returns the cached module object, so reload each step
# module explicitly to pick up edits made on disk since the first import.
for _step_module in (
    step_3_1_Gen_Spectro,
    step_3_2_CNN_Spectro,
    step_4_1_Gen_vggish_embed,
    step_4_2_Vggish_Class,
):
    importlib.reload(_step_module)

# Run selector. NOTE(review): not read anywhere in the visible code.
RUN_MODE = "songs"

# ---- Input audio: one sub-folder per genre, each holding .mp3 files ----
SONG_DIR = "../data/Songs/"

# ---- Trained model artefacts ----
CNN_BUNDLE_PATH = "../models/cnn_inference_bundle.pth"
CNN_MODEL_PATH = "../models/cnn_model.pth"
VGGISH_RF_MODEL = "../models/vggish_rf_model.pkl"

# ---- Training-time VGGish embeddings ----
# NOTE(review): defined but not referenced in the visible code.
VGGISH_TRAIN_DIR = "../step_4_1_vggish_embeddings/"

# ---- Per-model report folders ----
RF_OUTPUT_DIR = "../reports/step_final_classify/RF/"
CNN_OUTPUT_DIR = "../reports/step_final_classify/CNN/"
VGGISH_RF_SONG_REPORT_DIR = "../reports/step_final_classify/vggish_rf_eval/"

# ---- Intermediate artefacts produced for the inference songs ----
SPECTRO_DIR = "../reports/step_final_classify/spectrograms/"
VGGISH_SONG_OUTPUT_DIR = "../reports/step_final_classify/vggish_embeddings/"
VGGISH_SONG_METADATA = "../reports/step_final_classify/vggish_metadata_songs.csv"

# ---- Combined per-song predictions table ----
COMBINED_OUTPUT_PATH = "../reports/step_final_classify/all_model_predictions.csv"

# Create every output folder up front (no error if it already exists).
for _output_dir in (
    RF_OUTPUT_DIR,
    CNN_OUTPUT_DIR,
    SPECTRO_DIR,
    VGGISH_SONG_OUTPUT_DIR,
    VGGISH_RF_SONG_REPORT_DIR,
):
    os.makedirs(_output_dir, exist_ok=True)


# Step 1: Generate spectrograms for new songs
DEFAULT_TRAIN_SPECTRO_DIR = "../spectrograms/"

# If the default training spectrogram folder is absent or empty, build the
# training set first. The listing is only attempted when the path exists.
if not (os.path.exists(DEFAULT_TRAIN_SPECTRO_DIR)
        and os.listdir(DEFAULT_TRAIN_SPECTRO_DIR)):
    print("[INFO] Training spectrograms missing — generating using default config.")
    step_3_1_Gen_Spectro.generate_all()

# Always (re)generate spectrograms for the inference songs into SPECTRO_DIR.
step_3_1_Gen_Spectro.generate_spectrograms_from_folder(
    song_dir=SONG_DIR, output_dir=SPECTRO_DIR, img_size=128)

# Step 2: Run CNN model to regenerate predictions
# Both the inference bundle and the model file must exist; otherwise fall
# back to the step 3.2 entry point (which, per the message below, trains).
cnn_artifacts_present = (
    os.path.exists(CNN_BUNDLE_PATH) and os.path.exists(CNN_MODEL_PATH)
)
if not cnn_artifacts_present:
    print("[INFO] CNN models not found. Training from FMA spectrograms...")
    step_3_2_CNN_Spectro.run()

# Predict on the freshly generated song spectrograms and persist the table.
cnn_df = step_3_2_CNN_Spectro.cnn_predict_folder(
    bundle_path=CNN_BUNDLE_PATH, image_dir=SPECTRO_DIR)
cnn_predictions_csv = os.path.join(CNN_OUTPUT_DIR, "cnn_song_predictions.csv")
cnn_df.to_csv(cnn_predictions_csv, index=False)


# Step 3: Load Random Forest model
# NOTE: joblib.load unpickles the file, so only point this at a model file
# from a trusted source.
rf_model_path = "../models/rf_model.pkl"
rf_model = joblib.load(rf_model_path)

# Step 4: Helper for MFCC extraction


def extract_mfcc(path, *, n_mfcc=20, duration=30):
    """Summarise an audio file as a fixed-length vector of MFCC statistics.

    Args:
        path: Audio file to load (passed straight to ``librosa.load``).
        n_mfcc: Number of MFCC coefficients to compute. Defaults to 20,
            the value this pipeline previously hard-coded.
        duration: Maximum number of seconds of audio to read. Defaults to
            30, the value this pipeline previously hard-coded.

    Returns:
        A 1-D NumPy array holding the per-coefficient mean over axis 1 of
        the MFCC matrix followed by the per-coefficient standard deviation
        over the same axis.

    Note:
        NOTE(review): the defaults presumably have to match the settings
        used when the consuming model was trained -- confirm against the
        RF training step before overriding them.
    """
    # sr=None asks librosa to keep the file's native sampling rate.
    y, sr = librosa.load(path, sr=None, duration=duration)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    # Collapse axis 1 into two summary statistics per coefficient so every
    # clip yields a vector of the same length regardless of how long it is.
    return np.concatenate([np.mean(mfcc, axis=1), np.std(mfcc, axis=1)])


# Step 5: Load CNN predictions from 3.2 output
# Read the table back from the CSV written into the CNN report folder.
cnn_csv_path = os.path.join(CNN_OUTPUT_DIR, "cnn_song_predictions.csv")
cnn_df = pd.read_csv(cnn_csv_path)

# Vggish
generate_embeddings = step_4_1_Gen_vggish_embed.generate_vggish_embeddings
evaluate_embeddings = step_4_2_Vggish_Class.evaluate_on_new_embeddings

# First pass: run the embedding generator with its own default arguments.
# NOTE(review): presumably this covers the training set -- confirm in
# step_4_1_Gen_vggish_embed.
print("There may be some errors, this is expected")
generate_embeddings()

# Second pass: embed the inference songs into this notebook's report folder.
print("[INFO] Generating VGGish embeddings for song inference set...")
generate_embeddings(
    mp3_root=SONG_DIR,
    output_dir=VGGISH_SONG_OUTPUT_DIR,
    metadata_path=VGGISH_SONG_METADATA,
)

# Score the saved VGGish RF model on the song embeddings.
# NOTE(review): the recorded output of this cell ends, right after
# "[INFO] Evaluating on new embeddings...", with
# "ValueError: invalid literal for int() with base 10: 'Abandoned_Culture_-_Solxis'".
# Presumably the evaluator parses a name as an integer id -- confirm in
# step_4_2_Vggish_Class; if it raises here, nothing below this call runs.
evaluate_embeddings(
    model_path=VGGISH_RF_MODEL,
    embedding_dir=VGGISH_SONG_OUTPUT_DIR,
    metadata_file=VGGISH_SONG_METADATA,
    report_dir=VGGISH_RF_SONG_REPORT_DIR,
)


# Step 6: Inference loop
# Build the "spectrogram file name -> CNN label" lookup once instead of
# scanning cnn_df twice for every song. keep="first" preserves the previous
# behaviour of taking the first matching row when a name appears repeatedly.
# Raises KeyError up front if cnn_df lacks the "file" or "CNN" column.
# NOTE(review): matching is by bare file name, so two songs with the same
# name in different genre folders resolve to the same CNN row.
cnn_lookup = (
    cnn_df.drop_duplicates(subset="file", keep="first")
    .set_index("file")["CNN"]
    .to_dict()
)

results = []
# os.listdir returns entries in arbitrary order; sorting makes the row order
# of the combined predictions table reproducible between runs.
for genre_dir in sorted(os.listdir(SONG_DIR)):
    genre_path = os.path.join(SONG_DIR, genre_dir)
    if not os.path.isdir(genre_path):
        continue

    for fname in sorted(os.listdir(genre_path)):
        if not fname.endswith(".mp3"):
            continue

        base = os.path.splitext(fname)[0]
        fpath = os.path.join(genre_path, fname)
        png_filename = f"{base}.png"
        spectro_path = os.path.join(SPECTRO_DIR, genre_dir, png_filename)

        # Best-effort: a song that cannot be decoded or scored is reported
        # and skipped so one bad file does not abort the whole run. Only the
        # calls that can actually fail sit inside the try.
        try:
            mfcc_vector = extract_mfcc(fpath).reshape(1, -1)
            rf_pred = rf_model.predict(mfcc_vector)[0]
        except Exception as e:
            print(f"Error processing {fname}: {e}")
            continue

        results.append({
            "file": fname,
            "true_genre": genre_dir,  # the folder name is the ground-truth label
            "RF": rf_pred,
            # "N/A" marks songs with no row in the CNN predictions table.
            "CNN": cnn_lookup.get(png_filename, "N/A"),
            "spectrogram_path": spectro_path,
        })


# Step 7: Save combined predictions
# One row per successfully processed song; written without the index column.
df = pd.DataFrame(data=results)
df.to_csv(path_or_buf=COMBINED_OUTPUT_PATH, index=False)

# Step 8: Evaluation (RF and CNN)
# Each model's artefacts go into that model's own report folder: a text
# classification report, a confusion-matrix image, and one bar chart per
# metric (precision, recall, F1).
for model, out_dir in (("RF", RF_OUTPUT_DIR), ("CNN", CNN_OUTPUT_DIR)):
    # Skip models with no prediction column (e.g. no song was processed).
    if model not in df.columns:
        continue

    prefix = model.lower()
    y_true = df["true_genre"]
    y_pred = df[model]
    # Union of true and predicted labels, so labels that only ever appear as
    # a prediction still get a row/column.
    labels_sorted = sorted(set(y_true).union(y_pred))

    # --- Text report ---
    report = classification_report(
        y_true, y_pred, labels=labels_sorted, zero_division=0)
    report_path = os.path.join(out_dir, f"{prefix}_classification_report.txt")
    with open(report_path, "w") as report_file:
        report_file.write(report)

    # --- Confusion matrix heatmap ---
    cm = confusion_matrix(y_true, y_pred, labels=labels_sorted)
    tick_positions = np.arange(len(labels_sorted))
    plt.figure(figsize=(10, 8))
    plt.imshow(cm, cmap='Blues', interpolation='nearest')
    plt.title(f"Confusion Matrix - {model}")
    plt.colorbar()
    plt.xticks(tick_positions, labels_sorted, rotation=45, ha='right')
    plt.yticks(tick_positions, labels_sorted)
    plt.xlabel("Predicted")
    plt.ylabel("True")
    plt.tight_layout()
    plt.savefig(os.path.join(out_dir, f"{prefix}_confusion_matrix.png"))
    plt.close()

    # --- Per-genre metric bar charts ---
    precision, recall, f1, _ = precision_recall_fscore_support(
        y_true, y_pred, labels=labels_sorted, zero_division=0)
    per_genre_scores = (
        ("Precision", precision),
        ("Recall", recall),
        ("F1-Score", f1),
    )
    for metric_name, values in per_genre_scores:
        bar_name = f"{prefix}_{metric_name.lower().replace('-', '_')}_bar.png"
        plt.figure(figsize=(10, 5))
        plt.bar(labels_sorted, values)
        plt.title(f"{metric_name} per Genre - {model}")
        plt.ylabel(metric_name)
        plt.xticks(rotation=45, ha='right')
        plt.tight_layout()
        plt.savefig(os.path.join(out_dir, bar_name))
        plt.close()

[INFO] Generating spectrograms from: ../data/Songs/
There may be some errors, this is expected


Processing 000: 100%|██████████| 62/62 [00:00<00:00, 62078.50it/s]
Processing 001: 100%|██████████| 60/60 [00:00<00:00, 45772.69it/s]
Processing 002: 100%|██████████| 4/4 [00:00<?, ?it/s]
Processing 003: 100%|██████████| 54/54 [00:00<00:00, 53991.04it/s]
Processing 004: 100%|██████████| 71/71 [00:00<00:00, 71022.08it/s]
Processing 005: 100%|██████████| 17/17 [00:00<00:00, 17017.46it/s]
Processing 006: 100%|██████████| 71/71 [00:00<00:00, 35355.05it/s]
Processing 007: 100%|██████████| 37/37 [00:00<?, ?it/s]
Processing 008: 100%|██████████| 10/10 [00:00<?, ?it/s]
Processing 009: 100%|██████████| 23/23 [00:00<00:00, 15264.08it/s]
Processing 010: 100%|██████████| 65/65 [00:00<00:00, 32398.07it/s]
Processing 011: 100%|██████████| 90/90 [00:00<00:00, 87787.76it/s]
Processing 012: 100%|██████████| 67/67 [00:00<00:00, 67068.82it/s]
Processing 013: 100%|██████████| 43/43 [00:00<00:00, 43023.63it/s]
Processing 014: 100%|██████████| 61/61 [00:00<?, ?it/s]
Processing 015: 100%|██████████| 20/20 [0

[ERROR] 098567: Unspecified internal error.


Processing 099: 100%|██████████| 45/45 [00:00<00:00, 44971.09it/s]


[ERROR] 099134: Error opening 'c:\\Users\\ranuL\\Documents\\GitHub\\GenreWave\\data/fma_small\\099\\099134.mp3': File does not exist or is not a regular file (possibly a pipe?).


Processing 100: 100%|██████████| 22/22 [00:00<?, ?it/s]
Processing 101: 100%|██████████| 10/10 [00:00<?, ?it/s]
Processing 102: 100%|██████████| 4/4 [00:00<?, ?it/s]
Processing 103: 100%|██████████| 9/9 [00:00<00:00, 9015.70it/s]
Processing 104: 100%|██████████| 28/28 [00:00<00:00, 27962.03it/s]
Processing 105: 100%|██████████| 49/49 [00:00<?, ?it/s]
Processing 106: 100%|██████████| 33/33 [00:00<?, ?it/s]
Processing 107: 100%|██████████| 103/103 [00:00<00:00, 103056.61it/s]
Processing 108: 100%|██████████| 143/143 [00:00<00:00, 71564.91it/s]


[ERROR] 108925: Error opening 'c:\\Users\\ranuL\\Documents\\GitHub\\GenreWave\\data/fma_small\\108\\108925.mp3': File does not exist or is not a regular file (possibly a pipe?).


Processing 109: 100%|██████████| 61/61 [00:00<00:00, 61033.53it/s]
Processing 110: 100%|██████████| 99/99 [00:00<00:00, 99030.79it/s]
Processing 111: 100%|██████████| 77/77 [00:00<00:00, 76968.88it/s]
Processing 112: 100%|██████████| 59/59 [00:00<00:00, 53668.17it/s]
Processing 113: 100%|██████████| 102/102 [00:00<00:00, 51052.39it/s]
Processing 114: 100%|██████████| 128/128 [00:00<00:00, 51057.62it/s]
Processing 115: 100%|██████████| 76/76 [00:00<00:00, 75251.91it/s]
Processing 116: 100%|██████████| 90/90 [00:00<00:00, 44933.62it/s]
Processing 117: 100%|██████████| 51/51 [00:00<00:00, 43896.88it/s]
Processing 118: 100%|██████████| 64/64 [00:00<00:00, 31462.20it/s]
Processing 119: 100%|██████████| 58/58 [00:00<?, ?it/s]
Processing 120: 100%|██████████| 86/86 [00:00<00:00, 140792.41it/s]
Processing 121: 100%|██████████| 75/75 [00:00<00:00, 75005.44it/s]
Processing 122: 100%|██████████| 115/115 [00:00<00:00, 38357.45it/s]
Processing 123: 100%|██████████| 83/83 [00:00<?, ?it/s]
Processing

[ERROR] 133297: Error opening 'c:\\Users\\ranuL\\Documents\\GitHub\\GenreWave\\data/fma_small\\133\\133297.mp3': File does not exist or is not a regular file (possibly a pipe?).


Processing 134: 100%|██████████| 78/78 [00:00<00:00, 77987.06it/s]
Processing 135: 100%|██████████| 41/41 [00:00<00:00, 81308.02it/s]
Processing 136: 100%|██████████| 30/30 [00:00<00:00, 30002.17it/s]
Processing 137: 100%|██████████| 62/62 [00:00<00:00, 30998.55it/s]
Processing 138: 100%|██████████| 59/59 [00:00<?, ?it/s]
Processing 139: 100%|██████████| 51/51 [00:00<00:00, 25495.77it/s]
Processing 140: 100%|██████████| 44/44 [00:00<?, ?it/s]
Processing 141: 100%|██████████| 65/65 [00:00<?, ?it/s]
Processing 142: 100%|██████████| 80/80 [00:00<00:00, 80005.80it/s]
Processing 143: 100%|██████████| 43/43 [00:00<00:00, 43023.63it/s]
Processing 144: 100%|██████████| 65/65 [00:00<00:00, 27054.66it/s]
Processing 145: 100%|██████████| 89/89 [00:00<00:00, 89155.26it/s]
Processing 146: 100%|██████████| 37/37 [00:00<00:00, 36993.86it/s]
Processing 147: 100%|██████████| 25/25 [00:00<00:00, 24972.04it/s]
Processing 148: 100%|██████████| 86/86 [00:00<00:00, 85903.82it/s]
Processing 149: 100%|███████


Successfully processed: 0
Failed to process: 4
Saved metadata to c:\Users\ranuL\Documents\GitHub\GenreWave\data\metadata.csv
[INFO] Generating VGGish embeddings for song inference set...


  features = model(torch.tensor(mel).float())
Processing Electronic: 100%|██████████| 13/13 [00:04<00:00,  2.80it/s]
Processing Experimental: 100%|██████████| 10/10 [00:00<?, ?it/s]
Processing Folk: 100%|██████████| 12/12 [00:00<00:00, 12006.60it/s]
Processing Hip-Hop: 100%|██████████| 10/10 [00:00<?, ?it/s]
Processing Instrumental: 0it [00:00, ?it/s]
Processing International: 0it [00:00, ?it/s]
Processing Pop: 100%|██████████| 10/10 [00:00<00:00, 10003.11it/s]
Processing Rock: 100%|██████████| 100/100 [00:00<00:00, 100031.10it/s]


Successfully processed: 2
Failed to process: 0
Saved metadata to ../reports/step_final_classify/vggish_metadata_songs.csv
[INFO] Evaluating on new embeddings...





ValueError: invalid literal for int() with base 10: 'Abandoned_Culture_-_Solxis'