In [None]:
# L 4-23-25
# notebooks/3.1_Generate_Spectrograms.ipynb

In [None]:
# notebooks/3.1_Generate_Spectrograms.ipynb

import os
import pandas as pd
import librosa
import librosa.display
import matplotlib.pyplot as plt
from PIL import Image
import numpy as np

# --- Inputs ---
# CSV listing the tracks to render, and the root folder that holds the
# audio files (the loop below looks for <AUDIO_DIR>/<first 3 digits>/<id>.mp3).
USED_TRACKS_CSV = "../reports/2_MFCC_RF_Classifier/used_tracks.csv"
AUDIO_DIR = "../data/fma_small"

# --- Outputs ---
# Edge length, in pixels, of the square images written by the loop below,
# and the root folder that receives them (created up front if missing).
IMG_SIZE = 128
OUTPUT_DIR = "../spectrograms"
os.makedirs(OUTPUT_DIR, exist_ok=True)

# --- Track list ---
# Read the table of tracks; the loop below uses its 'track_id' and 'genre'
# columns.
used_df = pd.read_csv(USED_TRACKS_CSV)
print(f"Loaded {len(used_df)} usable tracks from CSV")

# Render one mel-spectrogram PNG per track into OUTPUT_DIR/<genre>/<track_id>.png.
# Processing is best-effort: a track that fails is reported and skipped so one
# bad file does not abort the whole run.
# Iterating the two columns directly avoids building a Series per row (and the
# unused row index that iterrows() yields).
for raw_track_id, genre in zip(used_df['track_id'], used_df['genre']):
    # File names are the track id left-padded with zeros to 6 characters; the
    # first 3 characters select the sub-folder under AUDIO_DIR.
    track_id = str(raw_track_id).zfill(6)
    subfolder = track_id[:3]
    audio_path = os.path.join(AUDIO_DIR, subfolder, f"{track_id}.mp3")
    output_path = os.path.join(OUTPUT_DIR, genre)
    os.makedirs(output_path, exist_ok=True)
    save_path = os.path.join(output_path, f"{track_id}.png")

    try:
        # sr=None keeps the file's own sampling rate; at most the first 30 s
        # of audio are loaded.
        y, sr = librosa.load(audio_path, sr=None, duration=30)
        S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=128)
        # Convert power to decibels relative to the spectrogram's peak value.
        S_dB = librosa.power_to_db(S, ref=np.max)

        # 1.28 in x 100 dpi targets a 128x128 canvas; the exact saved size is
        # not relied upon because the image is resized afterwards.
        fig = plt.figure(figsize=(1.28, 1.28), dpi=100)
        try:
            librosa.display.specshow(S_dB, sr=sr, cmap='viridis')
            plt.axis('off')
            plt.tight_layout(pad=0)
            plt.savefig(save_path, bbox_inches='tight', pad_inches=0)
        finally:
            # Always release the figure, even when drawing or saving fails;
            # otherwise failed tracks leave open figures that pile up in
            # memory over the whole run.
            plt.close(fig)

        # Normalise the saved file to RGB at IMG_SIZE x IMG_SIZE. The source
        # file handle is closed before the same path is overwritten.
        with Image.open(save_path) as raw_img:
            img = raw_img.convert('RGB').resize((IMG_SIZE, IMG_SIZE))
        img.save(save_path)

    except Exception as e:
        # Deliberately broad: report the failing track and continue with the next.
        print(f"Failed to process {track_id}: {e}")

Loaded 7994 usable tracks from CSV
