In [None]:
import os
import librosa
import numpy as np
from PIL import Image

# Destination folder for the generated spectrogram PNGs. It is created up
# front; exist_ok=True makes this a no-op when the folder is already there.
OUTPUT_DIR = './soundscape_melspec_images'
os.makedirs(OUTPUT_DIR, exist_ok=True)

# Folder that is scanned for the source .ogg recordings.
INPUT_DIR = './birdclef-2025/train_soundscapes'

def audio_to_melspec_image(audio_path, sr=32000, duration=5, hop_length=512, n_mels=128):
    """Render the beginning of an audio file as a mel-spectrogram image.

    At most ``duration`` seconds are loaded from the start of ``audio_path``
    as mono audio at sample rate ``sr``. The mel power spectrogram is
    converted to decibels relative to its own maximum and then min-max
    scaled to the 0-255 range before being turned into an image.

    Args:
        audio_path: Path of the audio file to read.
        sr: Sample rate passed to both the loader and the mel transform.
        duration: Maximum number of seconds read from the start of the file.
        hop_length: Hop length passed to the mel transform.
        n_mels: Number of mel bands requested from the mel transform.

    Returns:
        A PIL image in "RGB" mode built from the 8-bit scaled spectrogram.
        A clip with no dynamic range (e.g. pure digital silence) yields an
        all-black image.
    """
    y, _ = librosa.load(audio_path, sr=sr, mono=True, duration=duration)
    melspec = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels, hop_length=hop_length)
    melspec_db = librosa.power_to_db(melspec, ref=np.max)

    db_min = melspec_db.min()
    db_range = melspec_db.max() - db_min
    if db_range > 0:
        melspec_norm = (melspec_db - db_min) / db_range * 255
    else:
        # Constant spectrogram: dividing by the zero range would give NaN,
        # and casting NaN to uint8 is undefined. Emit a black image instead.
        melspec_norm = np.zeros_like(melspec_db)

    img = Image.fromarray(melspec_norm.astype(np.uint8)).convert("RGB")
    return img

# Convert every .ogg file found directly inside INPUT_DIR into a PNG of the
# same base name inside OUTPUT_DIR. A failure on one file is reported and the
# loop moves on, so a single bad recording does not abort the whole batch.
for fname in os.listdir(INPUT_DIR):
    if not fname.endswith('.ogg'):
        continue

    in_path = os.path.join(INPUT_DIR, fname)
    # Swap only the trailing extension; str.replace would also rewrite any
    # earlier ".ogg" occurring inside the file name.
    out_name = fname[:-len('.ogg')] + '.png'
    out_path = os.path.join(OUTPUT_DIR, out_name)

    try:
        img = audio_to_melspec_image(in_path)
        img.save(out_path)
    except Exception as e:
        # Deliberate best-effort boundary: log and continue with the next file.
        print(f"Error processing {fname}: {e}")
    else:
        print(f"Saved: {out_path}")