Partie 1 : Imports et lecture du CSV

In [6]:
%pip install -r ../requirements.txt

Note: you may need to restart the kernel to use updated packages.


In [1]:
import os
import pandas as pd
import librosa
import librosa.display
import matplotlib.pyplot as plt
import numpy as np
from tqdm import tqdm

In [2]:
# Load the recording metadata from disk
csv_path = "../generate/new_metadata.csv"
metadata_raw = pd.read_csv(csv_path)

# Keep only rows whose type mentions "song" or "call" and whose rating is >= 4,
# then renumber the surviving rows from 0
is_song_or_call = metadata_raw["type"].astype(str).str.contains("song|call", na=False)
is_well_rated = metadata_raw["rating"] >= 4
metadata = metadata_raw[is_song_or_call & is_well_rated].reset_index(drop=True)

# Preview the first rows to check the columns
metadata.head()


Unnamed: 0,primary_label,filename,type,rating,duration,samplerate
0,barswa,barswa/XC113914.ogg,song,5.0,5,32000
1,barswa,barswa/XC129647.ogg,song,5.0,5,32000
2,barswa,barswa/XC132406.ogg,song,5.0,5,32000
3,barswa,barswa/XC133096.ogg,song,5.0,5,32000
4,barswa,barswa/XC133802.ogg,song,5.0,5,32000


Partie 2 : Définir le dossier de sortie

In [3]:
# Root folder where the spectrogram images will be stored;
# created up front, and exist_ok=True makes re-running this cell harmless
output_base = "../Spectrograms"
os.makedirs(output_base, exist_ok=True)


Partie 3 : Fonction de génération de spectrogramme

In [5]:
def save_mel_spectrogram(audio_path, output_path, sr=22050, duration=10, n_mels=128):
    """Render the mel spectrogram of an audio file and save it as an image.

    The audio is loaded with librosa, converted to a mel power spectrogram,
    scaled to decibels relative to its maximum, and drawn without axes on a
    3x3 inch figure using the 'magma' colormap.

    Parameters
    ----------
    audio_path : str
        Path of the audio file to load.
    output_path : str
        Path of the image file to write.
    sr : int, optional
        Sampling rate passed to ``librosa.load`` (default 22050).
    duration : float, optional
        Maximum number of seconds of audio to load (default 10).
    n_mels : int, optional
        Number of mel bands in the spectrogram (default 128).

    Returns
    -------
    bool
        True if the image was written, False if any step raised an
        exception (the error is printed, not propagated).
    """
    fig = None  # remains None if the failure happens before a figure exists
    try:
        y, sr = librosa.load(audio_path, sr=sr, duration=duration)
        S = librosa.feature.melspectrogram(y=y, sr=sr, n_mels=n_mels)
        S_DB = librosa.power_to_db(S, ref=np.max)

        # Explicit figure/axes instead of the pyplot global state, so the
        # exact figure created here can be closed whatever happens next.
        fig, ax = plt.subplots(figsize=(3, 3))
        librosa.display.specshow(S_DB, sr=sr, cmap='magma', ax=ax)
        ax.axis('off')
        fig.tight_layout()
        fig.savefig(output_path, bbox_inches='tight', pad_inches=0)
        return True
    except Exception as e:
        # Best-effort by design: report the failing file and let the caller go on.
        print(f"❌ Erreur lors du traitement de {audio_path} : {e}")
        return False
    finally:
        # Close the figure on the error path too; otherwise every failed
        # render leaves an open figure behind during a long batch.
        if fig is not None:
            plt.close(fig)


Partie 4 : Boucle sur tous les fichiers audio

In [7]:
%pip install ipywidgets


Note: you may need to restart the kernel to use updated packages.


In [6]:
import gc

success = 0
failed = 0

# Only two columns are needed, so iterate over them directly rather than
# materialising a full Series per row with iterrows().
records = zip(metadata['filename'], metadata['primary_label'])

for filename, label in tqdm(records, total=len(metadata)):
    # filename looks like "barswa/XC113914.ogg", label like "barswa"
    audio_path = os.path.join("..", "bird_audio", filename)

    if not os.path.exists(audio_path):
        print(f"❌ Fichier introuvable : {audio_path}")
        failed += 1
        continue

    # Create the per-label folder only once we know there is something to write.
    output_dir = os.path.join(output_base, label)
    os.makedirs(output_dir, exist_ok=True)

    # Swap the extension with splitext: a plain .replace(".ogg", ".png") would
    # also rewrite ".ogg" inside the name and would leave any non-.ogg file
    # without a .png extension.
    stem = os.path.splitext(os.path.basename(filename))[0]
    output_file = os.path.join(output_dir, stem + ".png")

    # save_mel_spectrogram catches its own exceptions and returns a bool,
    # so no extra try/except is needed here.
    if save_mel_spectrogram(audio_path, output_file):
        success += 1
    else:
        failed += 1

    gc.collect()  # release memory after each rendered file

print(f"✅ Terminé : {success} réussis, {failed} échoués.")


100%|██████████| 2986/2986 [1:05:09<00:00,  1.31s/it]

✅ Terminé : 2986 réussis, 0 échoués.



