In [11]:
import os
import librosa
import pandas as pd
from tqdm import tqdm

# --- Configuration ---------------------------------------------------------
audio_dir = "E:/birdclef-2024/processed_audio"
output_csv = "night_flagged_audio.csv"

# A clip gets night_predicted = 1 only when its mean spectral flatness is
# above flat_thresh AND its mean RMS is below rms_thresh.
flat_thresh = 0.15
rms_thresh = 0.02

# --- Discover inputs -------------------------------------------------------
# Walk the whole tree under audio_dir and keep every path whose file name
# ends in ".wav" (extension compared case-insensitively).
all_files = [
    os.path.join(folder, entry)
    for folder, _subdirs, entries in os.walk(audio_dir)
    for entry in entries
    if entry.lower().endswith(".wav")
]

print(f"🔍 Found {len(all_files)} .wav files.")

# --- Per-file features -----------------------------------------------------
results = []
for fpath in tqdm(all_files):
    # Rows are keyed by the path relative to audio_dir.
    fname = os.path.relpath(fpath, audio_dir)
    try:
        y, sr = librosa.load(fpath, sr=32000)
        flatness = float(librosa.feature.spectral_flatness(y=y).mean())
        rms = float(librosa.feature.rms(y=y).mean())
    except Exception as e:
        # Best effort: report the file and move on without adding a row.
        print(f"❌ Failed to load {fname}: {e}")
        continue

    night_predicted = int(flatness > flat_thresh and rms < rms_thresh)
    results.append(
        dict(
            filename=fname,
            flatness=flatness,
            rms=rms,
            night_predicted=night_predicted,
        )
    )

# --- Persist and summarise -------------------------------------------------
if not results:
    print("⚠️ Still no audio processed — check file access or format.")
else:
    df = pd.DataFrame(results)
    df.to_csv(output_csv, index=False)
    print(f"✅ Saved {len(df)} entries to {output_csv}")
    print(f"🌙 Night-flagged chunks: {len(df.loc[df['night_predicted'] == 1])}")


🔍 Found 329747 .wav files.


100%|██████████| 329747/329747 [1:55:11<00:00, 47.71it/s]  


✅ Saved 329747 entries to night_flagged_audio.csv
🌙 Night-flagged chunks: 10132


In [15]:
import os
import librosa
import pandas as pd
from tqdm import tqdm

# Flag clips under `audio_dir` using two clip-level features: mean spectral
# flatness and mean RMS. One row per successfully processed .wav file is
# written to `output_csv` with columns filename, flatness, rms, night_predicted.
audio_dir = "E:/birdclef-2024/unlabeled_segments_filtered"

# NOTE: this cell previously wrote to "night_flagged_audio.csv", the same file
# the processed_audio run in this notebook writes to, so running it silently
# replaced that earlier (multi-hour) result. The unlabeled segments now get
# their own file; any later step that wants these rows must read this name.
output_csv = "night_flagged_unlabeled_segments.csv"

# A clip gets night_predicted = 1 only when its mean spectral flatness is
# above flat_thresh AND its mean RMS is below rms_thresh.
flat_thresh = 0.15
rms_thresh = 0.02

results = []

# ✅ Recursively collect all .wav files (extension compared case-insensitively)
all_files = [
    os.path.join(root, f)
    for root, _, files in os.walk(audio_dir)
    for f in files
    if f.lower().endswith(".wav")
]

print(f"🔍 Found {len(all_files)} .wav files.")

for fpath in tqdm(all_files):
    fname = os.path.relpath(fpath, audio_dir)  # Save relative path
    try:
        y, sr = librosa.load(fpath, sr=32000)
        flatness = float(librosa.feature.spectral_flatness(y=y).mean())
        rms = float(librosa.feature.rms(y=y).mean())
        night_predicted = 1 if (flatness > flat_thresh and rms < rms_thresh) else 0

        results.append({
            "filename": fname,
            "flatness": flatness,
            "rms": rms,
            "night_predicted": night_predicted
        })

    except Exception as e:
        # Best effort: a file that cannot be read or analysed is reported and
        # skipped, so it contributes no row to the CSV.
        print(f"❌ Failed to load {fname}: {e}")

# ✅ Save output (only when at least one file was processed)
if results:
    df = pd.DataFrame(results)
    df.to_csv(output_csv, index=False)
    print(f"✅ Saved {len(df)} entries to {output_csv}")
    # night_predicted only holds 0 or 1, so the column sum is the flagged count.
    print(f"🌙 Night-flagged chunks: {int(df['night_predicted'].sum())}")
else:
    print("⚠️ Still no audio processed — check file access or format.")


🔍 Found 81279 .wav files.


100%|██████████| 81279/81279 [39:28<00:00, 34.32it/s]


✅ Saved 81279 entries to night_flagged_audio.csv
🌙 Night-flagged chunks: 8071
