In [1]:
import pandas as pd
from pathlib import Path
import os
import numpy as np
import librosa
import soundfile as sf

from utils.segmentation import yolo_to_tf_centered



In [None]:
# ----------------------
# Configuration
# ----------------------

# Directory that holds the YOLO prediction label files.
LABEL_DIR = "path/to/labels/directory/"

# Annotation table used as input, and the enriched copy written beside it.
CSV_INPUT = "annotations_trills.csv"
CSV_PATH = Path("path/to/annotations/") / CSV_INPUT
OUT_CSV = CSV_PATH.parent / "annotations_trills_with_pred.csv"

# win_len and f_max_global are handed to yolo_to_tf_centered when decoding
# boxes; f_max_global is derived as half of sr.
sr = 44100
win_len = 2.0
f_max_global = sr / 2


### Add YOLO model predictions to the annotated CSV

In [None]:
# ----------------------
# Main: attach YOLO predictions to the annotation table
# ----------------------

df = pd.read_csv(CSV_PATH)

# Prediction columns start out as NaN; rows without a usable label file
# simply keep that NaN.
PRED_COLUMNS = (
    "trill_t_start_pred",
    "trill_t_end_pred",
    "trill_f_min_pred",
    "trill_f_max_pred",
)
for pred_col in PRED_COLUMNS:
    df[pred_col] = np.nan

label_root = Path(LABEL_DIR)

for idx, row in df.iterrows():

    # Label files are looked up as "<audio stem>_seg<segment id>.txt".
    label_file = label_root / f"{Path(row['file_name_radical']).stem}_seg{row['segment_id']}.txt"

    if idx == 0:
        print(f"Processing {label_file} ")

    # Skip rows whose label file is absent or empty.
    if not (label_file.exists() and label_file.stat().st_size > 0):
        continue

    # Only the first line of the label file is used.
    with open(label_file) as fh:
        fields = fh.readline().strip().split()

    if len(fields) < 5:
        continue

    # First field is discarded; the next four are the box centre and size.
    _, xc, yc, w, h = [float(value) for value in fields[:5]]

    # YOLO box -> time/frequency bounds relative to the segment
    t0_rel, t1_rel, f0, f1 = yolo_to_tf_centered(xc, yc, w, h, win_len, f_max_global)

    # Segment-relative times -> absolute times in the audio file
    segment_offset = row["time_start"]
    predicted = (t0_rel + segment_offset, t1_rel + segment_offset, f0, f1)

    for pred_col, pred_value in zip(PRED_COLUMNS, predicted):
        df.loc[idx, pred_col] = pred_value

# ----------------------
# Save
# ----------------------

df.to_csv(OUT_CSV, index=False)

print("✅ CSV enrichi sauvegardé")


### Export WAV files from the detected segments

In [None]:
def export_trill_audio_segments(
    df,
    audio_dir,
    output_dir,
    sample_rate=None,
    padding_ratio=0.05,
    skip_files=("Luscinia_svecica_654083.wav",),
):
    """
    Export one padded WAV crop per row that has a valid predicted trill.

    For each row, the audio file ``audio_dir/<file_name_radical>`` is loaded
    (mono), the window ``[trill_t_start_pred, trill_t_end_pred]`` is widened
    by ``padding_ratio`` of its duration on each side, clamped to the bounds
    of the audio, and written to ``output_dir/<stem>_seg<segment_id>.wav``.

    Rows are skipped (with a printed message) when the audio file is missing,
    when the predicted times are NaN or not strictly increasing, when the
    file is listed in ``skip_files``, or when the clamped window contains no
    samples (e.g. the prediction lies past the end of the audio).

    Parameters
    ----------
    df : pd.DataFrame
        Must contain columns:
        ['file_name_radical', 'segment_id',
         'trill_t_start_pred', 'trill_t_end_pred']
        Times are multiplied by the sampling rate to obtain sample indices.
    audio_dir : str
        Directory containing the original audio files
    output_dir : str
        Directory where cropped files will be written (created if needed)
    sample_rate : int or None
        Passed to ``librosa.load`` as ``sr``; if None, keep native sampling rate
    padding_ratio : float
        Fraction of trill duration added on each side
    skip_files : iterable of str, or str, or None
        Values of ``file_name_radical`` that must never be exported.
        Defaults to the single file previously hard-coded as problematic.

    Returns
    -------
    None
    """

    os.makedirs(output_dir, exist_ok=True)

    # Accept a bare string as a single file name rather than a set of chars.
    if isinstance(skip_files, str):
        skip_files = (skip_files,)
    skip_files = set(skip_files or ())

    invalid_files = []

    # Single-entry cache: consecutive rows that refer to the same audio file
    # reuse the decoded signal instead of decoding the whole file again.
    cached_path = None
    y, sr = None, None

    for _, row in df.iterrows():
        file_name = row["file_name_radical"]
        audio_path = os.path.join(audio_dir, file_name)

        if not os.path.exists(audio_path):
            print(f"Missing file: {audio_path}")
            continue

        t_start = row["trill_t_start_pred"]
        t_end = row["trill_t_end_pred"]

        if np.isnan(t_start) or np.isnan(t_end) or t_start >= t_end:
            print(f"Invalid times for {audio_path}: start={t_start}, end={t_end}")
            invalid_files.append(audio_path)
            continue

        if file_name in skip_files:
            print(f"Skipping file with known issues: {audio_path}")
            print(f"Times: start={t_start}, end={t_end}")
            continue

        # Load audio (only when the file differs from the previous row's)
        if audio_path != cached_path:
            y, sr = librosa.load(audio_path, sr=sample_rate, mono=True)
            cached_path = audio_path

        # Padding, clamped to the bounds of the audio
        duration = t_end - t_start
        pad = padding_ratio * duration

        t_start_pad = max(0, t_start - pad)
        t_end_pad = min(len(y) / sr, t_end + pad)

        # Convert to samples
        s_start = int(t_start_pad * sr)
        s_end = int(t_end_pad * sr)

        # A window starting at or after the end of the audio yields no
        # samples; report it instead of writing an empty WAV file.
        if s_end <= s_start:
            print(f"Empty crop for {audio_path}: start={t_start}, end={t_end}")
            invalid_files.append(audio_path)
            continue

        # Strip only the final extension so names containing extra dots are
        # not truncated (and cannot collide with each other).
        stem = os.path.splitext(os.path.basename(file_name))[0]
        out_path = os.path.join(output_dir, f"{stem}_seg{row['segment_id']}.wav")

        # Write audio
        sf.write(out_path, y[s_start:s_end], sr)

    if invalid_files:
        print(f"Number of files with invalid times: {len(invalid_files)}")
        


In [None]:
# Placeholder locations: adjust to the local setup before running.
DATA_DIR = "path/to/annotation/file/"
AUDIO_DIR = "path/to/audio/files/"
OUTPUT_DIR = "path/to/trill_segments_detected/"

# Annotation table that already carries the *_pred columns.
trill_rate_annotation_file = "annotations_trills_with_pred.csv"
annotation_csv = os.path.join(DATA_DIR, trill_rate_annotation_file)
df_annotation_rhythm = pd.read_csv(annotation_csv)

export_trill_audio_segments(
    df_annotation_rhythm,
    AUDIO_DIR,
    OUTPUT_DIR,
    sample_rate=None,  # keep the native sampling rate
    padding_ratio=0.05,
)
