In [None]:
# ruff: noqa  # Expect this notebook to be of bad quality

import os
import wave

import pandas as pd
from IPython.display import Audio, display

In [None]:
def _wav_filename(fname):
    """Return '<fname>.wav' for one metadata value, or None when the value is missing."""
    if fname is None or pd.isna(fname):
        return None
    # A numeric id column that contains gaps is parsed by pandas as float64, so
    # 201589 arrives as 201589.0; drop the fractional part so the name matches
    # the file on disk instead of becoming "201589.0.wav".
    if isinstance(fname, float) and fname.is_integer():
        fname = int(fname)
    stem = str(fname)
    if not stem:
        return None
    return stem + ".wav"


def _wav_duration(directory, filename):
    """Return the duration in seconds of directory/filename, or None if it cannot be read."""
    if filename is None:
        return None

    wav_path = os.path.join(directory, filename)
    if not os.path.exists(wav_path):
        return None

    try:
        with wave.open(wav_path, "rb") as wav_file:
            return wav_file.getnframes() / float(wav_file.getframerate())
    except Exception as e:
        # Deliberately best-effort: one unreadable file (bad header, zero frame
        # rate, permissions, ...) is reported and skipped rather than aborting
        # the whole pass.
        print(f"Error processing {filename}: {e}")
        return None


def add_wav_durations(path: str, metadata_csv: str, save: bool = True) -> pd.DataFrame:
    """
    Adds a 'Duration' column (in seconds) to a metadata CSV based on .wav file durations.

    Each row's ``fname`` value is turned into ``<fname>.wav`` and looked up in
    ``path``. Rows whose ``fname`` is missing, whose file does not exist, or
    whose file cannot be read get ``None`` as their duration (read failures are
    also printed).

    Parameters
    ----------
    path : str
        Path to the directory containing .wav files.
    metadata_csv : str
        Path to the input (and, when ``save`` is true, output) metadata CSV file.
    save : bool, default True
        When true, overwrite ``metadata_csv`` with the updated table
        (written without the index). Pass ``False`` to only get the
        DataFrame back.

    Returns
    -------
    pd.DataFrame
        Updated DataFrame with an added 'Duration' column.
    """
    metadata = pd.read_csv(metadata_csv)

    # Read the column directly instead of iterating rows: a row Series built
    # from an all-numeric frame upcasts integer ids to float.
    if "fname" in metadata.columns:
        fnames = metadata["fname"].tolist()
    else:
        # No 'fname' column: nothing can be resolved, every duration is None.
        fnames = [None] * len(metadata)

    metadata["Duration"] = [
        _wav_duration(path, _wav_filename(fname)) for fname in fnames
    ]

    if save:
        metadata.to_csv(metadata_csv, index=False)

    return metadata


def sample_fsd_file(file_path: str):
    """
    Display an inline audio player for one FSD50K clip.

    Parameters
    ----------
    file_path : str
        Clip path relative to ``../data/FSD50K/``, without the ``.wav``
        extension (e.g. ``"218946"``).

    Raises
    ------
    FileNotFoundError
        If the resolved .wav file does not exist.
    """
    audio_path = "../data/FSD50K/" + file_path + ".wav"

    # Fail early with a clear message: a path passed positionally to Audio is
    # its `data` argument, and when the file is missing the string is treated
    # as raw samples, which produces a confusing error.
    if not os.path.isfile(audio_path):
        raise FileNotFoundError(f"Audio clip not found: {audio_path}")

    # `rate` only applies to raw sample arrays, so it is not passed for a file.
    display(Audio(filename=audio_path))

In [46]:
# Inputs for add_wav_durations: the directory searched for .wav files and the
# metadata CSV that is read (and overwritten) by it.
# NOTE(review): `path` points at the "controls" folder while the CSV is the
# "triggers" metadata — confirm these are meant to be paired.
path = "../data/FSD50K/controls"
csv_path = "../data/metadata/fsd50k_triggers_metadata.csv"

# Left disabled: the call below rewrites csv_path with a 'Duration' column.
# Later cells read 'Duration' from that CSV, so it must already be present.
# _ = add_wav_durations(path = path, metadata_csv = csv_path)

In [47]:
# Load the metadata CSV and show the clips lasting at least 15 seconds.
eval_metadata = pd.read_csv(csv_path)
eval_metadata.loc[eval_metadata["Duration"].ge(15)]
# plt.bar(x = label_counts['labels'], height = label_counts['counts'])

Unnamed: 0,fname,labels,mids,Duration
4,201589,typing,/m/0c2wf,23.830227
5,201591,typing,/m/0c2wf,16.458866
38,394792,water_drops,/m/07r10fb,26.626599
53,137247,plastic_crumpling,/t/dd00112,20.238685
61,137257,plastic_crumpling,/t/dd00112,25.370159
...,...,...,...,...
1611,87507,knife_cutting,/m/07pn_8q,15.557166
1612,404969,knife_cutting,/m/07pn_8q,27.304830
1613,433712,knife_cutting,/m/07pn_8q,23.245147
1614,325223,knife_cutting,/m/07pn_8q,22.512018


For FSD50K_dev, 760 samples are longer than 5 seconds, 429 are longer than 10 seconds, and 273 are longer than 15 seconds.

For FSD50K_eval, 238 samples are longer than 5 seconds, 138 are longer than 10 seconds, and 87 are longer than 15 seconds.

- The clearing-throat clip 263935 from FSD50K_eval does not contain a cough until the 19th second.

In [48]:
# Clips of at least 15 seconds that are labelled clearing_throat.
eval_metadata.query('Duration >= 15 and labels == "clearing_throat"')

Unnamed: 0,fname,labels,mids,Duration
800,45574,clearing_throat,/m/01b_21,20.079456
801,45575,clearing_throat,/m/01b_21,18.805261
803,45577,clearing_throat,/m/01b_21,22.476916
805,171542,clearing_throat,/m/01b_21,22.047347
808,47135,clearing_throat,/m/01b_21,17.45712
809,366627,clearing_throat,/m/01b_21,25.018299
810,53797,clearing_throat,/m/01b_21,19.176803
835,180044,clearing_throat,/m/01b_21,27.398549
867,196283,clearing_throat,/m/01b_21,28.969705
879,382693,clearing_throat,/m/01b_21,21.475465


In [53]:
# Play clip 218946; sample_fsd_file resolves it to ../data/FSD50K/218946.wav.
sample_fsd_file("218946")

../data/FSD50K/218946.wav
