In [1]:
# This notebook is exploratory and is not held to linter standards, so ruff is disabled for it:
# ruff: noqa
%reload_ext autoreload
%autoreload 2

In [2]:
from pathlib import Path
import numpy as np
import pandas as pd
import IPython.display

In [22]:
# Directory that is globbed for *.wav files and used to build metadata["file_path"].
ALL_SAMPLES_DIR = Path("./data/FSD50K_eval")
# Directory used to build metadata_dev["file_path"].
ALL_DEV_SAMPLES_DIR = Path("./data/FSD50K")
# Root under which copy_samples() creates one subdirectory per sample group,
# and where annotations.csv is written.
ANNOTATE_SAMPLES_DIR = Path("./data/FSD50K_to_annotate")
# CSV read into `metadata`.
METADATA_FILE = Path("./data/metadata/fsd50k_eval_triggers_metadata.csv")
# CSV read into `metadata_dev`.
# NOTE(review): this is the exact same path as METADATA_FILE (the *eval* CSV),
# so `metadata_dev` currently holds the eval rows paired with dev-directory
# paths. Looks like a copy-paste slip -- confirm the intended dev metadata file.
METADATA_DEV_FILE = Path("./data/metadata/fsd50k_eval_triggers_metadata.csv")
# Number of files drawn for the "random" sample group.
N_SAMPLES = 50
# Base seed for np.random.default_rng (used as SEED and SEED * 2).
SEED = 42

In [16]:
# Read the metadata table and append a `file_path` column that points at
# <ALL_SAMPLES_DIR>/<fname>.wav for every row.
metadata = pd.read_csv(METADATA_FILE).assign(
    file_path=lambda frame: frame["fname"].map(lambda stem: ALL_SAMPLES_DIR / f"{stem}.wav")
)
metadata

Unnamed: 0,fname,labels,mids,Duration,file_path
0,424770,water_drops,/m/07r10fb,18.226213,data/FSD50K_eval/424770.wav
1,199925,plastic_crumpling,/t/dd00112,11.746395,data/FSD50K_eval/199925.wav
2,94671,plastic_crumpling,/t/dd00112,15.229546,data/FSD50K_eval/94671.wav
3,152992,plastic_crumpling,/t/dd00112,3.174898,data/FSD50K_eval/152992.wav
4,45824,plastic_crumpling,/t/dd00112,18.018005,data/FSD50K_eval/45824.wav
...,...,...,...,...,...
384,188548,human_breathing,/m/09hlz4,0.445692,data/FSD50K_eval/188548.wav
385,203864,human_breathing,/m/09hlz4,0.386168,data/FSD50K_eval/203864.wav
386,325657,knife_cutting,/m/07pn_8q,1.938980,data/FSD50K_eval/325657.wav
387,389181,knife_cutting,/m/07pn_8q,4.966667,data/FSD50K_eval/389181.wav


In [5]:
# Draw N_SAMPLES distinct .wav files from ALL_SAMPLES_DIR. The listing is sorted
# first so the seeded draw does not depend on filesystem enumeration order.
all_files = list(ALL_SAMPLES_DIR.glob("*.wav"))
all_files.sort()
rng = np.random.default_rng(SEED)
random_sampled_files = rng.choice(all_files, size=N_SAMPLES, replace=False)

# Keep the metadata rows whose file was drawn, ordered by fname with a fresh index.
random_samples = (
    metadata.loc[metadata["file_path"].isin(random_sampled_files)]
    .sort_values("fname")
    .reset_index(drop=True)
)
random_samples

Unnamed: 0,fname,labels,mids,Duration,file_path,length_sec
0,18621,knife_cutting,/m/01lsmm,26.0,data/FSD50K_eval/18621.wav,26.0
1,21113,human_breathing,/m/0lyf6,1.864626,data/FSD50K_eval/21113.wav,1.864626
2,41326,plastic_crumpling,/m/07plct2,3.104671,data/FSD50K_eval/41326.wav,3.104671
3,47046,typing,/m/0c2wf,0.777868,data/FSD50K_eval/47046.wav,0.777868
4,48202,plastic_crumpling,/m/07plct2,1.190045,data/FSD50K_eval/48202.wav,1.190045
5,85691,plastic_crumpling,/t/dd00112,3.64,data/FSD50K_eval/85691.wav,3.64
6,99692,typing,/m/0c2wf,19.877959,data/FSD50K_eval/99692.wav,19.877959
7,122801,typing,/m/01m2v,12.878367,data/FSD50K_eval/122801.wav,12.878367
8,126185,chewing_gum,/m/03cczk,10.736327,data/FSD50K_eval/126185.wav,10.736327
9,131323,plastic_crumpling,/m/07plct2,1.707279,data/FSD50K_eval/131323.wav,1.707279


In [6]:
# Copy all samples of a group into a subdirectory of ANNOTATE_SAMPLES_DIR
def copy_samples(path: Path, samples: pd.DataFrame) -> None:
    """Copy every file listed in ``samples["file_path"]`` into the directory ``path``.

    The destination directory is created (including parents) when it does not
    exist. A file whose name is already present in ``path`` is left untouched,
    so re-running the cell never overwrites earlier copies. Because only the
    file name is used for the destination, two sources sharing a name map to
    the same destination and only the first one is copied.

    Args:
        path: Destination directory.
        samples: Frame with a ``file_path`` column holding ``Path`` objects or
            path strings (e.g. after a round trip through CSV).

    Raises:
        KeyError: If ``samples`` has no ``file_path`` column.
        FileNotFoundError: If a listed source file does not exist.
    """
    # Local import keeps this cell self-contained; the notebook's import cell
    # does not bring in shutil.
    import shutil

    path.mkdir(parents=True, exist_ok=True)
    for file_path in samples["file_path"]:
        # Coerce so that plain-string paths work as well as Path objects.
        source_path = Path(file_path)
        dest_path = path / source_path.name
        if not dest_path.exists():
            # Stream the copy instead of loading the whole file into memory.
            shutil.copyfile(source_path, dest_path)


# Stage the files of the "random" group under <ANNOTATE_SAMPLES_DIR>/random.
copy_samples(ANNOTATE_SAMPLES_DIR / "random", random_samples)

In [7]:
def print_template(samples: pd.DataFrame) -> None:
    """Print one annotation-dict skeleton per row of ``samples``.

    Each printed line carries the row's ``fname``, ``...`` placeholders for the
    fields to be filled in by hand, ``"notes": None``, and the row's ``labels``
    value as a trailing ``#`` comment. The lines are meant to be pasted into an
    ``annotations[...]`` list and completed manually.

    Args:
        samples: Frame with at least the columns ``fname`` and ``labels``
            (``labels`` must hold strings).
    """
    opening = '{"fname": '
    blank_fields = ', "expected_trigger": ... , "other_triggers": ... , "has_noise": ... , "other_sounds": ... , "notes": None}, # '
    for record in samples.to_dict(orient="records"):
        pieces = (opening, str(record["fname"]), blank_fields, record["labels"])
        print("".join(pieces))


# print_template(random_samples)

In [8]:
# These are made manually
# - fname: the file name without extension
# - expected_trigger: whether the expected trigger sound is present
# - other_triggers: whether other trigger sounds are present
# - has_noise: whether some small background noise is present (False if the sound is very clean)
# - other_sounds: whether other non-trigger, non-noise sounds are present
# - notes: any additional notes

# Free-text observations that apply to whole classes rather than to single
# files. Not referenced by any other cell visible in this notebook.
overall_notes = """

General: Sound levels can be very different

chewing_gum: Often not sure if it is gum or some other chewing sound        

knife_cutting: Often a scissor sound

"""

# Registry of manual annotations: sample-group name -> list of per-file dicts.
# Each dict has the keys fname, expected_trigger, other_triggers, has_noise,
# other_sounds and notes. Note that other_triggers / other_sounds hold either
# False or a list of strings, so those fields are mixed-type.
annotations = {}
# Annotations for the "random" group. Entries are listed in ascending fname
# order; the trailing comment on each dict is the `labels` value that
# print_template() emits for that file.
annotations["random"] = [
    {
        "fname": 18621,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": True,
        "other_sounds": False,
        "notes": None,
    },  # knife_cutting
    {
        "fname": 21113,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": True,
        "other_sounds": False,
        "notes": None,
    },  # human_breathing
    {
        "fname": 41326,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": True,
        "other_sounds": False,
        "notes": None,
    },  # plastic_crumpling
    {
        "fname": 47046,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": "Type writer rewinding, not the clicking sounds",
    },  # typing
    {
        "fname": 48202,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # plastic_crumpling
    {
        "fname": 85691,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # plastic_crumpling
    {
        "fname": 99692,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # typing
    {
        "fname": 122801,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # typing
    {
        "fname": 126185,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": True,
        "other_sounds": False,
        "notes": "Do not know if it is gum, but it is chewing",
    },  # chewing_gum
    {
        "fname": 131323,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # plastic_crumpling
    {
        "fname": 134025,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # plastic_crumpling
    {
        "fname": 135872,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # typing
    {
        "fname": 137164,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": "Multiple knives at the same time",
    },  # knife_cutting
    {
        "fname": 144172,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": True,
        "other_sounds": False,
        "notes": "Do not know if it is gum, but it is chewing",
    },  # chewing_gum
    {
        "fname": 159456,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # clearing_throat
    {
        "fname": 167699,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # knife_cutting
    {
        "fname": 170546,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # typing
    {
        "fname": 256564,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # plastic_crumpling
    {
        "fname": 258256,
        "expected_trigger": True,
        "other_triggers": ["typing"],
        "has_noise": True,
        "other_sounds": False,
        "notes": "There is some background noise that could sound like typing",
    },  # clearing_throat
    {
        "fname": 268143,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # knife_cutting
    {
        "fname": 269338,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": True,
        "other_sounds": False,
        "notes": None,
    },  # plastic_crumpling
    {
        "fname": 280243,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": True,
        "other_sounds": False,
        "notes": None,
    },  # water_drops
    {
        "fname": 326556,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": True,
        "other_sounds": False,
        "notes": "Does sound like chewing something that is not gum",
    },  # chewing_gum
    {
        "fname": 327059,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": True,
        "other_sounds": False,
        "notes": None,
    },  # knife_cutting
    {
        "fname": 327539,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": True,
        "other_sounds": False,
        "notes": None,
    },  # plastic_crumpling
    {
        "fname": 330802,
        "expected_trigger": True,
        "other_triggers": ["human_breathing"],
        "has_noise": False,
        "other_sounds": False,
        "notes": "There is soooooomething  that sounds like breathing, although not 100 percent sure",
    },  # plastic_crumpling
    {
        "fname": 337623,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": True,
        "other_sounds": False,
        "notes": None,
    },  # typing
    {
        "fname": 340154,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": True,
        "other_sounds": False,
        "notes": None,
    },  # plastic_crumpling
    {
        "fname": 340899,
        "expected_trigger": False,
        "other_triggers": ["typing"],
        "has_noise": False,
        "other_sounds": False,
        "notes": "Sounds more like a typewriter rewinding",
    },  # plastic_crumpling
    {
        "fname": 352413,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # typing
    {
        "fname": 361148,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": True,
        "other_sounds": False,
        "notes": None,
    },  # human_breathing
    {
        "fname": 367177,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": True,
        "other_sounds": False,
        "notes": None,
    },  # typing
    {
        "fname": 379418,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": True,
        "other_sounds": False,
        "notes": None,
    },  # typing
    {
        "fname": 382701,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": True,
        "other_sounds": False,
        "notes": None,
    },  # plastic_crumpling
    {
        "fname": 383335,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": True,
        "other_sounds": False,
        "notes": None,
    },  # clearing_throat
    {
        "fname": 389181,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": True,
        "other_sounds": False,
        "notes": None,
    },  # knife_cutting
    {
        "fname": 390171,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # plastic_crumpling
    {
        "fname": 391500,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # plastic_crumpling
    {
        "fname": 393059,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": True,
        "other_sounds": False,
        "notes": None,
    },  # knife_cutting
    {
        "fname": 393733,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # clearing_throat
    {
        "fname": 403462,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": True,
        "other_sounds": False,
        "notes": "Scissors",
    },  # knife_cutting
    {
        "fname": 404402,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # chewing_gum
    {
        "fname": 406090,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # plastic_crumpling
    {
        "fname": 407709,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # typing
    {
        "fname": 408449,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # plastic_crumpling
    {
        "fname": 408612,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # plastic_crumpling
    {
        "fname": 408819,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # plastic_crumpling
    {
        "fname": 424752,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # typing
    {
        "fname": 431599,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": True,
        "other_sounds": False,
        "notes": None,
    },  # clearing_throat
    {
        "fname": 433599,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": "Kinda sounds like something from a typewriter",
    },  # typing
]

In [9]:
# Draw 20 distinct chewing_gum files, using a generator seeded with SEED * 2.
rng = np.random.default_rng(SEED * 2)
chewing_gum_paths = list(metadata.loc[metadata["labels"] == "chewing_gum", "file_path"])
drawn_chewing_paths = rng.choice(chewing_gum_paths, size=20, replace=False)

# Select every metadata row whose file was drawn, ordered by fname.
chewing_samples = (
    metadata.loc[metadata["file_path"].isin(drawn_chewing_paths)]
    .sort_values("fname")
    .reset_index(drop=True)
)
copy_samples(ANNOTATE_SAMPLES_DIR / "chewing", chewing_samples)
# print_template(chewing_samples)

In [10]:
# Annotations for the "chewing" group, listed in ascending fname order with
# the same dict keys as the other groups.
# NOTE(review): 366548 is annotated has_noise=False here but has_noise=True in
# the "longest_chewing" group -- confirm which value is intended.
annotations["chewing"] = [
    {
        "fname": 35697,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # chewing_gum
    {
        "fname": 48933,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # chewing_gum
    {
        "fname": 126185,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": True,
        "other_sounds": False,
        "notes": "A lot of noise",
    },  # chewing_gum
    {
        "fname": 140297,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # chewing_gum
    {
        "fname": 140298,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # chewing_gum
    {
        "fname": 144171,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": True,
        "other_sounds": False,
        "notes": None,
    },  # chewing_gum
    {
        "fname": 246206,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": True,
        "other_sounds": False,
        "notes": "Sounds like he is eating metal",
    },  # chewing_gum
    {
        "fname": 326464,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # chewing_gum
    {
        "fname": 335013,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # chewing_gum
    {
        "fname": 349096,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # chewing_gum
    {
        "fname": 365705,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # chewing_gum
    {
        "fname": 366382,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # chewing_gum
    {
        "fname": 366548,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": "Very low sound",
    },  # chewing_gum
    {
        "fname": 377572,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # chewing_gum
    {
        "fname": 403191,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": True,
        "other_sounds": False,
        "notes": None,
    },  # chewing_gum
    {
        "fname": 404399,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # chewing_gum
    {
        "fname": 404402,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # chewing_gum
    {
        "fname": 408822,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # chewing_gum
    {
        "fname": 412768,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # chewing_gum
    {
        "fname": 430011,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": ["A hmmm."],
        "notes": "A hmm in the beginning",
    },  # chewing_gum
]

In [None]:
# Take the 20 rows with the largest "Duration", then order them by fname.
metadata_by_duration = metadata.sort_values(by="Duration", ascending=False)
longest_samples = metadata_by_duration.iloc[:20].sort_values(by="fname").reset_index(drop=True)
copy_samples(ANNOTATE_SAMPLES_DIR / "longest", longest_samples)
# print_template(longest_samples)

In [12]:
# Annotations for the "longest" group, listed in ascending fname order with
# the same dict keys as the other groups. Some files (e.g. 18621, 407709) are
# also annotated in the "random" group, so fname is not unique across groups.
annotations["longest"] = [
    {
        "fname": 18621,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": True,
        "other_sounds": False,
        "notes": "Around 4s. complete silence in the end",
    },  # knife_cutting
    {
        "fname": 23444,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # typing
    {
        "fname": 84393,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": True,
        "other_sounds": False,
        "notes": None,
    },  # typing
    {
        "fname": 84883,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": True,
        "other_sounds": False,
        "notes": None,
    },  # typing
    {
        "fname": 85295,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": True,
        "other_sounds": False,
        "notes": "Has 4s. noisy silence in the beginning",
    },  # typing
    {
        "fname": 94643,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": True,
        "other_sounds": False,
        "notes": None,
    },  # typing
    {
        "fname": 108449,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": True,
        "other_sounds": False,
        "notes": None,
    },  # plastic_crumpling
    {
        "fname": 122802,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": True,
        "other_sounds": False,
        "notes": None,
    },  # typing
    {
        "fname": 149818,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": True,
        "other_sounds": False,
        "notes": None,
    },  # plastic_crumpling
    {
        "fname": 164747,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": True,
        "other_sounds": False,
        "notes": None,
    },  # plastic_crumpling
    {
        "fname": 181908,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # plastic_crumpling
    {
        "fname": 206147,
        "expected_trigger": False,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": ["Putting soda in a cup"],
        "notes": "Maybe they are swallowing, but you cannot really hear it",
    },  # swallowing
    {
        "fname": 210304,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # typing
    {
        "fname": 246057,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # typing
    {
        "fname": 318923,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # typing
    {
        "fname": 330301,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # typing
    {
        "fname": 337081,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # plastic_crumpling
    {
        "fname": 390165,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # knife_cutting
    {
        "fname": 407709,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # typing
    {
        "fname": 408263,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # water_drops
]

In [13]:
# Ten longest chewing_gum rows of `metadata`, ordered by fname.
# The length column in the metadata CSV is "Duration" (presumably seconds --
# confirm). No cell in this notebook creates a "length_sec" column, so sorting
# by that name raises KeyError on a fresh kernel; "Duration" is also the key
# the other "longest" selections sort by.
longest_chewing = (
    metadata[metadata["labels"] == "chewing_gum"]
    .sort_values("Duration", ascending=False)
    .head(10)
    .sort_values("fname")
    .reset_index(drop=True)
)
copy_samples(ANNOTATE_SAMPLES_DIR / "longest_chewing", longest_chewing)
# print_template(longest_chewing)

In [14]:
# Annotations for the "longest_chewing" group, listed in ascending fname order
# with the same dict keys as the other groups.
# NOTE(review): two files carry has_noise values that disagree with another
# group: 326556 is has_noise=True in "random" but False here, and 366548 is
# has_noise=False in "chewing" but True here -- confirm which is intended.
annotations["longest_chewing"] = [
    {
        "fname": 35697,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # chewing_gum
    {
        "fname": 140297,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # chewing_gum
    {
        "fname": 187331,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # chewing_gum
    {
        "fname": 326556,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # chewing_gum
    {
        "fname": 365705,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # chewing_gum
    {
        "fname": 366382,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # chewing_gum
    {
        "fname": 366548,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": True,
        "other_sounds": False,
        "notes": "Very low sound",
    },  # chewing_gum
    {
        "fname": 405327,
        "expected_trigger": True,
        "other_triggers": ["human_breathing"],
        "has_noise": False,
        "other_sounds": False,
        "notes": "A short human breathing around 4s. in",
    },  # chewing_gum
    {
        "fname": 408822,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # chewing_gum
    {
        "fname": 412768,
        "expected_trigger": True,
        "other_triggers": False,
        "has_noise": False,
        "other_sounds": False,
        "notes": None,
    },  # chewing_gum
]

In [24]:
# Read the table named by METADATA_DEV_FILE and append a `file_path` column
# that points at <ALL_DEV_SAMPLES_DIR>/<fname>.wav for every row.
metadata_dev = pd.read_csv(METADATA_DEV_FILE).assign(
    file_path=lambda frame: frame["fname"].map(lambda stem: ALL_DEV_SAMPLES_DIR / f"{stem}.wav")
)

# Ten longest human_breathing rows (by "Duration"), ordered by fname.
is_breathing = metadata_dev["labels"] == "human_breathing"
longest_breathing = (
    metadata_dev[is_breathing]
    .sort_values("Duration", ascending=False)
    .reset_index(drop=True)
    .head(10)
    .sort_values("fname")
    .reset_index(drop=True)
)
copy_samples(ANNOTATE_SAMPLES_DIR / "longest_breathing", longest_breathing)
print_template(longest_breathing)

{"fname": 96023, "expected_trigger": ... , "other_triggers": ... , "has_noise": ... , "other_sounds": ... , "notes": None}, # human_breathing
{"fname": 213293, "expected_trigger": ... , "other_triggers": ... , "has_noise": ... , "other_sounds": ... , "notes": None}, # human_breathing
{"fname": 325888, "expected_trigger": ... , "other_triggers": ... , "has_noise": ... , "other_sounds": ... , "notes": None}, # human_breathing
{"fname": 361148, "expected_trigger": ... , "other_triggers": ... , "has_noise": ... , "other_sounds": ... , "notes": None}, # human_breathing
{"fname": 390002, "expected_trigger": ... , "other_triggers": ... , "has_noise": ... , "other_sounds": ... , "notes": None}, # human_breathing
{"fname": 395397, "expected_trigger": ... , "other_triggers": ... , "has_noise": ... , "other_sounds": ... , "notes": None}, # human_breathing
{"fname": 410969, "expected_trigger": ... , "other_triggers": ... , "has_noise": ... , "other_sounds": ... , "notes": None}, # human_breathing


In [None]:
# Annotations for the "longest_breathing" group, in ascending fname order.
# TODO: every field except fname/notes is still the `...` (Ellipsis)
# placeholder emitted by print_template(). The results cell concatenates every
# group in `annotations`, so these placeholders end up in annotations.csv
# unless they are filled in first.
annotations["longest_breathing"] = [
    {
        "fname": 96023,
        "expected_trigger": ...,
        "other_triggers": ...,
        "has_noise": ...,
        "other_sounds": ...,
        "notes": None,
    },  # human_breathing
    {
        "fname": 213293,
        "expected_trigger": ...,
        "other_triggers": ...,
        "has_noise": ...,
        "other_sounds": ...,
        "notes": None,
    },  # human_breathing
    {
        "fname": 325888,
        "expected_trigger": ...,
        "other_triggers": ...,
        "has_noise": ...,
        "other_sounds": ...,
        "notes": None,
    },  # human_breathing
    {
        "fname": 361148,
        "expected_trigger": ...,
        "other_triggers": ...,
        "has_noise": ...,
        "other_sounds": ...,
        "notes": None,
    },  # human_breathing
    {
        "fname": 390002,
        "expected_trigger": ...,
        "other_triggers": ...,
        "has_noise": ...,
        "other_sounds": ...,
        "notes": None,
    },  # human_breathing
    {
        "fname": 395397,
        "expected_trigger": ...,
        "other_triggers": ...,
        "has_noise": ...,
        "other_sounds": ...,
        "notes": None,
    },  # human_breathing
    {
        "fname": 410969,
        "expected_trigger": ...,
        "other_triggers": ...,
        "has_noise": ...,
        "other_sounds": ...,
        "notes": None,
    },  # human_breathing
    {
        "fname": 427365,
        "expected_trigger": ...,
        "other_triggers": ...,
        "has_noise": ...,
        "other_sounds": ...,
        "notes": None,
    },  # human_breathing
    {
        "fname": 429970,
        "expected_trigger": ...,
        "other_triggers": ...,
        "has_noise": ...,
        "other_sounds": ...,
        "notes": None,
    },  # human_breathing
    {
        "fname": 429999,
        "expected_trigger": ...,
        "other_triggers": ...,
        "has_noise": ...,
        "other_sounds": ...,
        "notes": None,
    },  # human_breathing
]

# Results

In [15]:
# Stack every annotation group into one frame, tagging each row with the name
# of the group it came from in a `sample_type` column.
stacked_annotations = pd.concat(
    (pd.DataFrame(records).assign(sample_type=group) for group, records in annotations.items()),
    ignore_index=True,
)

# Left-join the metadata columns onto the annotations by fname, then export.
annotations_df = pd.merge(stacked_annotations, metadata, how="left", on="fname")
annotations_df.to_csv(ANNOTATE_SAMPLES_DIR / "annotations.csv")
annotations_df

Unnamed: 0,fname,expected_trigger,other_triggers,has_noise,other_sounds,notes,sample_type,labels,mids,Duration,file_path,length_sec
0,18621,True,False,True,False,,random,knife_cutting,/m/01lsmm,26.000000,data/FSD50K_eval/18621.wav,26.000000
1,21113,True,False,True,False,,random,human_breathing,/m/0lyf6,1.864626,data/FSD50K_eval/21113.wav,1.864626
2,41326,True,False,True,False,,random,plastic_crumpling,/m/07plct2,3.104671,data/FSD50K_eval/41326.wav,3.104671
3,47046,True,False,False,False,"Type writer rewinding, not the clicking sounds",random,typing,/m/0c2wf,0.777868,data/FSD50K_eval/47046.wav,0.777868
4,48202,True,False,False,False,,random,plastic_crumpling,/m/07plct2,1.190045,data/FSD50K_eval/48202.wav,1.190045
...,...,...,...,...,...,...,...,...,...,...,...,...
95,366382,True,False,False,False,,longest_chewing,chewing_gum,/m/03cczk,18.439116,data/FSD50K_eval/366382.wav,18.439116
96,366548,True,False,True,False,Very low sound,longest_chewing,chewing_gum,/m/03cczk,12.887687,data/FSD50K_eval/366548.wav,12.887687
97,405327,True,[human_breathing],False,False,A short human breathing around 4s. in,longest_chewing,chewing_gum,/m/03cczk,19.264014,data/FSD50K_eval/405327.wav,19.264014
98,408822,True,False,False,False,,longest_chewing,chewing_gum,/m/03cczk,16.703855,data/FSD50K_eval/408822.wav,16.703855
