In [None]:
from IPython.display import Audio, display
import zipfile
import os
import pandas as pd
import librosa
import soundfile as sf
import json
import random

# Playground Notebook

A playground for working with WAV files and creating sample binaural mixes.

In [None]:
# Work from the notebooks directory so the relative "../data" paths below resolve.
os.chdir("/workspaces/misophonia-dataset/notebooks")

# One row per FOAMS clip; the `label` column holds the sound class.
foams_df = pd.read_csv("../data/segmentation_info.csv")

print("Number of misophonia samples: ", foams_df.shape[0])
print("Unique classes: ", foams_df["label"].unique())
print("####################################################################################################")
foams_df.tail()

Number of mysophonia samples:  50
Unique classes:  ['basketball_dribbling' 'chewing_gum' 'clearing_throat'
 'flipping_newspaper' 'human_breathing' 'knife_cutting'
 'plastic_crumpling' 'swallowing' 'typing' 'water_drops']
####################################################################################################


Unnamed: 0,id,label,duration
0,184320,basketball_dribbling,3.270003
1,580220,basketball_dribbling,3.862957
2,262507,basketball_dribbling,4.903607
3,453757,basketball_dribbling,5.097981
4,530362,basketball_dribbling,5.344224


In [56]:
# Both paths are relative to the notebooks directory, like every other "../data" path here.
wav_zip_path = "../data/FOAMS_processed_audio.zip"
foams_path = "../data/FOAMS_processed_audio/"

# Unzip the data only once: extract next to the archive so that the guard
# below sees the extracted folder on the next run.
# NOTE(review): assumes the archive contains a top-level FOAMS_processed_audio/ folder — confirm.
if not os.path.exists(foams_path):
    print("Here we go again")
    # The context manager closes the archive even if extraction fails.
    with zipfile.ZipFile(wav_zip_path, 'r') as zip_ref:
        zip_ref.extractall("../data")

In [57]:
# Pick one processed FOAMS clip and render an inline audio player for it.
sample_rate = 44100
sample = f"{foams_path}87984_processed.wav"

Audio(data=sample, rate=sample_rate)

### Binaural Mixing

In [65]:
# Change working directory to Binamix (only when we are not already there).
# NOTE(review): presumably required so that the `binamix` package is importable — confirm.
binamix_dir = "/workspaces/misophonia-dataset/Binamix"
print("Before: ", os.getcwd())
if os.getcwd() != binamix_dir:
    os.chdir(binamix_dir)

print("After: ", os.getcwd())

# NOTE(review): star import hides where names come from; the cells below use
# TrackObject and mix_tracks_binaural, which presumably come from this module.
from binamix.sadie_utilities import *

# import runpy
# from examples import binaural_mixer_example

# runpy.run_module("examples.binaural_mixer_example", run_name="__main__")


Before:  /workspaces/misophonia-dataset/Binamix
After:  /workspaces/misophonia-dataset/Binamix


In [63]:
# Convert the busy-street MP3 to WAV once; skip the work when the WAV is already there.
busy_street_wav = "../data/busy_street_example.wav"
if os.path.exists(busy_street_wav):
    print("File already exists!")
else:
    # sr=44100 resamples to 44.1 kHz on load (sr=None would keep the file's native rate).
    y, sr = librosa.load("../data/busy-street-traffic-309641.mp3", sr=44100)

    # Save as WAV
    sf.write(busy_street_wav, y, sr)

In [64]:
# The converted WAV is written to ../data, not into the FOAMS folder.
busy_street_path = "../data/busy_street_example.wav"

# Passing the path as `filename` lets IPython embed the file as-is; `rate` is
# only needed when raw sample arrays are passed.
Audio(filename=busy_street_path)

In [70]:
# Rendering configuration passed to mix_tracks_binaural below.
data_path = "../data"
subject_id = "D2"
ir_type = "HRIR"  # Options HRIR, BRIR
speaker_layout = "none"
sr = 44100
# Seconds to load from each file; equals the duration listed for clip 453757
# in segmentation_info.csv, so both tracks have the same length.
clip_duration = 5.097981

# Read in audio data (mono, resampled to `sr`, truncated to `clip_duration`)
street, sr = librosa.load(f"{data_path}/busy_street_example.wav", sr=sr, mono=True, duration=clip_duration)
basketball, sr = librosa.load(f"{foams_path}453757_processed.wav", sr=sr, mono=True, duration=clip_duration)

# Create TrackObjects: street noise straight ahead, basketball at azimuth 300.
# NOTE(review): presumably azimuth 300 places the source to the listener's right — confirm
# against the Binamix angle convention.
track1 = TrackObject(name="street", azimuth=0, elevation=0, level=0.4, reverb=0.0, audio=street)
track2 = TrackObject(name="basketball", azimuth=300, elevation=0, level=0.8, reverb=0.0, audio=basketball)
tracks = [track1, track2]

# Make Right Ear Binaural Mix
output_right = mix_tracks_binaural(tracks, subject_id, sr, ir_type, speaker_layout, mode="auto", reverb_type="1")

Rendering Mix...
Mixing  street
Interpolation Mode: 'Auto'
Using Actual Angle to achieve angle: az 0 ele 0
-----------------------------------
Mixing  basketball
Interpolation Mode: 'Auto'
Using Actual Angle to achieve angle: az 300 ele 0
-----------------------------------


In [71]:
# Rebuild both tracks; the street track is unchanged and the basketball is placed at azimuth 30.
track1 = TrackObject(
    name="street", azimuth=0, elevation=0, level=0.4, reverb=0.0, audio=street
)
track2 = TrackObject(
    name="basketball", azimuth=30, elevation=0, level=0.8, reverb=0.0, audio=basketball
)
tracks = [track1, track2]

# Render the mix that is played back as the "left ear" example.
output_left = mix_tracks_binaural(
    tracks, subject_id, sr, ir_type, speaker_layout, mode="auto", reverb_type="1"
)

Rendering Mix...
Mixing  street
Interpolation Mode: 'Auto'
Using Actual Angle to achieve angle: az 0 ele 0
-----------------------------------
Mixing  basketball
Interpolation Mode: 'Auto'
Using Actual Angle to achieve angle: az 30 ele 0
-----------------------------------


#### Left Ear, Then Right

In [None]:
# Play the unmixed basketball clip for reference. The path reuses `foams_path`
# (the same file the mix cell loads) and is passed as `filename`, so no `rate`
# argument is needed; `rate` only applies to raw sample arrays.
Audio(filename=f"{foams_path}453757_processed.wav")

In [None]:
# Play the mix rendered with the basketball at azimuth 30.
Audio(data=output_left, rate=sr)

In [75]:
# Play the mix rendered with the basketball at azimuth 300.
Audio(data=output_right, rate=sr)

# Extracting Sound Classes from Dataset Metadata

### FSD50K

In [3]:
# Return to the notebooks directory so the "../data" paths below resolve.
os.chdir("/workspaces/misophonia-dataset/notebooks")

dev_path = "../data/metadata/FSD50K.metadata/collection/vocabulary_collection_dev.csv"
eval_path = "../data/metadata/FSD50K.metadata/collection/vocabulary_collection_eval.csv"

# Collect the class names (second column of each header-less vocabulary file)
# from both splits, then de-duplicate and sort them.
fsd50k_vocab = set()
for vocab_csv in (dev_path, eval_path):
    fsd50k_vocab.update(pd.read_csv(vocab_csv, header=None)[1].unique().tolist())

fsd50k_class_names = sorted(fsd50k_vocab)
fsd50k_class_names

['Accelerating_and_revving_and_vroom',
 'Accordion',
 'Acoustic_guitar',
 'Air_conditioning',
 'Air_horn_and_truck_horn',
 'Aircraft',
 'Aircraft_engine',
 'Alarm',
 'Alarm_clock',
 'Alto_saxophone',
 'Ambulance_(siren)',
 'Animal',
 'Applause',
 'Arrow',
 'Artillery_fire',
 'Babbling',
 'Baby_cry_and_infant_cry',
 'Baby_laughter',
 'Bark',
 'Bass_drum',
 'Bass_guitar',
 'Bassoon',
 'Bathtub_(filling_or_washing)',
 'Battle_cry',
 'Bee_and_wasp_and_etc.',
 'Bell',
 'Belly_laugh',
 'Bicycle',
 'Bicycle_bell',
 'Bird',
 'Bird_flight_and_flapping_wings',
 'Bird_vocalization_and_bird_call_and_bird_song',
 'Biting',
 'Bleat',
 'Blender',
 'Boat_and_Water_vehicle',
 'Boiling',
 'Booing',
 'Boom',
 'Bowed_string_instrument',
 'Brass_instrument',
 'Breathing',
 'Burping_and_eructation',
 'Burst_and_pop',
 'Bus',
 'Busy_signal',
 'Buzz',
 'Buzzer',
 'Camera',
 'Cap_gun',
 'Car',
 'Car_alarm',
 'Car_passing_by',
 'Cash_register',
 'Cat',
 'Cat_communication',
 'Caterwaul',
 'Cattle_and_bovinae',


### ESC-50

In [4]:
# Sorted, de-duplicated list of the values in the ESC-50 `category` column.
file_path = "../data/metadata/esc50.csv"
esc50_categories = pd.read_csv(file_path)["category"]
esc50_class_names = sorted(set(esc50_categories.unique().tolist()))
esc50_class_names

['airplane',
 'breathing',
 'brushing_teeth',
 'can_opening',
 'car_horn',
 'cat',
 'chainsaw',
 'chirping_birds',
 'church_bells',
 'clapping',
 'clock_alarm',
 'clock_tick',
 'coughing',
 'cow',
 'crackling_fire',
 'crickets',
 'crow',
 'crying_baby',
 'dog',
 'door_wood_creaks',
 'door_wood_knock',
 'drinking_sipping',
 'engine',
 'fireworks',
 'footsteps',
 'frog',
 'glass_breaking',
 'hand_saw',
 'helicopter',
 'hen',
 'insects',
 'keyboard_typing',
 'laughing',
 'mouse_click',
 'pig',
 'pouring_water',
 'rain',
 'rooster',
 'sea_waves',
 'sheep',
 'siren',
 'sneezing',
 'snoring',
 'thunderstorm',
 'toilet_flush',
 'train',
 'vacuum_cleaner',
 'washing_machine',
 'water_drops',
 'wind']

### FOAMS Classes (Triggering and Non-Triggering)


In [8]:
# Load the FSD50K dev collection table, report its row count and preview it.
dev_collection_path = "../data/metadata/FSD50K.metadata/collection/collection_dev.csv"
fsd_50k = pd.read_csv(dev_collection_path)

print(len(fsd_50k))
fsd_50k.head()

40966


Unnamed: 0,fname,labels,mids
0,64760,Electric_guitar,/m/02sgy
1,16399,Electric_guitar,/m/02sgy
2,16401,Electric_guitar,/m/02sgy
3,16402,Electric_guitar,/m/02sgy
4,16404,Electric_guitar,/m/02sgy


In [20]:
fsd50k_to_foams = "../data/fsd50k_to_foams_mapping.json"

# The keys of the mapping file are the labels used to select trigger rows.
with open(fsd50k_to_foams) as f:
    data = json.load(f)
foams_trigger_classes = list(data.keys())

# Keep only the rows whose `labels` value is exactly one of the trigger classes.
is_fsd_trigger = fsd_50k['labels'].isin(foams_trigger_classes)
fsd_50k_triggers = fsd_50k[is_fsd_trigger]
fsd_50k_triggers.head()


In [53]:
esc50_to_foams = "../data/esc50_to_foams_mapping.json"
esc50 = pd.read_csv("../data/metadata/esc50.csv")

# The keys of the mapping file are the ESC-50 categories used to select trigger rows.
with open(esc50_to_foams) as f:
    data = json.load(f)
esc_trigger_classes = list(data.keys())

# Keep only the rows whose `category` is one of the trigger classes.
is_esc_trigger = esc50['category'].isin(esc_trigger_classes)
esc50_triggers = esc50[is_esc_trigger]

print("Trigger classes from ESC-50: ", esc_trigger_classes)
print("Number of new samples: ", len(esc50_triggers))


Trigger classes from ESC-50:  ['breathing', 'coughing', 'keyboard_typing', 'water_drops', 'drinking_sipping']
Number of new samples:  200


In [44]:
# Extract the ESC-50 archive once; later runs find the folder and skip the work.
if not os.path.exists("../data/ESC-50-master"):
    print("Extracting ESC-50 dataset...")
    # The context manager closes the archive even if extraction raises.
    with zipfile.ZipFile("../data/ESC-50-master.zip", 'r') as zip_ref:
        zip_ref.extractall("../data")
    print("Extraction complete!")
else:
    print("ESC-50 dataset already extracted")

ESC-50 dataset already extracted


In [51]:
def sample_audio_file(file_path: str, class_name: str, meta_df: pd.DataFrame, seed: int = 10):
    """Display an audio player for one pseudo-randomly chosen clip of a class.

    Args:
        file_path: Directory that contains the audio files named in ``meta_df``.
        class_name: Value of the ``category`` column to sample from.
        meta_df: Metadata table with at least ``category`` and ``filename`` columns.
        seed: Seed for the clip selection. The default (10) selects the same
            clip as the previous hard-coded ``random.seed(10)``.

    Raises:
        IndexError: If ``meta_df`` has no row whose ``category`` equals ``class_name``.
    """
    class_samples = meta_df[meta_df['category'] == class_name]
    if class_samples.empty:
        raise IndexError(f"No samples with category {class_name!r} in meta_df")

    # A private generator keeps the selection reproducible without reseeding
    # (and thereby clobbering) the global `random` state on every call.
    rng = random.Random(seed)
    row = rng.choice(range(0, class_samples.shape[0]))

    audio_file = class_samples.iloc[row]['filename']
    # os.path.join works whether or not `file_path` ends with a separator.
    audio_path = os.path.join(file_path, audio_file)

    display(Audio(audio_path))


# Show one sampled clip for each ESC-50 trigger class.
file_path = "../data/ESC-50-master/audio/"
for c in esc_trigger_classes:
    sample_audio_file(
        file_path=file_path,
        class_name=c,
        meta_df=esc50_triggers,
    )