In [1]:
## This notebook selects the wav samples based on the sound classes of interest

In [2]:
## Import packages
import os
import csv
import random
import math
import ntpath
import numpy as np
import pandas as pd
import time
from pathlib import Path
from scipy.io import wavfile
import sys

In [3]:
# Project name; it is also used as the name of the project's sub-folder in the data folder
project_name = "amsterdam_custom_samples"

In [4]:
# Location of the serval data folder (should be correctly set already)
serval_data_folder = "../data"

# The project's own data lives in a sub-folder named after the project
project_data_folder = f"{serval_data_folder}/{project_name}"

In [5]:
# Csv files listing the wav samples (enumerated and labeled) that can be selected from.

# Youtube samples: one enumerated csv per subset (balanced, unbalanced, eval)
input_balanced_wav_samples_enumerated_filepath = (
    f"{serval_data_folder}/csv_files/wav_samples_youtube_balanced_all_enumerated_and_labeled.csv"
)
input_unbalanced_wav_samples_enumerated_filepath = (
    f"{serval_data_folder}/csv_files/wav_samples_youtube_unbalanced_all_enumerated_and_labeled.csv"
)
input_eval_wav_samples_enumerated_filepath = (
    f"{serval_data_folder}/csv_files/wav_samples_youtube_eval_all_enumerated_and_labeled.csv"
)

# Custom samples: a single enumerated csv stored next to the custom wav files
input_custom_wav_samples_enumerated_filepath = (
    f"{serval_data_folder}/wav_samples_custom/wav_samples_custom_all_enumerated_and_labeled.csv"
)

In [6]:
# Input: csv with the classes of interest for this project
input_selected_classes_filepath = f"{project_data_folder}/csv_files/01_input_selected_classes.csv"

# Output: csv that will receive the wav samples belonging to those classes
target_selected_classes_filepath = f"{project_data_folder}/csv_files/01_output_selected_wav_samples.csv"

In [7]:
# Read the classes of interest from the comma-separated input csv
df_selected_classes = pd.read_csv(
    input_selected_classes_filepath,
    sep=",",
)

# Show the loaded classes as the cell output
df_selected_classes

Unnamed: 0,label,mid,display_name
0,73,/m/068hy,"Domestic animals, pets"
1,307,/m/0k4j,Car
2,308,/m/0912c9,"Vehicle horn, car horn, honking"
3,314,/t/dd00134,Car passing by
4,316,/m/07r04,Truck
5,318,/m/05x_td,"Air horn, truck horn"
6,322,/m/03j1ly,Emergency vehicle
7,347,/m/01j4z9,Chainsaw
8,353,/m/07q2z82,"Accelerating, revving, vroom"
9,396,/m/03kmc9,Siren


In [8]:
# Load youtube wav samples and classes
def _load_enumerated_samples(csv_filepath):
    """Read a ';'-separated enumerated-samples csv and drop 'Unnamed...' columns.

    Parameters
    ----------
    csv_filepath : str
        Path of the csv file to read.

    Returns
    -------
    pandas.DataFrame
        The csv content without any column whose name starts with 'Unnamed'
        (the unnamed index column, if the file has one).
    """
    samples = pd.read_csv(csv_filepath, sep=";")
    # str.match anchors at the start of the column name, so only columns
    # beginning with 'Unnamed' are removed.
    return samples.loc[:, ~samples.columns.str.match('Unnamed')]


df_balanced_wav_samples_enumerated   = _load_enumerated_samples(input_balanced_wav_samples_enumerated_filepath)
df_unbalanced_wav_samples_enumerated = _load_enumerated_samples(input_unbalanced_wav_samples_enumerated_filepath)
df_eval_wav_samples_enumerated       = _load_enumerated_samples(input_eval_wav_samples_enumerated_filepath)

# Combine the three subsets row-wise.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0;
# pd.concat yields the same rows in the same order and, like append, keeps
# each frame's original row labels (ignore_index is left at its default).
df_youtube_samples = pd.concat([
    df_balanced_wav_samples_enumerated,
    df_unbalanced_wav_samples_enumerated,
    df_eval_wav_samples_enumerated,
])

# Print
df_youtube_samples.head()

Unnamed: 0,label,mid,display_name,source,filename,filepath
0,500,/m/028v0c,Silence,wav_samples_youtube/bal,oh08bbDVFZM,../data/wav_samples_youtube/bal/vidoh08bbDVFZM...
1,435,/m/07pxg6y,Eruption,wav_samples_youtube/bal,oh08bbDVFZM,../data/wav_samples_youtube/bal/vidoh08bbDVFZM...
2,509,/t/dd00128,"Outside, urban or manmade",wav_samples_youtube/bal,oh08bbDVFZM,../data/wav_samples_youtube/bal/vidoh08bbDVFZM...
3,347,/m/01j4z9,Chainsaw,wav_samples_youtube/bal,xn_7_qB3L9M,../data/wav_samples_youtube/bal/vidxn_7_qB3L9M...
4,300,/m/07yv9,Vehicle,wav_samples_youtube/bal,xn_7_qB3L9M,../data/wav_samples_youtube/bal/vidxn_7_qB3L9M...


In [9]:
# Read the custom wav samples (';'-separated csv) and, in the same chain,
# keep only the columns whose name does not start with 'Unnamed'
# (i.e. drop the unnamed index column if the file has one)
df_custom_wav_samples_enumerated = (
    pd.read_csv(input_custom_wav_samples_enumerated_filepath, sep=";")
    .loc[:, lambda frame: ~frame.columns.str.match('Unnamed')]
)

# Show the first rows as the cell output
df_custom_wav_samples_enumerated.head()

Unnamed: 0,label,mid,display_name,source,filename,filepath
0,1000,/c/a_1000,3 Distance 9mm gun shots,custom_amsterdam_sample,shot distance9mm.136.wav,../data/wav_samples_custom/3_distance_9mm_shot...
1,1000,/c/a_1000,3 Distance 9mm gun shots,custom_amsterdam_sample,shot distance9mm.185.wav,../data/wav_samples_custom/3_distance_9mm_shot...
2,1000,/c/a_1000,3 Distance 9mm gun shots,custom_amsterdam_sample,shot distance9mm.98.wav,../data/wav_samples_custom/3_distance_9mm_shot...
3,1000,/c/a_1000,3 Distance 9mm gun shots,custom_amsterdam_sample,shot distance9mm.17.wav,../data/wav_samples_custom/3_distance_9mm_shot...
4,1000,/c/a_1000,3 Distance 9mm gun shots,custom_amsterdam_sample,shot distance9mm.67.wav,../data/wav_samples_custom/3_distance_9mm_shot...


In [10]:
# Combine youtube and custom samples into one frame.
# DataFrame.append was deprecated in pandas 1.4 and removed in pandas 2.0;
# pd.concat stacks the rows in the same order and keeps the original row labels.
df_wav_samples_enumerated = pd.concat([df_youtube_samples, df_custom_wav_samples_enumerated])

In [11]:
# Keep only the samples whose class id ('mid') is one of the selected classes
is_selected_class = df_wav_samples_enumerated['mid'].isin(df_selected_classes['mid'])
df_selected_samples = df_wav_samples_enumerated.loc[is_selected_class]

In [12]:
# Per selected class: number of distinct values in each remaining column
display(
    df_selected_samples
    .groupby(['label', 'mid', 'display_name'])
    .agg(['nunique'])
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,source,filename,filepath
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,nunique,nunique,nunique
label,mid,display_name,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
73,/m/068hy,"Domestic animals, pets",3,136,136
307,/m/0k4j,Car,3,2319,2319
308,/m/0912c9,"Vehicle horn, car horn, honking",3,136,136
314,/t/dd00134,Car passing by,3,3112,3112
316,/m/07r04,Truck,3,8839,8839
318,/m/05x_td,"Air horn, truck horn",3,50,50
322,/m/03j1ly,Emergency vehicle,3,644,644
347,/m/01j4z9,Chainsaw,3,1219,1219
353,/m/07q2z82,"Accelerating, revving, vroom",3,492,492
396,/m/03kmc9,Siren,3,121,121


In [13]:
# Write the selected samples to the project's output csv (';'-separated, row index included)
df_selected_samples.to_csv(
    target_selected_classes_filepath,
    sep=';',
)