In [None]:
## This notebook selects the youtube and custom wav samples that belong to the project's selected classes and writes the selection to a csv file

In [2]:
## Import packages
import os
import csv
import random
import math
import ntpath
import numpy as np
import pandas as pd
import time
from pathlib import Path
from scipy.io import wavfile
import sys

In [3]:
# Name of the project; it is used as the project's sub-folder name inside the data folder
project_name = "amsterdam_custom_samples"

In [4]:
# Root of the serval data folder, relative to this notebook (normally no change needed)
serval_data_folder = "../data"

# Data folder of the current project: <serval data folder>/<project name>
project_data_folder = f"{serval_data_folder}/{project_name}"

In [5]:
# Input csv files: enumerated and labeled youtube wav samples (balanced, unbalanced and eval sets)
input_balanced_wav_samples_enumerated_filepath   = f"{serval_data_folder}/csv_files/wav_samples_youtube_balanced_all_enumerated_and_labeled.csv"
input_unbalanced_wav_samples_enumerated_filepath = f"{serval_data_folder}/csv_files/wav_samples_youtube_unbalanced_all_enumerated_and_labeled.csv"
input_eval_wav_samples_enumerated_filepath       = f"{serval_data_folder}/csv_files/wav_samples_youtube_eval_all_enumerated_and_labeled.csv"

# Input csv file: enumerated and labeled custom wav samples
input_custom_wav_samples_enumerated_filepath      = f"{serval_data_folder}/wav_samples_custom/wav_samples_custom_all_enumerated_and_labeled.csv"

In [6]:
# Project csv files: the selected classes are read from the first path,
# the resulting sample selection is written to the second one
input_selected_classes_filepath = f"{project_data_folder}/csv_files/input_selected_wav_samples.csv"
target_selected_classes_filepath = f"{project_data_folder}/csv_files/output_selected_wav_samples.csv"

In [7]:
# Read the comma-separated table of selected classes (comma is the read_csv default separator)
df_selected_classes = pd.read_csv(input_selected_classes_filepath)

# Show the loaded classes
df_selected_classes

Unnamed: 0,label,mid,display_name
0,1000,/c/a_1000,3 Distance 9mm gun shots
1,1001,/c/a_1001,556 gunshots
2,1002,/c/a_1002,Brommer Alarm
3,1003,/c/a_1003,Brommer
4,1004,/c/a_1004,Claxons
...,...,...,...
61,420,/m/03p19w,Jackhammer
62,424,/m/0_ksk,Power tool
63,425,/m/01d380,Drill
64,426,/m/014zdl,Explosion


In [8]:
# Load the enumerated and labeled youtube wav samples (balanced, unbalanced and eval sets);
# these csv files are semicolon-separated
df_balanced_wav_samples_enumerated   = pd.read_csv(input_balanced_wav_samples_enumerated_filepath, sep=";")
df_unbalanced_wav_samples_enumerated = pd.read_csv(input_unbalanced_wav_samples_enumerated_filepath, sep=";")
df_eval_wav_samples_enumerated       = pd.read_csv(input_eval_wav_samples_enumerated_filepath, sep=";")

# Remove the unnamed index column if it exists (any column whose name starts with 'Unnamed')
df_balanced_wav_samples_enumerated   = df_balanced_wav_samples_enumerated.loc[:, ~df_balanced_wav_samples_enumerated.columns.str.match('Unnamed')]
df_unbalanced_wav_samples_enumerated = df_unbalanced_wav_samples_enumerated.loc[:, ~df_unbalanced_wav_samples_enumerated.columns.str.match('Unnamed')]
df_eval_wav_samples_enumerated       = df_eval_wav_samples_enumerated.loc[:, ~df_eval_wav_samples_enumerated.columns.str.match('Unnamed')]

# Combine the three sets into one frame.
# pd.concat is used instead of chained DataFrame.append calls: append was deprecated in
# pandas 1.4 and removed in pandas 2.0. As with append, the original row labels are kept
# (no ignore_index) and the columns are not sorted.
df_youtube_samples = pd.concat(
    [
        df_balanced_wav_samples_enumerated,
        df_unbalanced_wav_samples_enumerated,
        df_eval_wav_samples_enumerated,
    ]
)

# Preview the first rows
df_youtube_samples.head()

Unnamed: 0,label,mid,display_name,source,filename,filepath
0,427,/m/032s66,"Gunshot, gunfire",wav_samples_youtube/bal,--aaILOrkII,..\data\wav_samples_youtube\bal\vid--aaILOrkII...
1,431,/m/073cg4,Cap gun,wav_samples_youtube/bal,--aaILOrkII,..\data\wav_samples_youtube\bal\vid--aaILOrkII...
2,32,/t/dd00003,Male singing,wav_samples_youtube/bal,--aO5cdqSAg,..\data\wav_samples_youtube\bal\vid--aO5cdqSAg...
3,34,/t/dd00005,Child singing,wav_samples_youtube/bal,--aO5cdqSAg,..\data\wav_samples_youtube\bal\vid--aO5cdqSAg...
4,137,/m/04rlf,Music,wav_samples_youtube/bal,-24dqQM_rDk,..\data\wav_samples_youtube\bal\vid-24dqQM_rDk...


In [9]:
# Load custom wav samples and classes
df_custom_wav_samples_enumerated = pd.read_csv(input_custom_wav_samples_enumerated_filepath, sep=";")

# Remove unnamed index column if it exists
df_custom_wav_samples_enumerated = df_custom_wav_samples_enumerated.loc  [:, ~df_custom_wav_samples_enumerated.columns.str.match('Unnamed')]

# Print
df_custom_wav_samples_enumerated.head()

Unnamed: 0,label,mid,display_name,source,filename,filepath
0,1000,/c/a_1000,3 Distance 9mm gun shots,custom_amsterdam_sample,shot distance9mm.06.wav,..\data\wav_samples_custom\3_distance_9mm_shot...
1,1000,/c/a_1000,3 Distance 9mm gun shots,custom_amsterdam_sample,shot distance9mm.10.wav,..\data\wav_samples_custom\3_distance_9mm_shot...
2,1000,/c/a_1000,3 Distance 9mm gun shots,custom_amsterdam_sample,shot distance9mm.105.wav,..\data\wav_samples_custom\3_distance_9mm_shot...
3,1000,/c/a_1000,3 Distance 9mm gun shots,custom_amsterdam_sample,shot distance9mm.106.wav,..\data\wav_samples_custom\3_distance_9mm_shot...
4,1000,/c/a_1000,3 Distance 9mm gun shots,custom_amsterdam_sample,shot distance9mm.107.wav,..\data\wav_samples_custom\3_distance_9mm_shot...


In [10]:
# Combine the youtube and the custom samples into one frame.
# pd.concat is used instead of DataFrame.append, which was deprecated in pandas 1.4 and
# removed in pandas 2.0. As with append, the original row labels are kept (no ignore_index).
df_wav_samples_enumerated = pd.concat([df_youtube_samples, df_custom_wav_samples_enumerated])

In [11]:
# Keep only the samples whose class id ('mid') occurs in the table of selected classes
is_selected_mid = df_wav_samples_enumerated.mid.isin(df_selected_classes.mid)
df_selected_samples = df_wav_samples_enumerated.loc[is_selected_mid]

In [12]:
# Per selected class: number of distinct values in each remaining column
samples_per_class = df_selected_samples.groupby(['label', 'mid', 'display_name'])
display(samples_per_class.agg(['nunique']))

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,source,filename,filepath
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,nunique,nunique,nunique
label,mid,display_name,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
72,/m/0jbk,Animal,3,707,707
73,/m/068hy,"Domestic animals, pets",3,136,136
74,/m/0bt9lr,Dog,3,60,60
75,/m/05tny_,Bark,2,7,7
115,/m/0h0rv,"Pigeon, dove",1,11,11
127,/m/09xqv,Cricket,2,3,3
261,/m/015vgc,Carnatic music,1,2,2
282,/t/dd00037,Scary music,2,4,4
307,/m/0k4j,Car,3,2319,2319
308,/m/0912c9,"Vehicle horn, car horn, honking",3,136,136


In [13]:
# Write the selected samples to the project's output csv file (semicolon-separated, row labels included)
df_selected_samples.to_csv(path_or_buf=target_selected_classes_filepath, sep=';')