In [1]:
## Import packages
import os
import csv
import random
import math
import ntpath
import numpy as np
import pandas as pd
import time
from pathlib import Path
from scipy.io import wavfile
from tqdm import tqdm

import tensorflow as tf
from tensorflow import gfile
from tensorflow import logging

In [2]:
# Root data folder (relative path, resolved against the notebook's working directory).
# The sample-directory and CSV paths configured in the following cells are built from it.
serval_data_folder = "../data"

In [3]:
# Locations of the YouTube WAV samples and of the tables that label them.
youtube_wav_samples_directory = "/".join([serval_data_folder, "wav_samples_youtube"])

# Table of generic class labels (loaded further down with pd.read_csv).
youtube_class_labels_filepath = "/".join([serval_data_folder, "csv_files", "class_labels_indices.csv"])

# Per-subset label tables: one CSV per sample subset.
youtube_wav_balanced_train_class_labels_filepath = "/".join(
    [youtube_wav_samples_directory, "balanced_train_segments.csv"]
)
youtube_wav_balanced_eval_class_labels_filepath = "/".join(
    [youtube_wav_samples_directory, "eval_segments.csv"]
)
youtube_wav_unbalanced_train_class_labels_filepath = "/".join(
    [youtube_wav_samples_directory, "unbalanced_train_segments.csv"]
)

# Per-subset directories that hold the *.wav files themselves.
youtube_wav_balanced_train_sample_directory = "/".join([youtube_wav_samples_directory, "bal"])
youtube_wav_balanced_eval_sample_directory = "/".join([youtube_wav_samples_directory, "eval"])
youtube_wav_unbalanced_train_sample_directory = "/".join([youtube_wav_samples_directory, "unbal"])

In [4]:
# Destination CSV files, one per sample subset (balanced train, unbalanced train, eval).
target_balanced_wav_samples_enumerated_filepath = "/".join(
    [serval_data_folder, "csv_files", "wav_samples_youtube_balanced_all_enumerated_and_labeled.csv"]
)
target_unbalanced_wav_samples_enumerated_filepath = "/".join(
    [serval_data_folder, "csv_files", "wav_samples_youtube_unbalanced_all_enumerated_and_labeled.csv"]
)
target_eval_wav_samples_enumerated_filepath = "/".join(
    [serval_data_folder, "csv_files", "wav_samples_youtube_eval_all_enumerated_and_labeled.csv"]
)

In [5]:
def collectAndLabelYoutubeSamples(df_youtube_class_labels, wav_class_labels_filepath, wav_sample_directory, source):
    """Build one labelled row per (WAV sample, class label) pair.

    Every ``*.wav`` file found in ``wav_sample_directory`` is matched, through
    a name derived from its basename, against the ``filename`` column of the
    CSV at ``wav_class_labels_filepath``. The matching record's
    ``youtube_mid_labels`` value is split on ``;`` and each piece is resolved
    against the ``mid`` column of ``df_youtube_class_labels``.

    Samples without exactly one record in the CSV, and labels without exactly
    one row in ``df_youtube_class_labels``, are reported with an ``[ERROR]``
    message on stdout and skipped; they never raise.

    Args:
        df_youtube_class_labels: DataFrame of class labels; must contain a
            ``mid`` column. All of its columns are copied to the output.
        wav_class_labels_filepath: Path of a comma-separated CSV that has
            ``filename`` and ``youtube_mid_labels`` columns.
        wav_sample_directory: Directory that is searched (non-recursively)
            for ``*.wav`` files.
        source: Value written to the ``source`` column of every output row.

    Returns:
        DataFrame with the columns of ``df_youtube_class_labels`` (an
        ``index`` column, if present, is renamed to ``label``) followed by
        ``source``, ``filename`` and ``filepath``.
    """
    df_wav_class_labels = pd.read_csv(wav_class_labels_filepath, sep=",")
    output_columns = df_youtube_class_labels.columns.values.tolist() + ['source', 'filename', 'filepath']

    # Index both tables once. Filtering the frames again for every file and
    # every label re-scans them each time, which dominates the run time on
    # large sample directories.
    mid_labels_by_filename = {}
    for segment_name, mid_labels in zip(df_wav_class_labels['filename'],
                                        df_wav_class_labels['youtube_mid_labels']):
        mid_labels_by_filename.setdefault(segment_name, []).append(mid_labels)

    class_records_by_mid = {}
    for class_record in df_youtube_class_labels.to_dict(orient='records'):
        class_records_by_mid.setdefault(class_record['mid'], []).append(class_record)

    # Rows are collected as plain dicts and turned into a DataFrame once at
    # the end: appending to a DataFrame inside the loop copies the whole frame
    # on every row (quadratic), and DataFrame.append no longer exists in
    # pandas >= 2.0.
    records = []
    files = gfile.Glob(str(wav_sample_directory + "/" + "*.wav"))
    for file in tqdm(files):
        # Drop the first 3 characters of the basename (a "vid" prefix in the
        # sample directories) and the 4-character ".wav" extension.
        filename = ntpath.basename(file)[3:-4]
        segment_labels = mid_labels_by_filename.get(filename, [])

        if len(segment_labels) != 1:
            print('[ERROR]: Found sample with ', len(segment_labels) ,' class labels records (expected=1); filename=', filename, '; path=', file, sep='')
            continue

        for class_ in str(segment_labels[0]).split(";"):
            class_records = class_records_by_mid.get(class_, [])

            if len(class_records) != 1:
                print('[ERROR]: Found sample with ', len(class_records) ,' youtube class labels records (expected=1); filename=', filename, '; youtube_mid_id=', class_, sep='')
                continue

            # Copy so the shared lookup record is never mutated.
            record = dict(class_records[0])
            record['source'] = source
            record['filename'] = filename
            record['filepath'] = file
            records.append(record)

    # Passing `columns` keeps the column set and order stable even when no
    # sample could be labelled.
    df_output_wav_files_classified = pd.DataFrame(records, columns=output_columns)
    return df_output_wav_files_classified.rename(columns={'index': 'label'})
    

In [6]:
# Read the table of generic youtube class labels, then preview its first rows
df_youtube_class_labels = pd.read_csv(
    filepath_or_buffer=youtube_class_labels_filepath,
    sep=",",
)
df_youtube_class_labels.head(n=5)

Unnamed: 0,index,mid,display_name
0,0,/m/09x0r,Speech
1,1,/m/05zppz,"Male speech, man speaking"
2,2,/m/02zsn,"Female speech, woman speaking"
3,3,/m/0ytgt,"Child speech, kid speaking"
4,4,/m/01h8n0,Conversation


In [7]:
# Balanced train subset: label every sample, write the result to CSV,
# then show the per-column unique counts and the first rows.
df_tmp = collectAndLabelYoutubeSamples(
    df_youtube_class_labels=df_youtube_class_labels,
    wav_class_labels_filepath=youtube_wav_balanced_train_class_labels_filepath,
    wav_sample_directory=youtube_wav_balanced_train_sample_directory,
    source='wav_samples_youtube/bal',
)
df_tmp.to_csv(target_balanced_wav_samples_enumerated_filepath, sep=';')

display(df_tmp.agg(['nunique']), df_tmp.head())

100%|██████████| 1057/1057 [00:06<00:00, 158.54it/s]


Unnamed: 0,label,mid,display_name,source,filename,filepath
nunique,173,173,173,1,1057,1057


Unnamed: 0,label,mid,display_name,source,filename,filepath
0,500,/m/028v0c,Silence,wav_samples_youtube/bal,oh08bbDVFZM,../data/wav_samples_youtube/bal/vidoh08bbDVFZM...
1,435,/m/07pxg6y,Eruption,wav_samples_youtube/bal,oh08bbDVFZM,../data/wav_samples_youtube/bal/vidoh08bbDVFZM...
2,509,/t/dd00128,"Outside, urban or manmade",wav_samples_youtube/bal,oh08bbDVFZM,../data/wav_samples_youtube/bal/vidoh08bbDVFZM...
3,347,/m/01j4z9,Chainsaw,wav_samples_youtube/bal,xn_7_qB3L9M,../data/wav_samples_youtube/bal/vidxn_7_qB3L9M...
4,300,/m/07yv9,Vehicle,wav_samples_youtube/bal,xn_7_qB3L9M,../data/wav_samples_youtube/bal/vidxn_7_qB3L9M...


In [8]:
# Eval subset: label every sample, write the result to CSV,
# then show the per-column unique counts and the first rows.
df_tmp = collectAndLabelYoutubeSamples(
    df_youtube_class_labels=df_youtube_class_labels,
    wav_class_labels_filepath=youtube_wav_balanced_eval_class_labels_filepath,
    wav_sample_directory=youtube_wav_balanced_eval_sample_directory,
    source='wav_samples_youtube/eval',
)
df_tmp.to_csv(target_eval_wav_samples_enumerated_filepath, sep=';')

display(df_tmp.agg(['nunique']), df_tmp.head())

100%|██████████| 1072/1072 [00:07<00:00, 148.85it/s]


Unnamed: 0,label,mid,display_name,source,filename,filepath
nunique,220,220,220,1,1072,1072


Unnamed: 0,label,mid,display_name,source,filename,filepath
0,316,/m/07r04,Truck,wav_samples_youtube/eval,Pef6g19i5iI,../data/wav_samples_youtube/eval/vidPef6g19i5i...
1,300,/m/07yv9,Vehicle,wav_samples_youtube/eval,Pef6g19i5iI,../data/wav_samples_youtube/eval/vidPef6g19i5i...
2,317,/m/0gvgw0,Air brake,wav_samples_youtube/eval,Pef6g19i5iI,../data/wav_samples_youtube/eval/vidPef6g19i5i...
3,314,/t/dd00134,Car passing by,wav_samples_youtube/eval,WatvT8A8iug,../data/wav_samples_youtube/eval/vidWatvT8A8iu...
4,338,/m/02l6bg,"Propeller, airscrew",wav_samples_youtube/eval,rzxqz7XqLHA,../data/wav_samples_youtube/eval/vidrzxqz7XqLH...


In [9]:
# Unbalanced train subset: label every sample, write the result to CSV,
# then show the per-column unique counts and the first rows.
df_tmp = collectAndLabelYoutubeSamples(
    df_youtube_class_labels=df_youtube_class_labels,
    wav_class_labels_filepath=youtube_wav_unbalanced_train_class_labels_filepath,
    wav_sample_directory=youtube_wav_unbalanced_train_sample_directory,
    source='wav_samples_youtube/unbal',
)
df_tmp.to_csv(target_unbalanced_wav_samples_enumerated_filepath, sep=';')

display(df_tmp.agg(['nunique']), df_tmp.head())

100%|██████████| 50734/50734 [41:51<00:00, 20.20it/s]


Unnamed: 0,label,mid,display_name,source,filename,filepath
nunique,396,396,396,1,50734,50734


Unnamed: 0,label,mid,display_name,source,filename,filepath
0,301,/m/019jd,"Boat, Water vehicle",wav_samples_youtube/unbal,whkOoQGtBEE,../data/wav_samples_youtube/unbal/vidwhkOoQGtB...
1,304,/m/02rlv9,"Motorboat, speedboat",wav_samples_youtube/unbal,whkOoQGtBEE,../data/wav_samples_youtube/unbal/vidwhkOoQGtB...
2,300,/m/07yv9,Vehicle,wav_samples_youtube/unbal,whkOoQGtBEE,../data/wav_samples_youtube/unbal/vidwhkOoQGtB...
3,301,/m/019jd,"Boat, Water vehicle",wav_samples_youtube/unbal,GIB1VHipMhw,../data/wav_samples_youtube/unbal/vidGIB1VHipM...
4,304,/m/02rlv9,"Motorboat, speedboat",wav_samples_youtube/unbal,GIB1VHipMhw,../data/wav_samples_youtube/unbal/vidGIB1VHipM...
