In [None]:
## This notebook generates the YouTube class-label CSV files; these files can be used when parsing the YouTube WAV files into TFRecords

In [4]:
## Import packages
import os
import csv
import random
import math
import ntpath
import numpy as np
import pandas as pd
import time
from pathlib import Path
from scipy.io import wavfile

import tensorflow as tf
from tensorflow import gfile
from tensorflow import logging

In [5]:
import sys

In [6]:
# Root folder of the Serval data, given relative to this notebook.
# Every input and output path defined in the cells below is built from this
# value by string concatenation, so it must stay a plain str without a trailing slash.
serval_data_folder = "../data"

In [7]:
# Input locations; all of them are derived from serval_data_folder.

# Directory that holds the YouTube WAV sample folders and the segment CSV files
youtube_wav_samples_directory = serval_data_folder + "/wav_samples_youtube"

# CSV file with the generic YouTube class labels (loaded with pandas further down)
youtube_class_labels_filepath = serval_data_folder + "/csv_files/class_labels_indices.csv"

# Segment CSV file per subset; passed to collectAndLabelYoutubeSamples as wav_class_labels_filepath
youtube_wav_balanced_train_class_labels_filepath   = youtube_wav_samples_directory + "/balanced_train_segments.csv"
youtube_wav_balanced_eval_class_labels_filepath    = youtube_wav_samples_directory + "/eval_segments.csv"
youtube_wav_unbalanced_train_class_labels_filepath = youtube_wav_samples_directory + "/unbalanced_train_segments.csv"

# WAV sample directory per subset; passed to collectAndLabelYoutubeSamples as wav_sample_directory
youtube_wav_balanced_train_sample_directory   = youtube_wav_samples_directory + "/bal"
youtube_wav_balanced_eval_sample_directory    = youtube_wav_samples_directory + "/eval"
youtube_wav_unbalanced_train_sample_directory = youtube_wav_samples_directory + "/unbal"

In [8]:
# Output CSV files, one per subset (balanced train, unbalanced train, eval).
# Each is written by the matching collect-and-label cell below via DataFrame.to_csv(..., sep=';').
target_balanced_wav_samples_enumerated_filepath   = serval_data_folder + "/csv_files/wav_samples_youtube_balanced_all_enumerated_and_labeled.csv"
target_unbalanced_wav_samples_enumerated_filepath = serval_data_folder + "/csv_files/wav_samples_youtube_unbalanced_all_enumerated_and_labeled.csv"
target_eval_wav_samples_enumerated_filepath       = serval_data_folder + "/csv_files/wav_samples_youtube_eval_all_enumerated_and_labeled.csv"

In [13]:
def collectAndLabelYoutubeSamples(df_youtube_class_labels, wav_class_labels_filepath, wav_sample_directory, source):
    """Build one labelled row per (WAV file, class label) pair of a sample directory.

    Every ``*.wav`` file found in ``wav_sample_directory`` is looked up by name in
    the segment CSV; each ';'-separated entry of that record's
    ``youtube_mid_labels`` is then looked up by ``mid`` in ``df_youtube_class_labels``.

    Parameters
    ----------
    df_youtube_class_labels : pandas.DataFrame
        Generic class-label table. Must have a ``mid`` column; its ``index``
        column is returned under the name ``label``.
    wav_class_labels_filepath : str
        Comma-separated CSV file with (at least) the columns ``filename`` and
        ``youtube_mid_labels``.
    wav_sample_directory : str
        Directory that is searched (non-recursively) for ``*.wav`` files.
    source : str
        Value stored in the ``source`` column of every returned row.

    Returns
    -------
    pandas.DataFrame
        The columns of ``df_youtube_class_labels`` (``index`` renamed to
        ``label``) followed by ``source``, ``filename`` and ``filepath``.
        Empty, but with the same columns, when nothing could be labelled.

    Notes
    -----
    A WAV file that does not have exactly one segment record, and a label id
    that does not have exactly one class record, is reported with an
    ``[ERROR]`` line on stdout and skipped. An ``[INFO]`` progress line is
    printed after every 100 visited files, skipped ones included.
    """
    df_wav_class_labels = pd.read_csv(wav_class_labels_filepath, sep=",")
    output_columns = df_youtube_class_labels.columns.values.tolist() + ['source', 'filename', 'filepath']

    # Index both tables once so that every file / label id is a dict lookup
    # instead of a scan over the whole frame. Lists are kept as values because
    # the "exactly one record" checks below need the number of matches.
    labels_by_filename = {}
    for name, mid_labels in zip(df_wav_class_labels.filename, df_wav_class_labels.youtube_mid_labels):
        labels_by_filename.setdefault(name, []).append(mid_labels)

    class_rows_by_mid = {}
    for class_row in df_youtube_class_labels.to_dict('records'):
        class_rows_by_mid.setdefault(class_row['mid'], []).append(class_row)

    start_time = time.time()
    files = gfile.Glob(str(wav_sample_directory + "/" + "*.wav"))

    # Rows are gathered in a plain list and turned into a DataFrame once at the
    # end; growing a DataFrame row by row is quadratic, and DataFrame.append no
    # longer exists in pandas >= 2.0.
    records = []
    for count, file in enumerate(files, start=1):
        # Drop the 3-character prefix and the 4-character ".wav" extension
        # (sample files are named like "vid<name>.wav").
        filename = ntpath.basename(file)[3:-4]
        label_records = labels_by_filename.get(filename, [])

        if len(label_records) != 1:
            print('[ERROR]: Found sample with ', len(label_records) ,' class labels records (expected=1); filename=', filename, '; path=', file, sep='')
        else:
            for class_ in str(label_records[0]).split(";"):
                class_rows = class_rows_by_mid.get(class_, [])

                if len(class_rows) != 1:
                    print('[ERROR]: Found sample with ', len(class_rows) ,' youtube class labels records (expected=1); filename=', filename, '; youtube_mid_id=', class_, sep='')
                    continue

                records.append(dict(class_rows[0], source=source, filename=filename, filepath=file))

        # Reached for every file, including skipped ones, so the reported
        # count always equals the number of files visited so far.
        if count % 100 == 0:
            print("[INFO]: Count=", count, "/", len(files), "; Time=", round((time.time() - start_time)), "; labels=", len(records),  sep='')

    df_output_wav_files_classified = pd.DataFrame(records, columns=output_columns)
    return df_output_wav_files_classified.rename(columns={'index': 'label'})
    

In [14]:
# Scratch cell: evaluates a literal list of column names only to display it.
# The value is not assigned to anything, so no other cell can read it.
# NOTE(review): looks like a leftover from development - confirm and remove.
['index', 'label', 'description', 'file_path', 'filename', 'class_name']

['index', 'label', 'description', 'file_path', 'filename', 'class_name']

In [15]:
# Read the generic YouTube class-label table and preview its first rows
df_youtube_class_labels = pd.read_csv(
    youtube_class_labels_filepath,
    sep=",",
)
df_youtube_class_labels.head(n=5)

Unnamed: 0,index,mid,display_name
0,0,/m/09x0r,Speech
1,1,/m/05zppz,"Male speech, man speaking"
2,2,/m/02zsn,"Female speech, woman speaking"
3,3,/m/0ytgt,"Child speech, kid speaking"
4,4,/m/01h8n0,Conversation


In [16]:
# Balanced train subset: label every WAV sample, store the table as CSV,
# then show the number of labelled rows and the first few of them
df_tmp = collectAndLabelYoutubeSamples(
    df_youtube_class_labels=df_youtube_class_labels,
    wav_class_labels_filepath=youtube_wav_balanced_train_class_labels_filepath,
    wav_sample_directory=youtube_wav_balanced_train_sample_directory,
    source='wav_samples_youtube/bal',
)
df_tmp.to_csv(target_balanced_wav_samples_enumerated_filepath, sep=';')

print(df_tmp.shape[0])
print(df_tmp.head(n=5))

[INFO]: Count=100/1057; Time=1; labels=258
[INFO]: Count=200/1057; Time=1; labels=494
[INFO]: Count=300/1057; Time=2; labels=788
[INFO]: Count=400/1057; Time=2; labels=1082
[INFO]: Count=500/1057; Time=3; labels=1386
[INFO]: Count=600/1057; Time=3; labels=1660
[INFO]: Count=700/1057; Time=4; labels=1947
[INFO]: Count=800/1057; Time=5; labels=2223
[INFO]: Count=900/1057; Time=5; labels=2509
[INFO]: Count=1000/1057; Time=6; labels=2784
2936
  label         mid      display_name                   source     filename  \
0   427   /m/032s66  Gunshot, gunfire  wav_samples_youtube/bal  --aaILOrkII   
1   431   /m/073cg4           Cap gun  wav_samples_youtube/bal  --aaILOrkII   
2    32  /t/dd00003      Male singing  wav_samples_youtube/bal  --aO5cdqSAg   
3    34  /t/dd00005     Child singing  wav_samples_youtube/bal  --aO5cdqSAg   
4   137    /m/04rlf             Music  wav_samples_youtube/bal  -24dqQM_rDk   

                                            filepath  
0  ..\data\wav_samples_yout

In [17]:
# Unbalanced train subset: label every WAV sample, store the table as CSV,
# then show the number of labelled rows and the first few of them
df_tmp = collectAndLabelYoutubeSamples(
    df_youtube_class_labels=df_youtube_class_labels,
    wav_class_labels_filepath=youtube_wav_unbalanced_train_class_labels_filepath,
    wav_sample_directory=youtube_wav_unbalanced_train_sample_directory,
    source='wav_samples_youtube/unbal',
)
df_tmp.to_csv(target_unbalanced_wav_samples_enumerated_filepath, sep=';')

print(df_tmp.shape[0])
print(df_tmp.head(n=5))

[INFO]: Count=100/50734; Time=5; labels=258
[INFO]: Count=200/50734; Time=8; labels=517
[INFO]: Count=300/50734; Time=11; labels=773
[INFO]: Count=400/50734; Time=14; labels=1030
[INFO]: Count=500/50734; Time=18; labels=1300
[INFO]: Count=600/50734; Time=21; labels=1556
[INFO]: Count=700/50734; Time=24; labels=1814
[INFO]: Count=800/50734; Time=28; labels=2064
[INFO]: Count=900/50734; Time=31; labels=2322
[INFO]: Count=1000/50734; Time=34; labels=2569
[INFO]: Count=1100/50734; Time=37; labels=2834
[INFO]: Count=1200/50734; Time=41; labels=3092
[INFO]: Count=1300/50734; Time=44; labels=3371
[INFO]: Count=1400/50734; Time=47; labels=3634
[INFO]: Count=1500/50734; Time=51; labels=3876
[INFO]: Count=1600/50734; Time=54; labels=4126
[INFO]: Count=1700/50734; Time=57; labels=4397
[INFO]: Count=1800/50734; Time=61; labels=4654
[INFO]: Count=1900/50734; Time=64; labels=4907
[INFO]: Count=2000/50734; Time=67; labels=5165
[INFO]: Count=2100/50734; Time=71; labels=5412
[INFO]: Count=2200/50734; T

In [18]:
# Eval subset: label every WAV sample, store the table as CSV,
# then show the number of labelled rows and the first few of them
df_tmp = collectAndLabelYoutubeSamples(
    df_youtube_class_labels=df_youtube_class_labels,
    wav_class_labels_filepath=youtube_wav_balanced_eval_class_labels_filepath,
    wav_sample_directory=youtube_wav_balanced_eval_sample_directory,
    source='wav_samples_youtube/eval',
)
df_tmp.to_csv(target_eval_wav_samples_enumerated_filepath, sep=';')

print(df_tmp.shape[0])
print(df_tmp.head(n=5))

[INFO]: Count=100/1072; Time=1; labels=252
[INFO]: Count=200/1072; Time=1; labels=540
[INFO]: Count=300/1072; Time=2; labels=829
[INFO]: Count=400/1072; Time=2; labels=1114
[INFO]: Count=500/1072; Time=3; labels=1419
[INFO]: Count=600/1072; Time=4; labels=1726
[INFO]: Count=700/1072; Time=4; labels=2038
[INFO]: Count=800/1072; Time=5; labels=2333
[INFO]: Count=900/1072; Time=5; labels=2584
[INFO]: Count=1000/1072; Time=6; labels=2877
3100
  label        mid display_name                    source     filename  \
0   137   /m/04rlf        Music  wav_samples_youtube/eval  -1II0Di9Hkc   
1   525   /m/06bz3        Radio  wav_samples_youtube/eval  -1II0Di9Hkc   
2   137   /m/04rlf        Music  wav_samples_youtube/eval  -1OlgJWehn8   
3   525   /m/06bz3        Radio  wav_samples_youtube/eval  -1OlgJWehn8   
4   347  /m/01j4z9     Chainsaw  wav_samples_youtube/eval  -23CeprtibU   

                                            filepath  
0  ..\data\wav_samples_youtube\eval\vid-1II0Di9Hk...  
1 