## Audio Analysis of WAV files

Here we are looking for best way to reppresent audio recordings in a spectrogram for deeplearning

In [1]:
# load stuff we need
import json
import logging
import os
import time
import warnings
import librosa
import librosa.display # now seperate
import numpy as np
import pandas as pd
import pydub
import sklearn.preprocessing

from tqdm import tqdm
import h5py as h5py
from config import *

import random
import soundfile as sf

from datetime import datetime

Import of 'jit' requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.
  from numba.decorators import jit as optional_jit


In [2]:
# Project directory
project_name     = 'amsterdam_custom_samples'

# Other settings
proportion_train = 0.5

In [3]:
# Set your serval data folder (should be correctly set already)
serval_data_folder = "../data"
project_data_folder = serval_data_folder + '/' + project_name

In [4]:
# Set input files
input_wav_samples_to_combine_filepath     = project_data_folder + "/csv_files/03_input_wav_samples_to_combine.csv"

input_resampled_wav_samples              = project_data_folder + "/csv_files/02_output_resampled_wav_samples.csv"

# Both input and target
target_resampled_wav_samples              = project_data_folder + "/csv_files/03_output_resampled_wav_samples.csv"
#target_resampled_wav_samples_backup       = project_data_folder + "/csv_files/02_output_resampled_wav_samples_backup_" + datetime.today().strftime('%Y%m%d_%H%M%S') + '.csv'

# Set output filepath
target_resampled_wav_folder               = project_data_folder + "/wav_samples"

In [5]:
# Load selected wav samples from csv
df_combinations = pd.read_csv(input_wav_samples_to_combine_filepath, sep=";")

# Load all available wav samples from csv
df_samples = pd.read_csv(input_resampled_wav_samples, sep=";")

# Write backup csv back to disk
#df_samples.to_csv(target_resampled_wav_samples_backup, sep=";", index=False)

# Print
display(df_combinations)

Unnamed: 0,file_1_label_filter,file_1_filepath_filter,file_2_label_filter,file_2_filepath_filter,cnt_generated_samples,resample_project_name
0,1002,0db,1003,0db,1000,1002_1003_0db
1,1004,0db,1005,0db,1000,1004_1005_0db
2,1002,6db,1003,6db,1000,1002_1003_6db
3,1004,6db,1005,6db,1000,1004_1005_6db
4,1004,0db,308,0db,1000,1004_308_0db
5,396,0db,424,0db,1000,396_424_0db
6,308,0db,73,0db,1000,308_73_0db
7,1004,6db,1005,6db,1000,1004_1005_6db


In [6]:
# RESAMPLE 16 bit 16 kHz mono
import numpy as np
import scipy.signal
import librosa
import resampy

def resample(file):
    # Load in some audio
    x, sr_orig = librosa.load(file, sr=None, mono=True)

    sr_new = 16000
    # Or use the pre-built high-quality filter
    y = resampy.resample(x, sr_orig, sr_new, filter='kaiser_best') 
    
    # add noise as large as 1x standdeviation
    sd = np.sqrt(y.var())
    noise = np.random.normal(0,sd,y.size)
    
    return(y,sr_new)

In [7]:
def combine_samples(df_samples, train_or_test):
    df_samples_target = df_samples.copy(deep=True)

    for index, row in df_combinations.iterrows():
        ## Get and create output folder if it does not exist yet
        target_filepath = target_resampled_wav_folder + '/' + train_or_test + '/combine_' + row.resample_project_name
        if not os.path.exists(os.path.dirname(target_filepath + '/')):
            os.makedirs(os.path.dirname(target_filepath + '/'))

        ## Get selected samples
        df_filtered_samples_1 = df_samples.loc[(df_samples.label.isin([row.file_1_label_filter])    | (row.file_1_label_filter in ['*', 'KEEP_ALL'])) &
                                               (df_samples.filepath.apply(lambda x : row.file_1_filepath_filter in x) | (row.file_1_filepath_filter in ['*', 'KEEP_ALL'])) &
                                               (df_samples.ind_train == train_or_test)]
        df_filtered_samples_2 = df_samples.loc[(df_samples.label.isin([row.file_2_label_filter])    | (row.file_2_label_filter in ['*', 'KEEP_ALL'])) &
                                               (df_samples.filepath.apply(lambda x : row.file_2_filepath_filter in x) | (row.file_2_filepath_filter in ['*', 'KEEP_ALL'])) &
                                               (df_samples.ind_train == train_or_test)]

        if (len(df_filtered_samples_1) <= 3) or (len(df_filtered_samples_2) <= 3):
            print('[WARNING]: Invalid filter used; found samples 1: ', len(df_filtered_samples_1), '; found samples 2: ', len(df_filtered_samples_2), sep='')
            print(row)
            continue

        # Generate random sample
        permutation_1 = np.random.choice(range(0, len(df_filtered_samples_1)), size=row.cnt_generated_samples, replace=True)
        permutation_2 = np.random.choice(range(0, len(df_filtered_samples_2)), size=row.cnt_generated_samples, replace=True)

        ## Iterate over all samples
        for index in tqdm(range(1, row.cnt_generated_samples)):
            file1 = df_filtered_samples_1.iloc[permutation_1[index]].filepath
            file2 = df_filtered_samples_2.iloc[permutation_2[index]].filepath

            filename1 = os.path.basename(os.path.splitext(os.path.normpath(file1))[0])
            filename2 = os.path.basename(os.path.splitext(os.path.normpath(file2))[0])

            target_combined_filepath = target_filepath + '/' + filename1 + '__' + filename2 + '.wav'

            df_wav_samples_file1 = df_samples.loc[df_samples.filepath.apply(lambda x : file1 in x)]
            df_wav_samples_file2 = df_samples.loc[df_samples.filepath.apply(lambda x : file2 in x)]

            df = df_wav_samples_file1.copy(deep=True).append(df_wav_samples_file2, ignore_index=True)
            df.source = 'combined/' + df.source
            df.filename = target_combined_filepath
            df.filepath = target_combined_filepath
            df.filename_old = filename1 + '//' + filename2
            df.filepath_old = ""
            
            df = df.drop_duplicates(subset = ["label"])
            
            if (len(df_wav_samples_file1) == 0) or (len(df_wav_samples_file2) == 0):
                print('[ERROR]: Invalid file selected; file1: ', file1, '; file2: ', file2, sep='')
                continue

            df_samples_target = df_samples_target.append(df, ignore_index=True)
            
            y1, sr1 = resample(file1)
            y2, sr2 = resample(file2)
            if len(y1) > len(y2):
                y2 = y2 + y1[0:len(y2)]
                sf.write(target_combined_filepath, y2, sr2, subtype='PCM_16')
            else:
                y1 = y1 + y2[0:len(y1)]
                sf.write(target_combined_filepath, y1, sr1, subtype='PCM_16')

    return(df_samples_target)

In [8]:
df_samples_target_train = combine_samples(df_samples, "train")

100%|██████████| 999/999 [06:09<00:00,  2.70it/s]
100%|██████████| 999/999 [06:29<00:00,  2.57it/s]
100%|██████████| 999/999 [06:30<00:00,  2.56it/s]
100%|██████████| 999/999 [06:17<00:00,  2.64it/s]
100%|██████████| 999/999 [06:15<00:00,  2.66it/s]
100%|██████████| 999/999 [06:18<00:00,  2.64it/s]
100%|██████████| 999/999 [05:41<00:00,  2.93it/s]
100%|██████████| 999/999 [05:40<00:00,  2.94it/s]


In [9]:
df_samples_target_train_eval = combine_samples(df_samples_target_train, "eval")

100%|██████████| 999/999 [04:52<00:00,  3.41it/s]
100%|██████████| 999/999 [04:57<00:00,  3.36it/s]
100%|██████████| 999/999 [04:50<00:00,  3.44it/s]
100%|██████████| 999/999 [04:51<00:00,  3.42it/s]
100%|██████████| 999/999 [05:11<00:00,  3.20it/s]
100%|██████████| 999/999 [05:07<00:00,  3.25it/s]
100%|██████████| 999/999 [05:07<00:00,  3.25it/s]
100%|██████████| 999/999 [04:52<00:00,  3.42it/s]


In [10]:
# Write full resampled wav files back to disk
df_samples_target_train_eval.to_csv(target_resampled_wav_samples, sep=';', index=False)

In [11]:
# report
df_samples_target_train_eval = pd.read_csv(target_resampled_wav_samples, sep=";")

#display(df_samples_target_train_eval.groupby(['label', 'mid', 'display_name']).agg(['nunique']))
display(df_samples_target_train_eval.groupby(['label', 'mid', 'display_name']).count())

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,source,filename,filepath,old_filename,old_filepath,ind_train
label,mid,display_name,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
73,/m/068hy,"Domestic animals, pets",2406,2406,2406,2406,2406,2406
307,/m/0k4j,Car,7454,7454,7454,7454,7454,7454
308,/m/0912c9,"Vehicle horn, car horn, honking",4442,4442,4442,4442,4442,4442
314,/t/dd00134,Car passing by,9459,9459,9459,9459,9459,9459
316,/m/07r04,Truck,30507,30507,30507,30507,30507,30507
318,/m/05x_td,"Air horn, truck horn",646,646,646,646,646,646
322,/m/03j1ly,Emergency vehicle,3442,3442,3442,3442,3442,3442
347,/m/01j4z9,Chainsaw,5570,5570,5570,5570,5570,5570
353,/m/07q2z82,"Accelerating, revving, vroom",1484,1484,1484,1484,1484,1484
396,/m/03kmc9,Siren,2437,2437,2437,2437,2437,2437
