## Audio Analysis of WAV files

Here we are looking for best way to reppresent audio recordings in a spectrogram for deeplearning

In [1]:
# load stuff we need
import json
import logging
import os
import time
import warnings
import librosa
import librosa.display # now seperate
import numpy as np
import pandas as pd
import pydub
import sklearn.preprocessing

from tqdm import tqdm
import h5py as h5py
from config import *

import random
import soundfile as sf

from datetime import datetime

Import of 'jit' requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.
  from numba.decorators import jit as optional_jit


In [2]:
# Project directory
project_name     = 'amsterdam_custom_samples'

# Other settings
proportion_train = 0.5

In [3]:
# Set your serval data folder (should be correctly set already)
serval_data_folder = "../data"
project_data_folder = serval_data_folder + '/' + project_name

In [4]:
# generate combinations from all classes
# classes from output step 01
input_selected_classes_filepath = project_data_folder + '/csv_files/01_input_selected_classes.csv'

input_selected_classes = pd.read_csv(input_selected_classes_filepath, sep=",")


db_levels = ['0db','6db','12db']
n = 500


In [5]:
input_selected_classes[1:]

Unnamed: 0,label,mid,display_name
1,1001,/c/a_1001,556 gunshots
2,1002,/c/a_1002,Brommer Alarm
3,1003,/c/a_1003,Brommer
4,1004,/c/a_1004,Claxons
5,1005,/c/a_1005,Dichtklappende autodeuren
6,1006,/c/a_1006,"Luide mensen (schreeuwen, enz)"
7,1007,/c/a_1007,Motoren
8,1008,/c/a_1008,Terrasgeluide (stille mensen)
9,1009,/c/a_1009,Versterkte muziek


In [6]:
from itertools import combinations

def build_combinations(input_classes, db_l_1='0db',db_l_2='0db' ,n=n):
    # generate combinations 0db
    df = pd.DataFrame(list(combinations(input_classes.label,2)),
                      columns=['file_1_label_filter','file_2_label_filter'])
    l = df.shape[0]
    df["file_1_filepath_filter"] = [db_l_1] * l
    df["file_2_filepath_filter"] = [db_l_2] * l
    df["cnt_generated_samples"] = n 
    df['resample_project_name'] = df.iloc[:,0:4].astype(str).apply('_'.join, axis=1)
    return df


In [7]:
db_combis =list(combinations(db_levels,2))

In [8]:
# we do not want to combine the silence class

# generator function
db_combi_gen = (build_combinations(input_classes=input_selected_classes[1:], 
                                   db_l_1 = combi[0],
                                   db_l_2 = combi[1]) for combi in iter(db_combis)) 

In [9]:
# generate rows for db combinations
df = pd.concat(list(db_combi_gen))

In [10]:
df

Unnamed: 0,file_1_label_filter,file_2_label_filter,file_1_filepath_filter,file_2_filepath_filter,cnt_generated_samples,resample_project_name
0,1001,1002,0db,6db,500,1001_1002_0db_6db
1,1001,1003,0db,6db,500,1001_1003_0db_6db
2,1001,1004,0db,6db,500,1001_1004_0db_6db
3,1001,1005,0db,6db,500,1001_1005_0db_6db
4,1001,1006,0db,6db,500,1001_1006_0db_6db
...,...,...,...,...,...,...
31,1006,1008,6db,12db,500,1006_1008_6db_12db
32,1006,1009,6db,12db,500,1006_1009_6db_12db
33,1007,1008,6db,12db,500,1007_1008_6db_12db
34,1007,1009,6db,12db,500,1007_1009_6db_12db


In [11]:
# add only one row for silence
df = df.append([{'file_1_label_filter': 1000,'file_2_label_filter': 1000, 
                 'file_1_filepath_filter': '0db', 'file_2_filepath_filter': '0db',
                 'cnt_generated_samples': 1500, 'resample_project_name': '1000_1000_0db_0db'
                }])


In [12]:
df.tail()


Unnamed: 0,file_1_label_filter,file_2_label_filter,file_1_filepath_filter,file_2_filepath_filter,cnt_generated_samples,resample_project_name
32,1006,1009,6db,12db,500,1006_1009_6db_12db
33,1007,1008,6db,12db,500,1007_1008_6db_12db
34,1007,1009,6db,12db,500,1007_1009_6db_12db
35,1008,1009,6db,12db,500,1008_1009_6db_12db
0,1000,1000,0db,0db,1500,1000_1000_0db_0db


In [13]:
# Set input files
input_wav_samples_to_combine_filepath     = project_data_folder + "/csv_files/03_input_wav_samples_to_combine.csv"

input_resampled_wav_samples              = project_data_folder + "/csv_files/02_output_resampled_wav_samples.csv"

# Both input and target
target_resampled_wav_samples              = project_data_folder + "/csv_files/03_output_resampled_wav_samples.csv"
#target_resampled_wav_samples_backup       = project_data_folder + "/csv_files/02_output_resampled_wav_samples_backup_" + datetime.today().strftime('%Y%m%d_%H%M%S') + '.csv'

# Set output filepath
target_resampled_wav_folder               = project_data_folder + "/wav_samples"

In [14]:
# write generated combination file for logging purposes
df.to_csv(input_wav_samples_to_combine_filepath, sep=';', index=False)

In [15]:
# Load selected wav samples from csv
df_combinations = pd.read_csv(input_wav_samples_to_combine_filepath, sep=";")

# Load all available wav samples from csv
df_samples = pd.read_csv(input_resampled_wav_samples, sep=";")

# Write backup csv back to disk
#df_samples.to_csv(target_resampled_wav_samples_backup, sep=";", index=False)

# Print
display(df_combinations)

Unnamed: 0,file_1_label_filter,file_2_label_filter,file_1_filepath_filter,file_2_filepath_filter,cnt_generated_samples,resample_project_name
0,1001,1002,0db,6db,500,1001_1002_0db_6db
1,1001,1003,0db,6db,500,1001_1003_0db_6db
2,1001,1004,0db,6db,500,1001_1004_0db_6db
3,1001,1005,0db,6db,500,1001_1005_0db_6db
4,1001,1006,0db,6db,500,1001_1006_0db_6db
...,...,...,...,...,...,...
104,1006,1009,6db,12db,500,1006_1009_6db_12db
105,1007,1008,6db,12db,500,1007_1008_6db_12db
106,1007,1009,6db,12db,500,1007_1009_6db_12db
107,1008,1009,6db,12db,500,1008_1009_6db_12db


In [16]:
# RESAMPLE 16 bit 16 kHz mono
import numpy as np
import scipy.signal
import librosa
import resampy

def resample(file):
    # Load in some audio
    x, sr_orig = librosa.load(file, sr=None, mono=True)

    sr_new = 16000
    # Or use the pre-built high-quality filter
    y = resampy.resample(x, sr_orig, sr_new, filter='kaiser_best') 
    
    # add noise as large as 1x standdeviation
    sd = np.sqrt(y.var())
    noise = np.random.normal(0,sd,y.size)
    
    # shuffle
    i = np.random.randint(y.size)
    y = np.append(y[i:y.size],y[0:i])
    
    return(y,sr_new)

In [17]:
def combine_samples(df_samples, train_or_test):
    df_samples_target = df_samples.copy(deep=True)

    for index, row in df_combinations.iterrows():
        ## Get and create output folder if it does not exist yet
        target_filepath = target_resampled_wav_folder + '/' + train_or_test + '/combine_' + row.resample_project_name
        if not os.path.exists(os.path.dirname(target_filepath + '/')):
            os.makedirs(os.path.dirname(target_filepath + '/'))

        ## Get selected samples
        df_filtered_samples_1 = df_samples.loc[(df_samples.label.isin([row.file_1_label_filter])    | (row.file_1_label_filter in ['*', 'KEEP_ALL'])) &
                                               (df_samples.filepath.apply(lambda x : row.file_1_filepath_filter in x) | (row.file_1_filepath_filter in ['*', 'KEEP_ALL'])) &
                                               (df_samples.ind_train == train_or_test)]
        df_filtered_samples_2 = df_samples.loc[(df_samples.label.isin([row.file_2_label_filter])    | (row.file_2_label_filter in ['*', 'KEEP_ALL'])) &
                                               (df_samples.filepath.apply(lambda x : row.file_2_filepath_filter in x) | (row.file_2_filepath_filter in ['*', 'KEEP_ALL'])) &
                                               (df_samples.ind_train == train_or_test)]

        if (len(df_filtered_samples_1) <= 3) or (len(df_filtered_samples_2) <= 3):
            print('[WARNING]: Invalid filter used; found samples 1: ', len(df_filtered_samples_1), '; found samples 2: ', len(df_filtered_samples_2), sep='')
            print(row)
            continue

        # Generate random sample
        permutation_1 = np.random.choice(range(0, len(df_filtered_samples_1)), size=row.cnt_generated_samples, replace=True)
        permutation_2 = np.random.choice(range(0, len(df_filtered_samples_2)), size=row.cnt_generated_samples, replace=True)

        ## Iterate over all samples
        for index in tqdm(range(1, row.cnt_generated_samples)):
            file1 = df_filtered_samples_1.iloc[permutation_1[index]].filepath
            file2 = df_filtered_samples_2.iloc[permutation_2[index]].filepath

            filename1 = os.path.basename(os.path.splitext(os.path.normpath(file1))[0])
            filename2 = os.path.basename(os.path.splitext(os.path.normpath(file2))[0])

            target_combined_filepath = target_filepath + '/' + filename1 + '__' + filename2 + '.wav'

            df_wav_samples_file1 = df_samples.loc[df_samples.filepath.apply(lambda x : file1 in x)]
            df_wav_samples_file2 = df_samples.loc[df_samples.filepath.apply(lambda x : file2 in x)]

            df = df_wav_samples_file1.copy(deep=True).append(df_wav_samples_file2, ignore_index=True)
            df.source = 'combined/' + df.source
            df.filename = target_combined_filepath
            df.filepath = target_combined_filepath
            df.filename_old = filename1 + '//' + filename2
            df.filepath_old = ""
            
            df = df.drop_duplicates(subset = ["label"])
            
            if (len(df_wav_samples_file1) == 0) or (len(df_wav_samples_file2) == 0):
                print('[ERROR]: Invalid file selected; file1: ', file1, '; file2: ', file2, sep='')
                continue

            df_samples_target = df_samples_target.append(df, ignore_index=True)
            
            y1, sr1 = resample(file1)
            y2, sr2 = resample(file2)
            if len(y1) > len(y2):
                y2 = y2 + y1[0:len(y2)]
                sf.write(target_combined_filepath, y2, sr2, subtype='PCM_16')
            else:
                y1 = y1 + y2[0:len(y1)]
                sf.write(target_combined_filepath, y1, sr1, subtype='PCM_16')

    return(df_samples_target)

In [18]:
df_samples_target_train = combine_samples(df_samples, "train")

100%|██████████| 499/499 [02:11<00:00,  3.80it/s]
100%|██████████| 499/499 [02:09<00:00,  3.85it/s]
100%|██████████| 499/499 [02:11<00:00,  3.79it/s]
100%|██████████| 499/499 [02:08<00:00,  3.88it/s]
100%|██████████| 499/499 [02:09<00:00,  3.85it/s]
100%|██████████| 499/499 [02:09<00:00,  3.86it/s]
100%|██████████| 499/499 [02:10<00:00,  3.84it/s]
100%|██████████| 499/499 [02:22<00:00,  3.50it/s]
100%|██████████| 499/499 [02:21<00:00,  3.54it/s]
100%|██████████| 499/499 [02:29<00:00,  3.34it/s]
100%|██████████| 499/499 [02:26<00:00,  3.41it/s]
100%|██████████| 499/499 [02:33<00:00,  3.25it/s]
100%|██████████| 499/499 [02:24<00:00,  3.44it/s]
100%|██████████| 499/499 [02:16<00:00,  3.65it/s]
100%|██████████| 499/499 [02:17<00:00,  3.62it/s]
100%|██████████| 499/499 [02:16<00:00,  3.66it/s]
100%|██████████| 499/499 [02:17<00:00,  3.63it/s]
100%|██████████| 499/499 [02:34<00:00,  3.23it/s]
100%|██████████| 499/499 [02:26<00:00,  3.40it/s]
100%|██████████| 499/499 [02:26<00:00,  3.41it/s]


In [19]:
# dit wil je niet op eval doen?
#df_samples_target_train_eval = combine_samples(df_samples_target_train, "eval")

In [20]:
# Write full resampled wav files back to disk
#df_samples_target_train_eval.to_csv(target_resampled_wav_samples, sep=';', index=False)
df_samples_target_train.to_csv(target_resampled_wav_samples, sep=';', index=False)

In [21]:
# report
#df_samples_target_train_eval = pd.read_csv(target_resampled_wav_samples, sep=";")

#display(df_samples_target_train_eval.groupby(['label', 'mid', 'display_name']).agg(['nunique']))
display(df_samples_target_train.groupby(['label', 'mid', 'display_name']).count())

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,source,filename,filepath,old_filename,old_filepath,ind_train,display_name.1
label,mid,display_name,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1
1000,/c/a_1000,Silence,1589,1589,1589,1589,1589,1589,1589
1001,/c/a_1001,556 gunshots,14604,14604,14604,14604,14604,14604,14604
1002,/c/a_1002,Brommer Alarm,12045,12045,12045,12045,12045,12045,12045
1003,/c/a_1003,Brommer,14412,14412,14412,14412,14412,14412,14412
1004,/c/a_1004,Claxons,12372,12372,12372,12372,12372,12372,12372
1005,/c/a_1005,Dichtklappende autodeuren,12468,12468,12468,12468,12468,12468,12468
1006,/c/a_1006,"Luide mensen (schreeuwen, enz)",13878,13878,13878,13878,13878,13878,13878
1007,/c/a_1007,Motoren,12129,12129,12129,12129,12129,12129,12129
1008,/c/a_1008,Terrasgeluide (stille mensen),13584,13584,13584,13584,13584,13584,13584
1009,/c/a_1009,Versterkte muziek,12078,12078,12078,12078,12078,12078,12078
