## Audio Analysis of WAV files

Here we are looking for the best way to represent audio recordings in a spectrogram for deep learning.

In [1]:
# load stuff we need
import json
import logging
import os
import time
import warnings
import librosa
import librosa.display # now seperate
import numpy as np
import pandas as pd
import pydub
import sklearn.preprocessing

from tqdm import tqdm
import h5py as h5py
from config import *

import random
import soundfile as sf

Import of 'jit' requested from: 'numba.decorators', please update to use 'numba.core.decorators' or pin to Numba version 0.48.0. This alias will not be present in Numba version 0.50.0.
  from numba.decorators import jit as optional_jit


In [2]:
# Name of the project directory (a sub-folder of the serval data folder)
project_name = 'amsterdam_custom_samples'

# Weight of the 'train' label when a source file is randomly assigned to
# 'train' or 'eval'; 'eval' gets the remaining 1 - proportion_train
proportion_train = 0.5

In [3]:
# Root folder holding all serval data (should be correctly set already)
serval_data_folder = "../data"

# Everything belonging to this project lives in its own sub-folder
project_data_folder = f"{serval_data_folder}/{project_name}"

In [4]:
# Input: csv listing the wav samples selected for this project
input_selected_wav_samples_filepath = f"{project_data_folder}/csv_files/01_output_selected_wav_samples.csv"

# Output: csv describing every resampled wav file, plus an intermediate
# version of the same table that is rewritten while the resampling runs
target_resampled_wav_samples = f"{project_data_folder}/csv_files/02_output_resampled_wav_samples.csv"
target_intermediate_resampled_wav_samples = f"{project_data_folder}/csv_files/02_intermediate_output_resampled_wav_samples.csv"

# Output: root folder receiving the resampled wav files
target_resampled_wav_folder = f"{project_data_folder}/wav_samples"

In [5]:
# Read the selected wav samples (semicolon-separated csv) and, in the same
# pass, keep only the columns whose name does not start with 'Unnamed'
# (i.e. drop a leftover index column if the csv contains one)
df_selected_samples = (
    pd.read_csv(input_selected_wav_samples_filepath, sep=";")
    .loc[:, lambda frame: ~frame.columns.str.match('Unnamed')]
)

# Show, per class, the number of distinct values in each remaining column
display(
    df_selected_samples
    .groupby(['label', 'mid', 'display_name'])
    .agg(['nunique'])
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,source,filename,filepath
Unnamed: 0_level_1,Unnamed: 1_level_1,Unnamed: 2_level_1,nunique,nunique,nunique
label,mid,display_name,Unnamed: 3_level_2,Unnamed: 4_level_2,Unnamed: 5_level_2
73,/m/068hy,"Domestic animals, pets",3,136,136
307,/m/0k4j,Car,3,2319,2319
308,/m/0912c9,"Vehicle horn, car horn, honking",3,136,136
314,/t/dd00134,Car passing by,3,3112,3112
316,/m/07r04,Truck,3,8839,8839
318,/m/05x_td,"Air horn, truck horn",3,50,50
322,/m/03j1ly,Emergency vehicle,3,644,644
347,/m/01j4z9,Chainsaw,3,1219,1219
353,/m/07q2z82,"Accelerating, revving, vroom",3,492,492
396,/m/03kmc9,Siren,3,121,121


In [6]:
# RESAMPLE 16 bit 16 kHz mono
import numpy as np
import scipy.signal
import librosa
import resampy

def resample(file, sr_new=16000):
    """Load an audio file as a mono signal and resample it to ``sr_new`` Hz.

    Parameters
    ----------
    file : str
        Path of the audio file, passed straight to ``librosa.load``.
    sr_new : int, optional
        Target sample rate in Hz. Defaults to 16000, the rate this notebook
        was written for.

    Returns
    -------
    tuple
        ``(y, sr_new)``: the resampled signal and the sample rate it now has.
    """
    # sr=None asks librosa not to resample on load, so sr_orig is the rate
    # stored in the file; mono=True requests a single-channel signal
    x, sr_orig = librosa.load(file, sr=None, mono=True)

    # Resample with resampy's pre-built 'kaiser_best' filter
    y = resampy.resample(x, sr_orig, sr_new, filter='kaiser_best')
    return y, sr_new

In [7]:
def reduce_volume_and_store_file(df_selected_samples, target_resampled_wav_folder, input_file, ind_train, db, db_name, sr, y):
    """Write an attenuated copy of a signal as 16-bit PCM wav and return its metadata rows.

    The copy is stored at
    ``<target_resampled_wav_folder>/<ind_train>/<db_name>/<parent dir of input_file>/<db_name>_<input file stem>.wav``.

    Parameters
    ----------
    df_selected_samples : pandas.DataFrame
        Table of selected samples; the rows whose ``filepath`` equals
        ``input_file`` supply the class labels for the new file.
    target_resampled_wav_folder : str
        Root folder for the written wav files.
    input_file : str
        Path of the original sample, as it appears in
        ``df_selected_samples.filepath``.
    ind_train : str
        Split indicator, used both as a folder name and as the value of the
        ``ind_train`` column.
    db : float
        Attenuation in decibels; 0 leaves the signal unchanged.
    db_name : str
        Label of this volume variant, used as a folder name and file prefix.
    sr : int
        Sample rate of ``y`` in Hz.
    y : array-like
        Audio samples to write (in this notebook: the output of ``resample``).
        The caller's array is not modified.

    Returns
    -------
    pandas.DataFrame
        A copy of the matching rows with ``filename``/``filepath`` pointing to
        the written file, the previous values kept in
        ``old_filename``/``old_filepath``, and ``ind_train`` added. If no row
        matches, an error is printed, nothing is written, and an empty frame
        with the output columns is returned.
    """
    output_columns = ['label', 'mid', 'display_name', 'source', 'filename', 'filepath', 'old_filename', 'old_filepath', 'ind_train']

    # Work on the input_file argument (not on a global of the calling cell)
    normalized_path = os.path.normpath(input_file)
    ## Get filename without wav extension
    filename = os.path.basename(os.path.splitext(normalized_path)[0])
    directory_name = os.path.basename(os.path.dirname(normalized_path))

    # Look the sample up before touching the file system, so a failed lookup
    # leaves no empty directories behind
    df_row = df_selected_samples.loc[df_selected_samples.filepath == input_file].copy(deep=True)
    if len(df_row) == 0:
        print('[ERROR]: Found sample with ', len(df_row) ,' class labels records (expected=1); filename=', filename, '; directory_name=', directory_name, sep='')
        return pd.DataFrame(columns=output_columns)

    target_filename = db_name + '_' + filename
    target_filepath = target_resampled_wav_folder + '/' + ind_train + '/' + db_name + '/' + directory_name +'/'+ target_filename + '.wav'
    os.makedirs(os.path.dirname(target_filepath), exist_ok=True)

    df_row['ind_train'] = ind_train
    df_row['old_filename'] = df_row['filename']
    df_row['old_filepath'] = df_row['filepath']
    # filename holds only the file name; the full path goes into filepath
    df_row['filename'] = os.path.basename(target_filepath)
    df_row['filepath'] = target_filepath

    # Reduce volume: y holds amplitude samples, so an attenuation of `db`
    # decibels is a factor of 10 ** (-db / 20). (An exponent of -db / 10 is
    # the power ratio and would attenuate the signal by 2 * db.)
    gain = 10.0 ** (-db / 20.0)

    # y * gain builds a new array, so the caller's signal stays untouched
    sf.write(target_filepath, y * gain, sr, subtype='PCM_16')

    return df_row

In [8]:
# RESAMPLE AND ADJUST VOLUME FOR ALL WAVS IN FOLDER

# Column order of the resulting table (and of the csv files written from it)
output_columns = ['label', 'mid', 'display_name', 'source', 'filename', 'filepath', 'old_filename', 'old_filepath', 'ind_train']

# One copy is written per source file for each (attenuation in dB, variant name) pair
volume_variants = [(0, '0db'), (6, '6db'), (12, '12db')]  # (18, '18db') is currently disabled

# Dedicated, seeded generator so the train/eval assignment is the same on every run
split_seed = 42
split_rng = random.Random(split_seed)


def _combine_resampled_frames(frames):
    """Concatenate the per-file metadata frames into one table with a stable column order."""
    # Skip empty frames (returned for samples without a label record)
    non_empty = [frame for frame in frames if len(frame) > 0]
    if not non_empty:
        return pd.DataFrame(columns=output_columns)
    combined = pd.concat(non_empty, ignore_index=True)
    # Known columns first, in their fixed order, followed by any additional ones
    extra_columns = [column for column in combined.columns if column not in output_columns]
    return combined.reindex(columns=output_columns + extra_columns)


files = df_selected_samples.filepath.unique()

# Collect the frames in a list and concatenate once: growing a DataFrame row
# by row is quadratic, and DataFrame.append no longer exists in pandas >= 2.0
resampled_frames = []
count = 0

for count, file in enumerate(tqdm(files), start=1):
    # random choice between train or eval label
    train_or_test = split_rng.choices(['train', 'eval'], [proportion_train, 1 - proportion_train])[0]

    ## resample 16 kHz mono
    y, sr = resample(file)

    # All volume variants of one source file share the same split
    for db, db_name in volume_variants:
        resampled_frames.append(
            reduce_volume_and_store_file(df_selected_samples, target_resampled_wav_folder, file, train_or_test, db, db_name, sr, y)
        )

    ## Write intermediate result every 100 source files
    if count % 100 == 0:
        _combine_resampled_frames(resampled_frames).to_csv(target_intermediate_resampled_wav_samples, sep=';', index=False)

df_all_resampled_wav_files = _combine_resampled_frames(resampled_frames)

100%|██████████| 20078/20078 [2:27:31<00:00,  2.27it/s]  


In [9]:
# Persist the complete table of resampled wav files as a semicolon-separated
# csv, without the DataFrame index
df_all_resampled_wav_files.to_csv(
    path_or_buf=target_resampled_wav_samples,
    sep=';',
    index=False,
)

In [10]:
# Report: per class, the number of non-null entries in every other column
display(
    df_all_resampled_wav_files
    .groupby(['label', 'mid', 'display_name'])
    .count()
)

Unnamed: 0_level_0,Unnamed: 1_level_0,Unnamed: 2_level_0,source,filename,filepath,old_filename,old_filepath,ind_train
label,mid,display_name,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
73,/m/068hy,"Domestic animals, pets",408,408,408,408,408,408
307,/m/0k4j,Car,6957,6957,6957,6957,6957,6957
308,/m/0912c9,"Vehicle horn, car horn, honking",408,408,408,408,408,408
314,/t/dd00134,Car passing by,9336,9336,9336,9336,9336,9336
316,/m/07r04,Truck,26517,26517,26517,26517,26517,26517
318,/m/05x_td,"Air horn, truck horn",150,150,150,150,150,150
322,/m/03j1ly,Emergency vehicle,1932,1932,1932,1932,1932,1932
347,/m/01j4z9,Chainsaw,3657,3657,3657,3657,3657,3657
353,/m/07q2z82,"Accelerating, revving, vroom",1476,1476,1476,1476,1476,1476
396,/m/03kmc9,Siren,363,363,363,363,363,363
