# Noise Augmentation


In [None]:
#Importing Libraries for Audio Editing and Plotting
import librosa
import librosa.display
import pandas as pd
import os
from pathlib import Path
import noisereduce as nr
import random
import soundfile as sf

In [None]:
# Build the metadata table: one row per recording with its audio path and label.
download_path = Path.cwd()/'metadata'/'cnt_Philippinesgrp_birds'
data_path = Path.cwd()

# Read every CSV export in a stable (sorted) order and concatenate once,
# instead of growing the frame inside the loop.
csv_files = sorted(download_path.glob('*.csv'))
if not csv_files:
    raise FileNotFoundError(f'No CSV metadata files found in {download_path}')
df = pd.concat([pd.read_csv(csv_file) for csv_file in csv_files])

# The English name is used as a folder name in relative_path below, so strip
# its spaces. The vectorised .str accessor leaves missing names as NaN rather
# than raising the way a per-row lambda would.
df['recordings__en'] = df['recordings__en'].str.replace(' ', '', regex=False)
df['relative_path'] = '/audio' + '/' + df['recordings__en'].astype(str) + '/' + df['recordings__id'].astype(str) + '.mp3'
df = df[['recordings__en', 'relative_path', 'recordings__file-name', 'recordings__id']]

# Keep only the ten names below as labels. The number beside each name is
# presumably its recording count in the source data -- verify before relying on it.
# PhilippineBulbul 60
# Balicassiao 59
# White-earedBrownDove 57
# LuzonHawk-Owl 51
# PhilippineCoucal 45
# White-browedShortwing 43
# ElegantTit 42
# Long-tailedBushWarbler 42
# White-ventedWhistler 38
# Grey-backedTailorbird 36
selected_species = [
    'PhilippineBulbul', 'Balicassiao', 'White-earedBrownDove', 'LuzonHawk-Owl',
    'PhilippineCoucal', 'White-browedShortwing', 'ElegantTit',
    'Long-tailedBushWarbler', 'White-ventedWhistler', 'Grey-backedTailorbird',
]
df = df[df['recordings__en'].isin(selected_species)]

# Map each name to an integer label.
labels = df['recordings__en']
labels_unique = set(labels)
# Number the names in sorted order: iteration order of a set of strings can
# differ between interpreter runs, which would reshuffle the label ids.
keys = {name: label_id for label_id, name in enumerate(sorted(labels_unique))}

# Two-column lookup table (name -> label) used to attach the label to every row.
df2 = pd.DataFrame({'recordings__en': list(keys), 'labels': list(keys.values())})

df = pd.merge(df, df2, on="recordings__en")

# Drop incomplete rows while 'recordings__file-name' is still present, so rows
# lacking a file name are removed too; the column itself is not needed afterwards.
df = df.dropna()
df = df.drop('recordings__file-name', axis=1)

# Renumber the rows; the previous index is kept as an 'index' column.
df = df.reset_index()
print('\nRow Count:', df.shape[0])

In [None]:
# For every recording: cut one random fixed-length clip, resample it, apply
# noise reduction and save the result as a WAV file under <data_path>/augaudio.
CLIP_SECONDS = 4
TARGET_SR = 44100

# Make sure the output folder exists before the first write.
out_dir = data_path / 'augaudio'
out_dir.mkdir(parents=True, exist_ok=True)

for rel_path, label, rec_id in zip(df['relative_path'], df['labels'], df['recordings__id']):
    print(rel_path, label)

    # relative_path starts with '/', which would make the join absolute; strip it.
    src_path = data_path / rel_path.lstrip('/')

    # Decode the file once and slice the clip from memory instead of loading
    # it a second time with offset/duration.
    audio, sr = librosa.load(src_path)
    if audio.size == 0:
        print('Skipping empty recording:', src_path)
        continue

    # Pick the start so that a full clip fits; recordings shorter than the
    # clip length are used in full.
    clip_len = int(CLIP_SECONDS * sr)
    start = random.randint(0, max(0, len(audio) - clip_len))
    scale = audio[start:start + clip_len]

    scale = librosa.resample(scale, orig_sr=sr, target_sr=TARGET_SR, res_type='kaiser_best')
    scale = nr.reduce_noise(y=scale, sr=TARGET_SR)

    address = str(out_dir / (str(rec_id) + '.wav'))
    print(address)
    # The samples are at TARGET_SR after resampling, so the file must be
    # written with that rate rather than the rate librosa loaded at.
    sf.write(address, scale, TARGET_SR)