In [1]:
import os
import sys
from os import listdir
from os.path import isfile, join
import IPython.display as ipd
import librosa 
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
from scipy.io import wavfile as wav
import numpy as np
from timeit import default_timer as timer

In [8]:
!pip install matplotlib
!pip install pandas



You should consider upgrading via the 'c:\users\pc\appdata\local\programs\python\python38-32\python.exe -m pip install --upgrade pip' command.



Collecting pandas

You should consider upgrading via the 'c:\users\pc\appdata\local\programs\python\python38-32\python.exe -m pip install --upgrade pip' command.



  Downloading pandas-1.1.4-cp38-cp38-win32.whl (7.9 MB)
Installing collected packages: pandas
Successfully installed pandas-1.1.4


In [2]:
import soundfile as sf
from IPython.display import clear_output

In [3]:
# Read the UrbanSound8K slice index into a DataFrame and preview the first
# rows; the preview shows one row per audio slice (file name, fold, class).
metadata_path = 'UrbanSound8K/metadata/UrbanSound8K.csv'
metadata = pd.read_csv(filepath_or_buffer=metadata_path)
metadata.head(5)

Unnamed: 0,slice_file_name,fsID,start,end,salience,fold,classID,class
0,100032-3-0-0.wav,100032,0.0,0.317551,1,5,3,dog_bark
1,100263-2-0-117.wav,100263,58.5,62.5,1,5,2,children_playing
2,100263-2-0-121.wav,100263,60.5,64.5,1,5,2,children_playing
3,100263-2-0-126.wav,100263,63.0,67.0,1,5,2,children_playing
4,100263-2-0-137.wav,100263,68.5,72.5,1,5,2,children_playing


### Augmentation using Time Stretch 

In [17]:
# Time-stretch augmentation.
# For every clip listed in `metadata` and every rate in `rates`, write a
# stretched copy to
#   UrbanSound8K/Augmented_audio/fold<N>/speed_<rate*100>/<slice_file_name>
# Clips whose output file already exists are skipped, so the cell can be
# re-run after an interruption.
rates = [0.9, 1.1]
total = len(metadata) * len(rates)
count = 0
for rate in rates:
    # round() instead of int(): rate * 100 is a float product and int()
    # truncates (e.g. 0.57 * 100 evaluates to 56.99999999999999 -> 56).
    # The rates used here still map to 90 and 110.
    rate_dir_name = 'speed_' + str(round(rate * 100))

    # Generate new stretched audio file
    for _idx, row in metadata.iterrows():
        curr_fold = str(row['fold'])
        curr_file_path = 'UrbanSound8K/audio' + '/fold' + curr_fold + '/' + row['slice_file_name']
        curr_rate_path = 'UrbanSound8K/Augmented_audio' + '/fold' + curr_fold + '/' + rate_dir_name

        # Create the sub-dir if it does not exist
        os.makedirs(curr_rate_path, exist_ok=True)

        output_path = curr_rate_path + '/' + row['slice_file_name']

        # Skip when file already exists (still counted towards progress)
        if os.path.isfile(output_path):
            count += 1
            continue

        # NOTE(review): librosa.load is called with its default arguments, so
        # `sr` is whatever rate librosa returns by default, not necessarily the
        # rate of the source file - confirm this is intended.
        y, sr = librosa.load(curr_file_path)
        y_changed = librosa.effects.time_stretch(y, rate=rate)
        sf.write(output_path, y_changed, sr)

        count += 1

        # Replace the previous progress line instead of appending to it
        clear_output(wait=True)
        print("Progress: {}/{}".format(count, total))
        print("Last file: ", row['slice_file_name'])

Progress: 17464/17464
Last file:  99812-1-6-0.wav


### Augmentation using Pitch Shift 

In [18]:
# Pitch-shift augmentation.
# For every clip listed in `metadata` and every step in `tone_steps`, write a
# pitch-shifted copy to
#   UrbanSound8K/Augmented_audio/fold<N>/pitch_<step>/<slice_file_name>
# Clips whose output file already exists are skipped, so the cell can be
# re-run after an interruption.
tone_steps = [-2, 2]
total = len(metadata) * len(tone_steps)
count = 0
for tone_step in tone_steps:
    # Generate new pitched audio
    for _idx, row in metadata.iterrows():
        curr_fold = str(row['fold'])
        curr_file_path = 'UrbanSound8K/audio' + '/fold' + curr_fold + '/' + row['slice_file_name']
        curr_ps_path = 'UrbanSound8K/Augmented_audio' + '/fold' + curr_fold + '/pitch_' + str(tone_step)

        # Create sub-dir if it does not exist
        os.makedirs(curr_ps_path, exist_ok=True)

        output_path = curr_ps_path + '/' + row['slice_file_name']

        # Skip when file already exists (still counted towards progress)
        if os.path.isfile(output_path):
            count += 1
            continue

        y, sr = librosa.load(curr_file_path)
        # `sr` is passed by keyword: newer librosa releases declare it
        # keyword-only, and a positional `sr` raises TypeError there. The
        # keyword form also works on older releases.
        y_changed = librosa.effects.pitch_shift(y, sr=sr, n_steps=tone_step)
        sf.write(output_path, y_changed, sr)

        count += 1

        # Replace the previous progress line instead of appending to it
        clear_output(wait=True)
        print("Progress: {}/{}".format(count, total))
        print("Last file: ", row['slice_file_name'])

Progress: 17464/17464
Last file:  99812-1-6-0.wav


### Augmentation using Time Stretch and Pitch Shift

In [5]:
# Combined augmentation: pitch shift followed by time stretch.
# For every (tone_step, rate) pair and every clip listed in `metadata`, write
# the transformed copy to
#   UrbanSound8K/Augmented_audio/fold<N>/pitch_time<step><rate*100>/<slice_file_name>
# Clips whose output file already exists are skipped, so the cell can be
# re-run after an interruption.
tone_steps = [-2, 2]
rates = [0.9, 1.1]
# One output per clip per (tone_step, rate) pair: the number of pairs is the
# product of the two list lengths, not the length of their concatenation.
total = len(metadata) * len(tone_steps) * len(rates)
count = 0
for tone_step in tone_steps:
    for rate in rates:
        # round() instead of int(): rate * 100 is a float product and int()
        # truncates (e.g. 0.57 * 100 evaluates to 56.99999999999999 -> 56).
        # The rates used here still map to 90 and 110.
        combo_dir_name = 'pitch_time' + str(tone_step) + str(round(rate * 100))

        for _idx, row in metadata.iterrows():
            curr_fold = str(row['fold'])
            curr_file_path = 'UrbanSound8K/audio' + '/fold' + curr_fold + '/' + row['slice_file_name']
            curr_ps_path = 'UrbanSound8K/Augmented_audio' + '/fold' + curr_fold + '/' + combo_dir_name

            # Create sub-dir if it does not exist
            os.makedirs(curr_ps_path, exist_ok=True)

            output_path = curr_ps_path + '/' + row['slice_file_name']

            # Skip when file already exists (still counted towards progress)
            if os.path.isfile(output_path):
                count += 1
                continue

            y, sr = librosa.load(curr_file_path)
            # `sr` is passed by keyword: newer librosa releases declare it
            # keyword-only, and a positional `sr` raises TypeError there.
            y_changed = librosa.effects.pitch_shift(y, sr=sr, n_steps=tone_step)
            y_changed_again = librosa.effects.time_stretch(y_changed, rate=rate)
            sf.write(output_path, y_changed_again, sr)

            count += 1

            # Replace the previous progress line instead of appending to it
            clear_output(wait=True)
            print("Progress: {}/{}".format(count, total))
            print("Last file: ", row['slice_file_name'])

Progress: 34928/34928
Last file:  99812-1-6-0.wav


In [6]:
def get_files_recursive(path):
    """Collect the paths of all non-directory entries below ``path``.

    Entries are visited in the order ``os.listdir`` returns them; when an
    entry is a directory, its contents are inserted at that position by
    recursing into it.

    Args:
        path: Directory to scan.

    Returns:
        A list of paths, each built by joining ``path`` (or a nested
        directory path) with the entry name. Empty for an empty directory.
    """
    collected = []

    for name in os.listdir(path):
        candidate = os.path.join(path, name)

        if not os.path.isdir(candidate):
            collected.append(candidate)
            continue

        # Directory: descend and splice its files in at this position
        collected.extend(get_files_recursive(candidate))

    return collected

In [7]:
# Get every single file within the tree
files = get_files_recursive('UrbanSound8K/Augmented_audio')

# Define metadata columns
names = []
classes = []
folds = []
augmented = []

# Each path is expected to end in fold<N>/<augmentation>/<file name>, and the
# file name carries the class id as its second dash-separated field
# (e.g. 100032-3-0-0.wav -> class 3).
for file_path in files:
    # os.path.split understands the separators of the running platform, so
    # this works on Windows as well as on POSIX systems (splitting on a
    # literal backslash only works for Windows-style paths).
    augment_dir, file_name = os.path.split(file_path)
    fold_dir, augment = os.path.split(augment_dir)
    fold_name = os.path.basename(fold_dir)

    names.append(file_name)
    folds.append(fold_name[len('fold'):])   # 'fold7' -> '7'
    classes.append(file_name.split("-")[1])
    augmented.append(augment)


new_metadata = pd.DataFrame({'file': names, 'fold': folds, 'class_id': classes, 'augment': augmented })

# Make sure class_id and fold are int: both were parsed out of path strings,
# while the `fold` values read from the original metadata CSV are integers.
new_metadata['class_id'] = new_metadata['class_id'].astype(np.int64)
new_metadata['fold'] = new_metadata['fold'].astype(np.int64)

print(len(new_metadata), "Augmented Data")

69856 Augmented Data


In [8]:
# Lookup table from numeric class_id to its label; the position of a label in
# the list is its class_id.
class_labels = [
    'air_conditioner',
    'car_horn',
    'children_playing',
    'dog_bark',
    'drilling',
    'engine_idling',
    'gun_shot',
    'jackhammer',
    'siren',
    'street_music',
]
classes = pd.DataFrame({
    'class_id': range(len(class_labels)),
    'class': class_labels,
})

# Add the 'class' label column to the augmented metadata by joining on
# class_id. NOTE: this reassigns new_metadata from itself, so the cell is
# meant to be run once per freshly built new_metadata.
new_metadata = new_metadata.merge(classes, on='class_id')

In [14]:
# Put the augmented metadata columns into a fixed order.
cols = ["file", "fold", "class_id", "class", "augment"]
new_metadata = new_metadata.reindex(cols, axis="columns")

In [16]:
# Write the augmented-clip metadata to CSV (without the DataFrame index).
metadata_augmented_path = 'UrbanSound8K/Augmented_metadata/UrbanSound8k_Augmented.csv'
# to_csv does not create missing directories; make sure the target exists so
# the cell also works on a fresh copy of the dataset.
os.makedirs(os.path.dirname(metadata_augmented_path), exist_ok=True)
new_metadata.to_csv(metadata_augmented_path, index=False, encoding="utf-8")

In [27]:
# Reload the original (non-augmented) metadata and reshape it to the column
# layout of the augmented metadata: file, fold, class_id, class, augment.
# The file is read from the dataset's own metadata/ directory (the same path
# the load cell at the top of the notebook uses) rather than from a copy
# under Augmented_metadata/, so the notebook runs on a fresh dataset.
metadata = (
    pd.read_csv('UrbanSound8K/metadata/UrbanSound8K.csv')
    .drop(columns=['fsID', 'start', 'end', 'salience'])
    # Rename by name instead of assigning a positional column list, so a
    # different column order in the CSV cannot mislabel the data.
    .rename(columns={'slice_file_name': 'file', 'classID': 'class_id'})
)
# Original clips carry no augmentation
metadata['augment'] = 'none'

In [28]:
# Preview the first rows of `metadata` (bare last expression -> rich display)
metadata.head()

Unnamed: 0,file,fold,class_id,class,augment
0,100032-3-0-0.wav,5,3,dog_bark,none
1,100263-2-0-117.wav,5,2,children_playing,none
2,100263-2-0-121.wav,5,2,children_playing,none
3,100263-2-0-126.wav,5,2,children_playing,none
4,100263-2-0-137.wav,5,2,children_playing,none


In [29]:
# Stack the original and the augmented metadata into one table.
# ignore_index=True gives the result a fresh 0..n-1 index; without it the
# row labels of both inputs are kept and label-based lookups become ambiguous.
full_metadata = pd.concat([metadata, new_metadata], ignore_index=True)

# Sanity check: no rows lost or duplicated by the concatenation
if len(full_metadata) == len(metadata) + len(new_metadata):
    print("Dataframes merged correctly!")
else:
    print("Error! Lengths do not match.")

print("Initial data:", len(metadata))
print("New data:", len(new_metadata))
print("Merged data:", len(full_metadata))

Dataframes merged correctly!
Initial data: 8732
New data: 69856
Merged data: 78588


In [30]:
# Preview the first rows of the combined table
full_metadata.head()

Unnamed: 0,file,fold,class_id,class,augment
0,100032-3-0-0.wav,5,3,dog_bark,none
1,100263-2-0-117.wav,5,2,children_playing,none
2,100263-2-0-121.wav,5,2,children_playing,none
3,100263-2-0-126.wav,5,2,children_playing,none
4,100263-2-0-137.wav,5,2,children_playing,none


In [31]:
# Write the combined (original + augmented) metadata to CSV, without the
# DataFrame index. This is the same path the augmented-only table was written
# to earlier in the notebook, so that file is overwritten.
metadata_augmented_path = 'UrbanSound8K/Augmented_metadata/UrbanSound8k_Augmented.csv'
# to_csv does not create missing directories; make sure the target exists.
os.makedirs(os.path.dirname(metadata_augmented_path), exist_ok=True)
full_metadata.to_csv(metadata_augmented_path, index=False, encoding="utf-8")