In [15]:
import numpy as np
import random
import itertools
import librosa
import IPython.display as ipd
import matplotlib.pyplot as plt
import random
import glob
from random import shuffle
%matplotlib inline

In [40]:
class audio_aug():
    """Simple waveform-level augmentations for audio clips.

    Every augmentation takes a waveform (a NumPy array; the mixing and
    padding code assumes it is 1-D — TODO confirm for multi-channel input)
    and returns a new array, optionally plotting the raw and augmented
    signals for visual comparison.

    Parameters
    ----------
    random_files : str, iterable of str, or None
        Where `combine_wavs` finds background clips: a directory that is
        searched for ``*.wav`` files, or an explicit collection of file
        paths. ``None`` falls back to ``DEFAULT_RANDOM_DIR``.
    """

    # Directory searched for background clips when none is configured.
    DEFAULT_RANDOM_DIR = 'wav/random/'

    def __init__(self, random_files = None):
        # Always define the attribute so later lookups cannot raise AttributeError.
        self.random_files = random_files

    def __plot_time_series__(self, data, aug_data = None):
        """Plot the raw waveform and, when given, the augmented waveform below it.

        The x axis is normalised to the range 0..1 rather than seconds.
        """
        plt.figure(figsize=(12, 3))
        plt.title('Raw wave ')
        plt.ylabel('Amplitude')
        plt.plot(np.linspace(0, 1, len(data)), data)
        plt.show()

        # The second figure is optional: the signature defaults aug_data to None.
        if aug_data is None:
            return

        plt.figure(figsize=(12, 3))
        plt.title('Augmented wave ')
        plt.ylabel('Amplitude')
        plt.plot(np.linspace(0, 1, len(aug_data)), aug_data)
        plt.show()

    def _load_audio_file_(self, file_path, sr = 22050):
        """Load an audio file and return only its samples.

        `sr` is the sample rate librosa resamples to; the default matches
        librosa's own default, so single-argument calls behave as before.
        """
        # librosa.load returns (samples, sample_rate); only the samples are kept.
        data = librosa.load(file_path, sr=sr)[0]
        return data

    def _list_random_files(self):
        """Return the candidate background clips described by `self.random_files`."""
        source = getattr(self, 'random_files', None) or self.DEFAULT_RANDOM_DIR
        if isinstance(source, str):
            # Treat a string as a directory; tolerate a trailing path separator.
            return sorted(glob.glob('{}/*.wav'.format(source.rstrip('/\\'))))
        return list(source)

    def stretch(self, data, rate=1, plot=False):
        """Time-stretch `data` by `rate` (above 1 speeds up, below 1 slows down)."""
        # `rate` is keyword-only in current librosa; the keyword form also
        # works on older releases.
        data_stretch = librosa.effects.time_stretch(data, rate=rate)
        if plot:
            self.__plot_time_series__(data, data_stretch)
        return data_stretch

    def add_noise(self, data, noise_faction = 0.0075, plot = False):
        """Add Gaussian white noise scaled by `noise_faction` to `data`.

        The scale used to be hard-coded to 0.01 regardless of the argument;
        the parameter is now honoured.
        """
        wn = np.random.randn(len(data))
        data_wn = data + noise_faction*wn
        if plot:
            self.__plot_time_series__(data, data_wn)
        return data_wn

    def audio_transition(self, data, tran_factor = 1000, plot = False):
        """Circularly shift `data` by `tran_factor` samples (wraps around)."""
        data_roll = np.roll(data, tran_factor)
        if plot:
            self.__plot_time_series__(data, data_roll)
        return data_roll

    def combine_wavs(self, src, trgt=None, random_file = True, vol_trgt = 0.25, random_vol = False, plot=False):
        """Mix a second clip into `src` at reduced volume.

        Parameters
        ----------
        src : array
            Signal to augment; the result has the same length.
        trgt : array, optional
            Clip to overlay. Ignored when `random_file` is true.
        random_file : bool
            When true, a clip is picked at random from the configured
            background files and replaces `trgt`.
        vol_trgt : float
            Gain applied to the overlaid clip.
        random_vol : bool
            When true, `vol_trgt` is replaced by a random gain between
            0.10 and 0.50.
        plot : bool
            Plot `src` and the mixed signal.

        Raises
        ------
        ValueError
            If no background file is available, or if `random_file` is
            false and no `trgt` was supplied.
        """
        if random_file:
            noise_files = self._list_random_files()
            if not noise_files:
                raise ValueError("no background .wav files found to mix in")
            trgt = self._load_audio_file_("{}".format(random.choice(noise_files)))
        elif trgt is None:
            raise ValueError("trgt must be given when random_file is False")

        if random_vol:
            vol_trgt = random.randint(10, 50)/100  # gain between 0.10 and 0.50

        src = np.asarray(src)
        overlay = np.asarray(trgt)[:len(src)]
        if len(overlay) < len(src):
            # Zero-pad a short overlay so the sum is defined sample for sample.
            overlay = np.pad(overlay, (0, len(src) - len(overlay)), mode='constant')

        data_combined = src + overlay*vol_trgt
        if plot:
            self.__plot_time_series__(src, data_combined)
        return data_combined

In [41]:
class auto_aug():
    """Apply a list of augmentation functions to a labelled dataset.

    Parameters
    ----------
    func_list : list of callables
        Default augmentations; each takes one sample and returns the
        augmented sample.
    """

    def __init__(self, func_list):
        self.func_list = func_list

    def shuffle_labeled_data(self,data, lbls):
        """Shuffle `data` and `lbls` together so each sample keeps its label.

        Both sequences are shuffled in place (they must support slice
        assignment, e.g. lists) and are also returned.

        Raises
        ------
        ValueError
            If the two sequences differ in length.
        """
        if len(data) != len(lbls):
            raise ValueError("data and lbls must have the same length")
        if len(data) == 0:
            # Nothing to shuffle; unpacking zip(*[]) would fail.
            return data, lbls
        combined = list(zip(data, lbls))
        shuffle(combined)
        data[:], lbls[:] = zip(*combined)
        return data, lbls

    def augment_all_data(self, data, lbls, func_list = None, random_func = True, load_from_path = False, load_func = None, shuffle_data = True):
        """Run every augmentation over every sample and collect the results.

        Parameters
        ----------
        data : sequence
            Samples, or file paths when `load_from_path` is true.
        lbls : sequence
            One label per sample.
        func_list : list of callables, optional
            Augmentations to apply; defaults to the list given at construction.
        random_func : bool
            Accepted for compatibility; not consulted by this method.
        load_from_path : bool
            When true, `data` holds paths that are read with `load_func` first.
        load_func : callable, optional
            Reads one path and returns the sample. Required when
            `load_from_path` is true.
        shuffle_data : bool
            Shuffle the augmented samples together with their labels.

        Returns
        -------
        (list, list)
            Augmented samples and matching labels; their length is
            ``len(func_list) * len(data)``.

        Raises
        ------
        ValueError
            If `load_func` is missing while `load_from_path` is true, or if
            `data` and `lbls` differ in length.
        """
        if func_list is None:
            func_list = self.func_list
            # Message kept verbatim so recorded notebook output stays the same.
            print("Setting random functions to TRUE as func_list is not passed")
        if load_from_path:
            if load_func is None:
                raise ValueError("Kindly pass a function which reads data and returns it")
            data = [load_func(path) for path in data]

        # Accept any iterable (tuple, array, generator), not only lists.
        data = list(data)
        lbls = list(lbls)
        if len(data) != len(lbls):
            raise ValueError("data and lbls must have the same length")

        aug_data = []
        aug_labels = []
        for fc in func_list:
            aug_data.extend(fc(sample) for sample in data)
            aug_labels.extend(lbls)

        if shuffle_data:
            aug_data, aug_labels = self.shuffle_labeled_data(aug_data, aug_labels)
        return aug_data, aug_labels

In [42]:
# Example clips to augment, paired with the word spoken in each one.
lst, lbls = ["wav/a1.wav", "wav/a2.wav"], ["go", "stair"]

# Augmenter configured with the folder holding the background clips.
aug = audio_aug('wav/random/')

# wav = aug.combine_wavs(data, random_vol = True)
# ipd.Audio(wav, rate = 16000)

# Run each augmentation over each clip; the paths are read via the augmenter's loader.
# Note: `lbls` is rebound to the labels of the augmented (shuffled) output.
auto_ = auto_aug([aug.add_noise, aug.audio_transition, aug.combine_wavs])
output, lbls = auto_.augment_all_data(
    lst,
    lbls,
    load_from_path=True,
    load_func=aug._load_audio_file_,
)

Setting random functions to TRUE as func_list is not passed


In [49]:
ind = 0
print(lbls[ind])
# The clips were loaded with librosa's default sample rate (22050 Hz), so play
# them back at that rate; 16000 would make them sound slower and lower-pitched.
ipd.Audio(output[ind], rate=22050)

stair


In [48]:
# def load_audio_file(file_path):
# #     input_length = 16000
#     data = librosa.core.load(file_path)[0] #, sr=16000
#     return data

# def plot_time_series(data):
#     fig = plt.figure(figsize=(14, 8))
#     plt.title('Raw wave ')
#     plt.ylabel('Amplitude')
#     plt.plot(np.linspace(0, 1, len(data)), data)
#     plt.show()
# data = load_audio_file("wav/a1.wav")
# # plot_time_series(data)
# ipd.Audio(data, rate=16000)
# # Adding white noise 
# def add_noise(data, noise_faction = 0.0075, plot = False):
#     wn = np.random.randn(len(data))
#     data_wn = data + 0.01*wn
#     if plot:
#         plot_time_series(data_wn)
#     return data_wn

# data_wn = add_noise(data)
# ipd.Audio(data_wn, rate=16000)
# def audio_transition(data, tran_factor = 1000, plot = False):
#     data_roll = np.roll(data, tran_factor)
#     if plot:
#         plot_time_series(data_roll)
#     return data_roll
# data_roll = audio_transition(data)
# ipd.Audio(data_roll, rate=16000)

# # stretching the sound
# # a rate below 1 slows the sound down
# # a rate above 1 speeds it up
# def stretch(data, rate=1, plot=False):
#     data_stretch = librosa.effects.time_stretch(data, rate)
#     if plot:
#         plot_time_series(data_stretch)
#     return data_stretch


# data_stretch = stretch(data, 1.3)
# print("This makes the sound deeper but we can still hear 'off' ")
# ipd.Audio(data_stretch, rate=16000)
# data2 = load_audio_file("wav/a2.wav")
# ipd.Audio(data2, rate=16000)

# def combine_wavs(src, trgt, random_file = True, vol_trgt = 0.15, random_vol = False, plot=True):
    
#     if random_file:
#         noise_files = glob.glob('wav/random/*.wav')
#         random_val = random.randint(0, len(noise_files)-1)
#         trgt = load_audio_file("{}".format(noise_files[random_val]))
#     if random_vol:
#         vol_trgt = random.randint(0, 50)/100 #CLIPPING MAX VOL TO 50
#     print(vol_trgt)
#     data_combined = src +  trgt[:len(src)]*vol_trgt
#     if plot:
#         plot_time_series(data_combined)
#     return data_combined

In [None]:
# The standalone helpers were folded into `audio_aug`, so load the clips and mix
# them through the `aug` instance instead of the removed module-level functions.
data = aug._load_audio_file_("wav/a1.wav")
data2 = aug._load_audio_file_("wav/a2.wav")
# With random_file left at its default (True) a random background clip is mixed in
# and `data2` is ignored; pass random_file=False to overlay `data2` itself.
data_combined = aug.combine_wavs(data, data2, random_vol=True, plot=False)
# Clips are loaded at librosa's default 22050 Hz, so play back at that rate.
ipd.Audio(data_combined, rate=22050)