In [43]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
import librosa
np.set_printoptions(suppress=True)

In [44]:
# Load the path table as a plain NumPy array. Later code reads column 0 of each
# row as the audio file path and column 1 as its label.
audiopaths = np.array(pd.read_csv('./datapath0.csv'))
m, n = audiopaths.shape  # m = number of rows, n = number of columns

def noise(audio):
  """Return ``audio`` with additive Gaussian noise.

  The noise is drawn from a zero-mean normal distribution whose standard
  deviation equals ``audio.std()``, one sample per element of ``audio``
  (``audio.size``), and is scaled by 0.1 before being added.

  Uses NumPy's global random state, so results vary between calls unless
  the caller seeds ``np.random``.
  """
  jitter = np.random.normal(0, audio.std(), audio.size)
  return audio + 0.1 * jitter

def stretch(audio):
  """Time-stretch ``audio`` via ``librosa.effects.time_stretch`` with ``rate=0.5``.

  Returns whatever librosa returns for that call (librosa documents a rate
  below 1 as slowing the signal down).
  """
  return librosa.effects.time_stretch(audio, rate=0.5)

def scale_pitch(audio,sr):
  """Pitch-shift ``audio`` by 0.7 steps using ``librosa.effects.pitch_shift``.

  Parameters
  ----------
  audio : waveform passed to librosa as ``y``.
  sr : sampling rate of ``audio``.

  Returns
  -------
  The pitch-shifted waveform returned by librosa.
  """
  # ``sr`` and ``n_steps`` are keyword-only in librosa >= 0.10; passing them
  # positionally raises TypeError there. Keywords work on older releases too.
  return librosa.effects.pitch_shift(audio, sr=sr, n_steps=0.7)

def ex_features(audio, sr=22050):
  """Summarise a waveform as one flat feature vector.

  Five librosa features are computed, each is averaged with
  ``np.mean(feature.T, axis=0)``, and the averages are concatenated in this
  order: mel-spectrogram, zero-crossing rate, RMS energy, chromagram, MFCC.

  Parameters
  ----------
  audio : waveform passed to librosa as ``y``.
  sr : sampling rate handed to the sample-rate-aware features
    (mel-spectrogram, chromagram, MFCC). Defaults to 22050, which is
    librosa's own default and therefore what the previous positional calls
    used implicitly.
    NOTE(review): ``get_features`` loads audio at 44100 Hz but does not pass
    a rate here -- confirm whether 22050 is really intended.

  Returns
  -------
  1-D float64 ``numpy.ndarray`` holding the concatenated per-feature means.
  """
  # ``y`` (and ``sr``) are passed by keyword: librosa >= 0.10 made the
  # arguments of melspectrogram / rms / chroma_stft / mfcc keyword-only, so
  # the positional form raises TypeError there.
  feature_matrices = (
    librosa.feature.melspectrogram(y=audio, sr=sr),   # mel-spectrogram
    librosa.feature.zero_crossing_rate(y=audio),      # zero-crossing rate
    librosa.feature.rms(y=audio),                     # RMS energy
    librosa.feature.chroma_stft(y=audio, sr=sr),      # chromagram
    librosa.feature.mfcc(y=audio, sr=sr),             # mel frequency cepstral coefficients
  )
  feature_means = [np.mean(matrix.T, axis=0) for matrix in feature_matrices]

  # Leading empty float64 array keeps the result float64 even when librosa
  # returns float32, exactly as the original incremental hstack did.
  return np.hstack([np.array([])] + feature_means)

def get_features(audio_wav):
  """Load one audio file and return a 3-row block of feature vectors.

  The file is loaded with librosa at 44100 Hz, reading 2.5 s starting 0.6 s
  into the file. The rows of the returned array are:

    row 0 -- features of the loaded audio,
    row 1 -- features of the audio after ``noise``,
    row 2 -- features of the audio after ``stretch`` followed by ``scale_pitch``.

  NOTE(review): the clip is loaded at 44100 Hz, but ``ex_features`` is called
  without a sample rate -- confirm that this is intended.
  """
  audio, sr = librosa.load(audio_wav, res_type='kaiser_fast', duration=2.5, sr=44100, offset=0.6)

  # Evaluated top to bottom, so the random noise is drawn at the same point
  # as in a step-by-step build.
  feature_rows = [
    ex_features(audio),                             # row 0: original
    ex_features(noise(audio)),                      # row 1: noisy
    ex_features(scale_pitch(stretch(audio), sr)),   # row 2: stretched + pitch-shifted
  ]
  return np.vstack(feature_rows)

def make_array(i):
  """Build the labelled feature rows for entry ``i`` of ``audiopaths``.

  Column 0 of the entry is used as the audio path and column 1 as the label.
  Returns a list of three ``[label, feature_vector]`` pairs, one for each of
  the first three rows produced by ``get_features``.
  """
  entry = audiopaths[i]
  data_block = get_features(entry[0])
  label = entry[1]
  return [[label, data_block[row]] for row in range(3)]


In [49]:
# Part 1: build the labelled feature rows for entries 0-2999 and export them.
data_set = []
for i in range(3000):
  data_set += make_array(i)

dataframe = pd.DataFrame(data_set)
dataframe.to_csv('./augmented_dataset1.csv', index=False)

In [None]:
# Part 2: build the labelled feature rows for entries 3000-5999 and export them.
data_set = []
for i in range(3000, 6000):
  data_set += make_array(i)

dataframe = pd.DataFrame(data_set)
dataframe.to_csv('./augmented_dataset2.csv', index=False)

In [None]:
# Part 3: build the labelled feature rows for entries 6000-8999 and export them.
data_set = []
for i in range(6000, 9000):
  data_set += make_array(i)

dataframe = pd.DataFrame(data_set)
dataframe.to_csv('./augmented_dataset3.csv', index=False)

In [None]:
# Part 4: build the labelled feature rows for the remaining entries
# (9000 up to, but not including, m) and export them.
data_set = []
for i in range(9000, m):
  data_set += make_array(i)

dataframe = pd.DataFrame(data_set)
dataframe.to_csv('./augmented_dataset4.csv', index=False)

In [None]:
# Read the four partial exports back in order and stack them into a single
# frame with a fresh 0..N-1 index, then write the combined dataset.
augmented_dataset0 = pd.concat(
    [
        pd.read_csv(part_path)
        for part_path in (
            './augmented_dataset1.csv',
            './augmented_dataset2.csv',
            './augmented_dataset3.csv',
            './augmented_dataset4.csv',
        )
    ],
    ignore_index=True,
)
augmented_dataset0.to_csv("./augmented_dataset0.csv", index=False)

In [60]:
# Fixing exported csv
# Column "1" holds each feature vector as the text NumPy printed for it
# (space-separated numbers, possibly wrapped over several lines, inside
# square brackets). The steps below rewrite that text as "[a,b,c]".
dataframe = pd.read_csv("./augmented_dataset0.csv")

# Ordered (regex, replacement) steps. Raw strings are used so that "\[" and
# "\]" reach the regex engine as written instead of being parsed as invalid
# Python string escapes (a SyntaxWarning on Python 3.12+).
cleanup_steps = [
  (r" +", ","),    # any run of spaces -> one separator, whatever its length
  (r",,", ","),    # collapse a doubled separator
  (r"\n", ""),     # drop the line breaks from wrapped array output
  (r"\[,", "["),   # no separator right after the opening bracket
  (r",\]", "]"),   # no separator right before the closing bracket
]

feature_text = dataframe["1"]
for pattern, replacement in cleanup_steps:
  feature_text = feature_text.replace(pattern, replacement, regex=True)
dataframe["1"] = feature_text

dataframe.to_csv("./Final_Data.csv", index=False)