### now as we know the dataset must be compatible to insert into a neural network
### so we need to convert it into .csv file

In [1]:
import librosa
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from os import listdir
from os.path import isfile, join

### first, we defined a function to extract out log-mel-spectrograms of each clip

In [2]:
#define a function to get log-melspectrograms of audio files
def log_mel_spec(file):
    y,sr = librosa.load(file)
    
    five_sec = 5 * sr
    diff = abs(len(y) - five_sec)
    #if len >5 sec then slice wave
    if(len(y) > five_sec):
        st_slice = np.random.randint(diff)
        end_slice = len(y) - (diff - st_slice)
        y = y[st_slice:end_slice]
        
    #if len<5 sec then pad wave
    elif(len(y) < five_sec):
        st_pad = np.random.randint(diff)
        end_pad = diff - st_pad
        y = np.pad(y, (st_pad,end_pad), "constant")

    mel = librosa.feature.melspectrogram(y = y)
    #log_mel = librosa.amplitude_to_db(mel)
    log_mel = librosa.power_to_db(mel)
    return log_mel

### now we extract filenames from good category

In [3]:
#extract list of filenames from 'good' folder
dir_good = "H:/original+augmented2/good"
good_files = [f for f in listdir(dir_good) if isfile(join(dir_good, f))]

In [4]:
good_files

['flip-rahul-brian-clip1_3,3,3.wav',
 'flip-rahul-brian-clip3_3,3,2.5.wav',
 'flip-rahul-brian-clip5_3,3,3.wav',
 'flip-rahul-clip108_3,3,2.5.wav',
 'flip-rahul-clip114_3,2.5,3.wav',
 'flip-rahul-clip176_3,2.5,3.wav',
 'flip-rahul-clip196_3,3,2.5.wav',
 'flip-rahul-clip197_3,3,3.wav',
 'flip-rahul-clip198_3,3,3.wav',
 'flip-rahul-clip199_3,3,3.wav',
 'flip-rahul-clip201_3,3,3.wav',
 'flip-rahul-clip202_3,3,3.wav',
 'flip-rahul-clip203_3,3,3.wav',
 'flip-rahul-clip204_3,3,3.wav',
 'flip-rahul-clip205_3,3,3.wav',
 'flip-rahul-clip206_3,3,3.wav',
 'flip-rahul-clip220_3,2.5,3.wav',
 'flip-rahul-clip227_3,2.5,3.wav',
 'flip-rahul-clip250_3,3,2.5.wav',
 'flip-rahul-clip251_3,3,2.5.wav',
 'flip-rahul-clip252_3,3,2.5.wav',
 'flip-rahul-clip254_3,2.5,3.wav',
 'flip-rahul-clip27_3,3,3.wav',
 'flip-rahul-clip281_3,3,2.5.wav',
 'flip-rahul-clip291_3,3,2.5.wav',
 'flip-rahul-clip302_3,3,2.5.wav',
 'flip-rahul-clip329_3,3,3.wav',
 'flip-rahul-clip330_3,3,3.wav',
 'flip-rahul-clip331_3,3,2.5.wav',
 '

In [5]:
#get complete location of each file
good_files = list(map(lambda x : dir_good + '/' + x, good_files))

### here we extracted out log-mel-specs of good audio clips in the form of list

In [6]:
#get log mel spectrograms of good_files
good_logmels = [log_mel_spec(i) for i in good_files]

### then we save those log-mel-specs into a new numpy file

In [9]:
#store good_logmels to a numpy file
np.save("H:/spec_data2/good_logmels",good_logmels)

### here we created a new dataframe to store filenames of labels of good class

In [10]:
#store in a dataframe
#data_good = pd.DataFrame(np.transpose([good_files,good_logmels,['good'] * len(good_files)]),columns = ['filenames','logmels''class'])
data_good = pd.DataFrame({'filenames':good_files,'class':['good'] * len(good_files)})

### now we repeat the same above proceducre for bad and average audio clips

In [11]:
#extract list of filenames from 'bad' folder
dir_bad = "H:/original+augmented2/bad"
bad_files = [f for f in listdir(dir_bad) if isfile(join(dir_bad, f))]

In [12]:
#get complete location of each file
bad_files = list(map(lambda x : dir_bad + '/' + x, bad_files))

In [13]:
#get log mel spectrograms of bad_files
bad_logmels = [log_mel_spec(i) for i in bad_files]

In [14]:
#store good_logmels to a numpy file
np.save("H:/spec_data2/bad_logmels",bad_logmels)

In [15]:
#store in a dataframe
data_bad = pd.DataFrame({'filenames':bad_files,'class':['bad'] * len(bad_files)})

In [16]:
#extract list of filenames from 'avg' folder
dir_avg = "H:/original+augmented2/average"
avg_files = [f for f in listdir(dir_avg) if isfile(join(dir_avg, f))]

In [17]:
#get complete location of each file
avg_files = list(map(lambda x : dir_avg + '/' + x, avg_files))

In [18]:
#get log mel spectrograms of avg_files
avg_logmels = [log_mel_spec(i) for i in avg_files]

In [19]:
#store avg_logmels to a numpy file
np.save("H:/spec_data2/avg_logmels",avg_logmels)

In [20]:
#store in a dataframe
data_avg = pd.DataFrame({'filenames':avg_files,'class':['average'] * len(avg_files)})

### now we create a final empty dataframe

In [21]:
#now create a final empty dataframe
data = pd.DataFrame(columns = ['filenames','class'])

### in this we append all 3 dataframes of good/bad/average class

In [22]:
#append all 3 dataframes in 'data'
data = data.append(data_good)
data = data.append(data_bad)
data = data.append(data_avg)

In [23]:
data

Unnamed: 0,filenames,class
0,H:/original+augmented2/good/flip-rahul-brian-c...,good
1,H:/original+augmented2/good/flip-rahul-brian-c...,good
2,H:/original+augmented2/good/flip-rahul-brian-c...,good
3,H:/original+augmented2/good/flip-rahul-clip108...,good
4,H:/original+augmented2/good/flip-rahul-clip114...,good
5,H:/original+augmented2/good/flip-rahul-clip176...,good
6,H:/original+augmented2/good/flip-rahul-clip196...,good
7,H:/original+augmented2/good/flip-rahul-clip197...,good
8,H:/original+augmented2/good/flip-rahul-clip198...,good
9,H:/original+augmented2/good/flip-rahul-clip199...,good


### then finally we save the complete dataframe as a .csv file

In [24]:
#save 'data' dataframe to csv file
data.to_csv('H:/spec_data2/data.csv',index = False)