In [2]:
import os
import sys
from os import listdir
from os.path import isfile, join
import IPython.display as ipd
import librosa 
import librosa.display
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
from scipy.io import wavfile as wav
import numpy as np
from timeit import default_timer as timer

In [3]:
def extract_MFCCandMel(filename):
    audio, sample_rate=librosa.load(filename, res_type='kaiser_fast')
    mfccs=np.mean(librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=50).T, axis=0)
    mel=np.mean(librosa.feature.melspectrogram(audio ,sr=sample_rate, n_mels=50,fmax=8000).T, axis=0)
    
    return mfccs, mel

In [4]:
metadata=pd.read_csv('UrbanSound8K/Augmented_metadata/UrbanSound8k_Augmented.csv')
metadata_ori=pd.read_csv('UrbanSound8K/metadata/UrbanSound8K.csv')
metadata.head()

Unnamed: 0,file,fold,class_id,class,augment
0,101415-3-0-2.wav,1,3,dog_bark,pitch_-2
1,101415-3-0-3.wav,1,3,dog_bark,pitch_-2
2,101415-3-0-8.wav,1,3,dog_bark,pitch_-2
3,102106-3-0-0.wav,1,3,dog_bark,pitch_-2
4,102842-3-0-1.wav,1,3,dog_bark,pitch_-2


In [5]:
fold_list = ['fold1', 'fold2', 'fold3', 'fold4', 'fold5', 'fold6', 'fold7', 'fold8', 'fold9', 'fold10']

In [6]:
Augment_list= ['pitch_2', 'pitch_-2','pitch_time290','pitch_time-290','pitch_time2110','pitch_time-2110','speed_90','speed_110']

In [7]:
stacked_features = []
exceptions=0
count_1=0
count_2=0
labels=['dog_bark', 'gun_shot', 'jackhammer', 'engine_idling',
       'children_playing', 'siren', 'street_music', 'air_conditioner',
       'drilling', 'car_horn']

start_time = timer()
for i in range(10):
    for j in range(8):
  
        mypath = 'UrbanSound8K/Augmented_audio/'+ fold_list[i] + '/' + Augment_list[j] + '/'
        files = [mypath + f for f in listdir(mypath) if isfile(join(mypath, f))]

        for fn in files:
            try: 
                mfccs,mels = extract_MFCCandMel(fn)
                features=np.reshape((np.hstack([mfccs,mels])),(20,5))
                count_1+=1

            except: 
                print(fn)
                exceptions += 1
                continue

            l_row = metadata.loc[metadata['file']==fn.split('/')[-1]].values.tolist()
            #print(l_row)
            label = l_row[0][-2]
            if label not in labels:
                raise Exception("\n Sorry, there is an error in the code.")
                
            #print(label)
            #exit()
            fold = i+1
            

            stacked_features.append([features, features.shape, label, fold])

            
for w in range(10):
    # get file names
    mypath = 'UrbanSound8K/audio/'+ fold_list[w] + '/'
    files = [mypath + f for f in listdir(mypath) if isfile(join(mypath, f))]
    
    for fn in files:
        try: # extract features
            mfccs,mels = extract_MFCCandMel(fn)
            features=np.reshape((np.hstack([mfccs,mels])),(20,5))
            count_2+=1
            
        except: # else exception (.ds_store files are part of mac file systems)
            print(fn)
            exceptions += 1
            continue
            
        l_row = metadata_ori.loc[metadata_ori['slice_file_name']==fn.split('/')[-1]].values.tolist()
        label = l_row[0][-1]
        if label not in labels:
                raise Exception("\n Sorry, there is an error in the code.")
        fold = w+1
    
        stacked_features.append([features, features.shape, label, fold])
        
        
print("Exceptions: ", exceptions)
end_time = timer()
print(print("time taken: {0} minutes {1:.1f} seconds".format((end_time - start_time)//60, (end_time - start_time)%60)))
print('Finished feature extraction from all folder')
print("Total features extracted from augmented part {}".format(count_1))
print("Total features extracted from non augmented part {}".format(count_2))



UrbanSound8K/audio/fold1/.DS_Store




UrbanSound8K/audio/fold2/.DS_Store
UrbanSound8K/audio/fold3/.DS_Store
UrbanSound8K/audio/fold4/.DS_Store
UrbanSound8K/audio/fold5/.DS_Store
UrbanSound8K/audio/fold6/.DS_Store
UrbanSound8K/audio/fold7/.DS_Store
UrbanSound8K/audio/fold8/.DS_Store
UrbanSound8K/audio/fold9/.DS_Store
UrbanSound8K/audio/fold10/.DS_Store
Exceptions:  10
time taken: 98.0 minutes 30.2 seconds
None
Finished feature extraction from all folder
Total features extracted from augmented part 69856
Total features extracted from non augmented part 8732


In [9]:
print(stacked_features[0])

[array([[-4.2842056e+02,  9.8572411e+01,  1.0260828e+01, -1.9949150e+01,
        -5.3445864e+00],
       [-2.4852421e+00, -1.9489704e+01, -5.9310741e+00, -1.1744574e+01,
         2.0196180e-01],
       [-3.4440475e+00, -3.2055762e+00,  3.9682336e+00,  1.1558492e+00,
         6.4095559e+00],
       [ 2.6183872e+00, -1.7926279e+00, -5.0788226e+00, -5.9173770e+00,
        -6.7679012e-01],
       [-5.9074984e+00, -4.1243200e+00, -5.4822879e+00, -1.6063289e-01,
        -2.3007553e+00],
       [-1.1926231e+00,  6.5177363e-01, -2.1656528e+00, -2.5940244e+00,
        -2.7537112e+00],
       [-3.0138681e+00, -3.2091963e+00, -2.5581405e+00, -4.4789257e+00,
        -2.2157476e+00],
       [-2.2501101e+00, -3.0774078e+00, -4.2685604e+00, -1.3626212e+00,
        -3.6199584e+00],
       [-3.1706736e+00, -6.2901407e-01, -3.4485915e+00, -2.4074914e+00,
        -2.6976581e+00],
       [-2.9300759e+00, -3.6643293e+00, -1.9631242e+00, -2.3684788e+00,
        -1.5632008e+00],
       [ 2.1607541e-03,  4.98

In [10]:
cols=['Stacked_Features', 'Matrix_Shape', 'Label', 'Fold']
Stacked_feature_pd=pd.DataFrame(data=stacked_features , columns=cols)
Stacked_feature_pd.head()

Unnamed: 0,Stacked_Features,Matrix_Shape,Label,Fold
0,"[[-428.42056, 98.57241, 10.260828, -19.94915, ...","(20, 5)",dog_bark,1
1,"[[-383.46613, 132.247, 17.835365, -33.931313, ...","(20, 5)",dog_bark,1
2,"[[-462.8479, 78.49388, 6.611009, -3.8108122, 1...","(20, 5)",dog_bark,1
3,"[[-282.7671, 93.10348, -51.64786, -12.338222, ...","(20, 5)",dog_bark,1
4,"[[-409.95215, 125.347595, 28.116976, 12.195362...","(20, 5)",gun_shot,1


In [11]:
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical

X = np.array(Stacked_feature_pd.Stacked_Features.tolist())
y = np.array(Stacked_feature_pd.Label.tolist())


le = LabelEncoder()
yy = to_categorical(le.fit_transform(y))
print(yy.shape)

(78588, 10)


In [12]:
from sklearn.model_selection import train_test_split 

x_train, x_test, y_train, y_test = train_test_split(X, yy, test_size=0.2,random_state=42)

In [13]:
print(x_train.shape, x_test.shape, y_train.shape, y_test.shape, yy.shape)

(62870, 20, 5) (15718, 20, 5) (62870, 10) (15718, 10) (78588, 10)


In [14]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation, Flatten
from keras.layers import Convolution2D, Conv1D, MaxPooling1D, GlobalAveragePooling2D, LSTM, TimeDistributed
from keras.optimizers import Adam
from keras.utils import np_utils
from sklearn import metrics

In [15]:
model=Sequential()
model.add(Conv1D(64, kernel_size=3, padding="Same", activation="relu", input_shape=(20,5)))
model.add(MaxPooling1D(padding="same"))
model.add(Conv1D(128, kernel_size=3, padding="same", activation="relu"))
model.add(MaxPooling1D(padding="Same"))
model.add(LSTM(128,return_sequences=True))
model.add(LSTM(128, return_sequences=True))

model.add(TimeDistributed(Dense(256, activation="relu")))
model.add(Dropout(0.5))
model.add(TimeDistributed(Dense(512, activation="relu")))
model.add(Dropout(0.5))
model.add(Flatten())
model.add(Dense(10, activation="softmax"))

In [16]:
model.compile(loss='categorical_crossentropy', metrics=['accuracy'], optimizer="adam")
model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
conv1d (Conv1D)              (None, 20, 64)            1024      
_________________________________________________________________
max_pooling1d (MaxPooling1D) (None, 10, 64)            0         
_________________________________________________________________
conv1d_1 (Conv1D)            (None, 10, 128)           24704     
_________________________________________________________________
max_pooling1d_1 (MaxPooling1 (None, 5, 128)            0         
_________________________________________________________________
lstm (LSTM)                  (None, 5, 128)            131584    
_________________________________________________________________
lstm_1 (LSTM)                (None, 5, 128)            131584    
_________________________________________________________________
time_distributed (TimeDistri (None, 5, 256)            3

In [17]:
score = model.evaluate(x_test, y_test, verbose=1)
accuracy = 100*score[1]

print("Pre-training accuracy: %.4f%%" % accuracy)

Pre-training accuracy: 8.4934%


In [18]:
import tensorflow as tf 
class Mycallback(tf.keras.callbacks.Callback):
    def on_epoch_end(self, epoch, logs):
        if(logs["val_accuracy"]>0.99):
            print("\n Reached the required accuracy so stopped training")
            self.model.stop_training = True

In [19]:
callbacks=Mycallback()

In [20]:
start_time=timer()
history=model.fit(x_train,y_train,batch_size=50,epochs=30,validation_data=(x_test,y_test))
end_time=timer()
print(print("time taken: {0} minutes {1:.1f} seconds".format((end_time - start_time)//60, (end_time - start_time)%60)))

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30
time taken: 29.0 minutes 4.0 seconds
None


In [21]:
score = model.evaluate(x_train, y_train, verbose=0)
print("Training Accuracy: ", score[1])

score = model.evaluate(x_test, y_test, verbose=0)
print("Testing Accuracy: ", score[1])

Training Accuracy:  0.9881819486618042
Testing Accuracy:  0.969016432762146


In [25]:
model_json = model.to_json()

In [26]:
with open("Augmented_MFCCandMel_model.json", "w") as json_file:
    json_file.write(model_json)

In [27]:
model.save_weights("Augmented_MFCCandMel_model.h5")