In [None]:
from distutils.version import LooseVersion
import warnings
import tensorflow as tf

# Refuse to run on TensorFlow releases older than 1.0, then report the installed version.
assert LooseVersion(tf.__version__) >= LooseVersion('1.0'), (
    'Please use TensorFlow version 1.0 or newer.  You are using {}'.format(tf.__version__)
)
print('TensorFlow Version: {}'.format(tf.__version__))

# Report the default GPU device when TensorFlow can see one; warn otherwise.
# (Original author's note: run this with the GPU test kernel, not the usual Python 3 kernel.)
if tf.test.gpu_device_name():
    print('Default GPU Device: {}'.format(tf.test.gpu_device_name()))
else:
    warnings.warn('No GPU found. Please ensure you have installed TensorFlow correctly')

In [None]:
import os
import sys
from os import listdir
from os.path import isfile, join
import IPython.display as ipd
import librosa 
import librosa.display
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
from scipy.io import wavfile as wav
import numpy as np
from timeit import default_timer as timer



In [None]:
# Path of one sample clip used for a quick listening check.
# NOTE(review): hard-coded absolute path, and its root ('E:/Thesis/...') differs from the
# 'E:/Dataset/...' root used by the later cells - confirm which location is current.
filename='E:/Thesis/UrbanSound8K/UrbanSound8K/audio/fold3/6988-5-0-4.wav'
# Last expression of the cell: the IPython Audio object built from the path is the cell output.
ipd.Audio(filename)

In [None]:
def extract_MElandChroma_cqt(filename):
    """Summarise one audio file as time-averaged chroma-CQT and mel-spectrogram vectors.

    Parameters
    ----------
    filename : str
        Path of the audio file handed to ``librosa.load``.

    Returns
    -------
    tuple of numpy.ndarray
        ``(chroma_cqt, mel)``. Each is the mean over axis 0 of the transposed
        feature matrix; the features are requested with ``n_chroma=50`` and
        ``n_mels=50`` respectively.
    """
    audio, sample_rate = librosa.load(filename, res_type='kaiser_fast')

    # bins_per_octave is pinned to n_chroma: the CQT bins per octave must be an integer
    # multiple of the chroma bins, and newer librosa releases default to 36, which 50
    # cannot divide. Older releases already defaulted to "same as n_chroma".
    chroma_cqt = np.mean(
        librosa.feature.chroma_cqt(
            y=audio, sr=sample_rate, n_chroma=50, bins_per_octave=50
        ).T,
        axis=0,
    )

    # The signal is passed as y=...: recent librosa releases make the arguments of
    # melspectrogram keyword-only and reject a positional audio array.
    mel = np.mean(
        librosa.feature.melspectrogram(
            y=audio, sr=sample_rate, n_mels=50, fmax=8000
        ).T,
        axis=0,
    )

    return chroma_cqt, mel


# The other cells of this notebook call the extractor as extract_Chroma_cqtandMFCC, a name
# that was never defined. Expose it as an alias so those calls resolve instead of raising
# NameError (note: despite the alias name, the second output is a mel spectrogram, not MFCC).
extract_Chroma_cqtandMFCC = extract_MElandChroma_cqt

In [None]:
# Sanity check: time the feature extraction for one clip and show the chroma vector's shape.
start_time = timer()
filename = 'E:/Dataset/UrbanSound8K/audio/fold3/6988-5-0-4.wav'
# Call the extractor under the name it is actually defined with; the previous call used
# extract_Chroma_cqtandMFCC, which does not exist in this notebook.
a, b = extract_MElandChroma_cqt(filename)
end_time = timer()
# timer() differences are already in seconds; the previous "/60" reported minutes under a
# "secs" label.
print('Time taken to extract features from one file {:.3f}secs'.format(end_time - start_time))
print(a.shape)

In [None]:
# Label tables: `metadata` is read from the augmented-clip CSV, `metadata_ori` from the
# original UrbanSound8K CSV. Both are looked up by file name in the feature-extraction cell.
# NOTE(review): absolute local paths - consider a configurable data directory.
metadata=pd.read_csv('E:/Dataset/UrbanSound8K/Augmented_metadata/UrbanSound8k_Augmented.csv')
metadata_ori=pd.read_csv('E:/Dataset/UrbanSound8K/metadata/UrbanSound8K.csv')
# Last expression of the cell: preview the first rows of the augmented metadata.
metadata.head()




In [None]:
# Sub-directory names of the ten dataset folds, in order ('fold1' ... 'fold10').
fold_list = ['fold{}'.format(number) for number in range(1, 11)]

In [None]:


# Sub-directory names of the augmentation variants stored inside every augmented fold folder.
Augment_list = [
    'pitch_2',
    'pitch_-2',
    'pitch_time290',
    'pitch_time-290',
    'pitch_time2110',
    'pitch_time-2110',
    'speed_90',
    'speed_110',
]



In [None]:
def _extract_folder(folder, fold, meta, name_column, label_position, sink):
    """Extract one (20, 5) feature matrix per audio file in ``folder`` and append it to ``sink``.

    Parameters
    ----------
    folder : str
        Directory path ending in '/'; only regular files directly inside it are processed.
    fold : int
        Fold number stored with every record.
    meta : pandas.DataFrame
        Metadata table used to look up the label of each file.
    name_column : str
        Column of ``meta`` that holds the file name.
    label_position : int
        Index of the label inside the matching metadata row (row converted to a list).
    sink : list
        Receives one ``[features, features.shape, label, fold]`` record per processed file.

    Returns
    -------
    int
        Number of files whose feature extraction failed and were skipped.

    Raises
    ------
    IndexError
        If a file has no matching row in ``meta`` (same as the previous inline code).
    """
    failures = 0
    for name in listdir(folder):
        path = folder + name
        if not isfile(path):
            continue
        try:
            a, b = extract_MElandChroma_cqt(path)
            # The two feature vectors are concatenated and folded into a 20x5 matrix,
            # so together they must hold exactly 100 values.
            features = np.reshape(np.hstack([a, b]), (20, 5))
        except Exception as err:
            # Best-effort: skip unreadable clips, but show WHY they failed. A bare
            # `except:` previously hid a NameError on every file and also swallowed
            # KeyboardInterrupt.
            print(path, '->', repr(err))
            failures += 1
            continue

        l_row = meta.loc[meta[name_column] == name].values.tolist()
        label = l_row[0][label_position]
        sink.append([features, features.shape, label, fold])
    return failures


stacked_features = []
exceptions = 0
count = 0  # not used in this cell; kept in case another cell still references it

start_time = timer()

# Augmented clips: one sub-folder per augmentation inside every fold. The label is read
# four columns from the end of the augmented metadata row, matched on the 'file' column.
for fold_number, fold_name in enumerate(fold_list, start=1):
    for augmentation in Augment_list:
        exceptions += _extract_folder(
            'E:/Dataset/UrbanSound8K/Augmented_audio/' + fold_name + '/' + augmentation + '/',
            fold_number, metadata, 'file', -4, stacked_features,
        )

# Original clips: the label is the last column of the original metadata row, matched on
# the 'slice_file_name' column.
for fold_number, fold_name in enumerate(fold_list, start=1):
    exceptions += _extract_folder(
        'E:/Dataset/UrbanSound8K/audio/' + fold_name + '/',
        fold_number, metadata_ori, 'slice_file_name', -1, stacked_features,
    )

print("Exceptions: ", exceptions)
end_time = timer()
# Single print: the previous print(print(...)) also echoed a stray "None".
minutes, seconds = divmod(end_time - start_time, 60)
print("time taken: {0} minutes {1:.1f} seconds".format(int(minutes), seconds))
print('Finished feature extraction from all folder')

In [None]:
# Wrap the extracted records in a DataFrame, one named column per record field,
# and preview the first rows as the cell output.
cols = [
    'Stacked_Features',
    'Matrix_Shape',
    'Label',
    'Fold',
]
Stacked_feature_pd = pd.DataFrame(stacked_features, columns=cols)
Stacked_feature_pd.head()

In [None]:
from sklearn.preprocessing import LabelEncoder
from keras.utils import to_categorical


# Stack the per-file feature matrices and the labels into NumPy arrays.
X = np.array(Stacked_feature_pd['Stacked_Features'].tolist())
y = np.array(Stacked_feature_pd['Label'].tolist())

# Map the labels to integer ids, then expand the ids into one-hot rows.
le = LabelEncoder()
y_encoded = le.fit_transform(y)
yy = to_categorical(y_encoded)

In [None]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Dropout, Activation, Flatten
from tensorflow.keras.layers import Convolution2D, Conv2D, MaxPooling2D, GlobalAveragePooling2D,LSTM,TimeDistributed
from tensorflow.keras.optimizers import Adam
from keras.utils import np_utils
from sklearn import metrics

#LSTM MODEL

In [None]:


from sklearn.model_selection import train_test_split 

# Hold out whole folds instead of a random 80/20 shuffle. The table mixes original clips
# with their pitch/speed-augmented variants, so a random split can put variants of the same
# recording on both sides and overstate the test score. The 'Fold' column recorded during
# feature extraction keeps every variant of a fold on one side. Folds 9 and 10 are two of
# the ten folds, i.e. roughly the previous 20% test share.
TEST_FOLDS = (9, 10)
is_test_row = Stacked_feature_pd['Fold'].isin(TEST_FOLDS).values
x_train_lstm, x_test_lstm = X[~is_test_row], X[is_test_row]
y_train_lstm, y_test_lstm = yy[~is_test_row], yy[is_test_row]



In [None]:
# Show the shapes of the four splits and of the full one-hot label matrix on one line.
print(*(part.shape for part in (x_train_lstm, x_test_lstm, y_train_lstm, y_test_lstm, yy)))

In [None]:
# Force both input splits into (samples, 20, 5), the layout the LSTM is declared with.
x_train_lstm = x_train_lstm.reshape((x_train_lstm.shape[0], 20, 5))
x_test_lstm = x_test_lstm.reshape((x_test_lstm.shape[0], 20, 5))
print(*(part.shape for part in (x_train_lstm, x_test_lstm)))

In [None]:
# Re-check the shapes of all splits (and the full one-hot label matrix) after the reshape.
print(*(part.shape for part in (x_train_lstm, x_test_lstm, y_train_lstm, y_test_lstm, yy)))

In [None]:
# Two stacked sequence-returning LSTMs, two per-timestep dense layers, then a flattened
# 10-way softmax classifier. Input is one (20, 5) feature matrix per clip.
model2 = Sequential([
    LSTM(128, return_sequences=True, input_shape=(20, 5)),
    LSTM(128, return_sequences=True, dropout=0.3),
    TimeDistributed(Dense(256, activation='relu')),
    TimeDistributed(Dense(512, activation='relu')),
    Flatten(),
    Dense(10, activation='softmax'),
])



In [None]:
# Configure training (Adam, categorical cross-entropy, accuracy metric) and print the layers.
model2.compile(
    optimizer='adam',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)
model2.summary()

In [None]:
# Baseline: score the still-untrained network on the test split.
# evaluate() returns [loss, accuracy] for the metrics given at compile time.
score2 = model2.evaluate(x_test_lstm, y_test_lstm, verbose=0)
accuracy2 = score2[1] * 100

print("Pre-training accuracy: %.4f%%" % accuracy2)

In [None]:
# Train for 30 epochs in batches of 50, validating on the test split after every epoch,
# and time the whole run.
start_time2 = timer()
history = model2.fit(
    x_train_lstm, y_train_lstm,
    batch_size=50,
    epochs=30,
    validation_data=(x_test_lstm, y_test_lstm),
)
end_time2 = timer()
# Both parts of the message now come from this cell's own timers: the seconds were
# previously taken from the feature-extraction timers (end_time - start_time), and the
# nested print(print(...)) echoed a stray "None".
elapsed2 = end_time2 - start_time2
print("time taken: {0} minutes {1:.1f} seconds".format(elapsed2 // 60, elapsed2 % 60))



In [None]:
# Report accuracy (in percent) on the training split, then on the test split.
# score2 ends up holding the test-split result, as before.
for caption, inputs, targets in (
    ("Training Accuracy: ", x_train_lstm, y_train_lstm),
    ("Testing Accuracy: ", x_test_lstm, y_test_lstm),
):
    score2 = model2.evaluate(inputs, targets, verbose=0)
    print(caption, score2[1]*100)
    if targets is y_train_lstm:
        print()

In [None]:
def plot_accuracy():
    """Plot per-epoch training and validation accuracy from the global ``history``.

    Reads ``history.history['accuracy']`` and ``history.history['val_accuracy']``,
    draws both curves on the current matplotlib figure and shows it. Returns None.
    """
    for curve_key in ('accuracy', 'val_accuracy'):
        plt.plot(history.history[curve_key])

    plt.title('Model Accuracy')
    plt.ylabel('Accuracy')
    plt.xlabel('No of epoch')
    plt.legend(['train', 'test'], loc='upper left')
    plt.show()
    
def plot_loss():
    """Plot per-epoch training and validation loss from the global ``history``.

    Reads ``history.history['loss']`` and ``history.history['val_loss']``, draws both
    labelled curves on the current matplotlib figure and shows it. Returns None.
    """
    for curve_key, curve_label in (('loss', 'Training'), ('val_loss', 'Testing')):
        plt.plot(history.history[curve_key], label=curve_label)

    plt.title('Model Loss')
    plt.ylabel('Loss')
    plt.xlabel('No of epoch')
    plt.legend(loc="upper left")
    plt.show()

In [None]:
# Draw the accuracy curves and then the loss curves, each on its own 12x4 figure.
for draw_curves in (plot_accuracy, plot_loss):
    plt.figure(figsize=(12,4))
    draw_curves()

In [None]:
# Display name -> integer class id (0-9), in id order.
Class_Label2 = {
    class_name: class_id
    for class_id, class_name in enumerate((
        'Air_conditioner',
        'Car_horn',
        'Children_playing',
        'Dog_Bark',
        'Drilling',
        'Engine_idling',
        'Gun_Shot',
        'Jackhammer',
        'Siren',
        'Street_Music',
    ))
}

In [None]:
from sklearn.metrics import classification_report

# Collapse the one-hot targets and the predicted probabilities to integer class ids.
y_true2 = np.argmax(y_test_lstm, axis=1)
y_pred2 = np.argmax(model2.predict(x_test_lstm), axis=1)

# Take ids and display names from the same mapping so the two lists cannot drift apart.
labels2 = list(Class_Label2.values())
target_names2 = list(Class_Label2.keys())

print(y_true2.shape, y_pred2.shape)
# labels= pins the report to all ten classes. Without it, a class that is absent from both
# y_true2 and y_pred2 makes the class count disagree with target_names and the call fails.
print(classification_report(y_true2, y_pred2, labels=labels2, target_names=target_names2))



In [None]:
from sklearn.metrics import accuracy_score
# Overall test accuracy as a percentage, rounded to zero decimals.
accuracy_percent = accuracy_score(y_true2, y_pred2) * 100
z = np.around(accuracy_percent, decimals=0)
print(z)



In [None]:


from sklearn.metrics import confusion_matrix
# Print the raw confusion counts for the test split.
print(confusion_matrix(y_true=y_true2, y_pred=y_pred2))



In [None]:
from sklearn.metrics import confusion_matrix
import seaborn as sns
import matplotlib.pyplot as plt

# Create the figure and its axes together so the heatmap is drawn on the 10x4 canvas.
# Previously the axes were created first and plt.figure() then opened a second, empty figure.
fig, ax = plt.subplots(figsize=(10, 4))

class_names = list(Class_Label2.keys())
# labels= keeps the matrix at one row/column per known class even when a class is
# missing from the test split, so it always lines up with the tick labels.
mat = confusion_matrix(y_true2, y_pred2, labels=list(Class_Label2.values()))

sns.heatmap(mat, square=False, annot=True, fmt='d', cbar=False, linewidths=.5,
            xticklabels=class_names, yticklabels=class_names, ax=ax, cmap='PuBuGn')

# Pin the y-limits to the full matrix height. This shows complete first/last rows on
# matplotlib builds that clip them, without adding the half-cell padding that the
# previous "bottom + 0.5 / top - 0.5" adjustment produces on unaffected builds.
ax.set_ylim(mat.shape[0], 0)
ax.set_title('Confusion Matrix')
# confusion_matrix puts the true classes on rows (y-axis) and the predictions on
# columns (x-axis); the two axis labels were swapped before.
ax.set_xlabel('Predicted label')
ax.set_ylabel('True label');

In [None]:
def acc_per_class2(np_probs_array):
    """Return the per-class accuracy (in percent) from a square confusion matrix.

    For every row ``i`` the result is ``matrix[i][i] / sum(matrix[i]) * 100``.

    Parameters
    ----------
    np_probs_array : array-like of shape (n_classes, n_classes)
        Confusion matrix; any array-like (ndarray or nested lists) is accepted.
        Values are used as given, without integer truncation.

    Returns
    -------
    list of float
        One percentage per row. A row whose sum is zero (class without samples)
        yields ``nan`` instead of triggering a division warning. An empty input
        yields an empty list.

    Raises
    ------
    ValueError
        If the input is non-empty and not a square 2-D matrix.
    """
    matrix = np.asarray(np_probs_array, dtype=float)
    if matrix.size == 0:
        return []
    if matrix.ndim != 2 or matrix.shape[0] != matrix.shape[1]:
        raise ValueError(
            'expected a square 2-D confusion matrix, got shape {}'.format(matrix.shape)
        )

    correct = np.diag(matrix)
    totals = matrix.sum(axis=1)
    # Divide only where the row has samples; the remaining entries keep the nan fill.
    ratios = np.full(correct.shape, np.nan)
    np.divide(correct, totals, out=ratios, where=totals != 0)
    return (ratios * 100).tolist()

In [None]:


# Per-class accuracy derived from the confusion matrix `mat`.
accuracies2 = acc_per_class2(mat)

# Human-readable class names, in class-id order.
labels2 = [
    'Air Conditioner', 'Car Horn', 'Children Playing', 'Dog bark', 'Drilling',
    'Engine Idling', 'Gun Shot', 'Jackhammer', 'Siren', 'Street Music',
]

# Cell output: classes ranked from best to worst accuracy.
class_accuracy_table = pd.DataFrame({'CLASS': labels2, 'ACCURACY': accuracies2})
class_accuracy_table.sort_values(by='ACCURACY', ascending=False)



In [None]:
# Bar heights: the per-class accuracies rounded to one decimal.
Y2 = np.around(accuracies2, decimals=1)

# Human-readable class names, in class-id order (used as x tick labels).
labels2 = [
    'Air Conditioner', 'Car Horn', 'Children Playing', 'Dog bark', 'Drilling',
    'Engine Idling', 'Gun Shot', 'Jackhammer', 'Siren', 'Street Music',
]
index2 = np.arange(len(labels2))

# One olive bar per class on a 10x6 figure, with vertical class names.
plt.figure(figsize=(10, 6))
plt.bar(index2, Y2, color='olive')
plt.xticks(index2, labels2, rotation=90)
plt.xlabel('Class_Name')
plt.ylabel('Accuracy')
plt.title('Accuracy per Class')