# Urban Sounds Classification using Deep Learning

## Udacity-MLND

### Multiple_Feature_MLP

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import os
from tqdm import tqdm
#importing Audio Visualisation Libraries
from librosa import display
import librosa
#Load the UrbanSound8K metadata CSV into a pandas DataFrame
#(the path is relative to the notebook's working directory)
data=pd.read_csv("UrbanSound8K/metadata/UrbanSound8K.csv")

In [2]:
#Preview the first rows of the metadata to inspect the available columns
#(file name, fold number and classID are the ones used by the preprocessing below)
data.head()

Unnamed: 0,slice_file_name,fsID,start,end,salience,fold,classID,class
0,100032-3-0-0.wav,100032,0.0,0.317551,1,5,3,dog_bark
1,100263-2-0-117.wav,100263,58.5,62.5,1,5,2,children_playing
2,100263-2-0-121.wav,100263,60.5,64.5,1,5,2,children_playing
3,100263-2-0-126.wav,100263,63.0,67.0,1,5,2,children_playing
4,100263-2-0-137.wav,100263,68.5,72.5,1,5,2,children_playing


In [3]:
#preprocessing using all features set
# For every clip listed in the metadata, compute five feature families of 40
# coefficients each (every family averaged over its frames) and store them as
# one block per clip. Clips are split by the dataset's predefined folds:
# fold 10 is held out as the test set, every other fold goes to training.
x_train=[]
x_test=[]
y_train=[]
y_test=[]
path="UrbanSound8K/audio/fold"
for i in tqdm(range(len(data))):
    # Look the metadata row up once instead of once per column
    row=data.iloc[i]
    fold_no=str(row["fold"])
    file=row["slice_file_name"]
    label=row["classID"]
    filename=path+fold_no+"/"+file
    y,sr=librosa.load(filename)
    # Audio and sample rate are passed by keyword everywhere: recent librosa
    # releases no longer accept them positionally in librosa.feature.mfcc.
    mfccs = np.mean(librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40).T,axis=0)
    melspectrogram = np.mean(librosa.feature.melspectrogram(y=y, sr=sr, n_mels=40,fmax=8000).T,axis=0)
    chroma_stft=np.mean(librosa.feature.chroma_stft(y=y, sr=sr,n_chroma=40).T,axis=0)
    chroma_cq = np.mean(librosa.feature.chroma_cqt(y=y, sr=sr,n_chroma=40).T,axis=0)
    chroma_cens = np.mean(librosa.feature.chroma_cens(y=y, sr=sr,n_chroma=40).T,axis=0)
    # np.vstack yields shape (5, 40); the reshape to (40, 5) keeps the row-major
    # element order (it is NOT a transpose), so once flattened the 200 values are
    # still ordered family by family.
    features=np.reshape(np.vstack((mfccs,melspectrogram,chroma_stft,chroma_cq,chroma_cens)),(40,5))
    if fold_no!='10':
        x_train.append(features)
        y_train.append(label)
    else:
        x_test.append(features)
        y_test.append(label)

100%|████████████████████████████████████████████████████████████████████████████| 8732/8732 [1:04:30<00:00,  2.83it/s]


In [4]:
#turn the accumulated Python lists into numpy arrays (the same names are rebound)
x_train,x_test,y_train,y_test=(np.array(split) for split in (x_train,x_test,y_train,y_test))
#display the shape of each array as a sanity check
tuple(arr.shape for arr in (x_train,x_test,y_train,y_test))

((7895, 40, 5), (837, 40, 5), (7895,), (837,))

In [5]:
#flatten every sample's 2-D feature block into one row so the data fits a CSV table
x_train_2d=x_train.reshape((x_train.shape[0],x_train.shape[1]*x_train.shape[2]))
x_test_2d=x_test.reshape((x_test.shape[0],x_test.shape[1]*x_test.shape[2]))
#display the flattened shapes
tuple(flat.shape for flat in (x_train_2d,x_test_2d))

((7895, 200), (837, 200))

In [6]:
#persist the features and labels as comma-separated text files in the working
#directory, so the slow extraction step does not have to be repeated
for csv_name, array in (
    ("train_data_multi.csv", x_train_2d),
    ("test_data_multi.csv", x_test_2d),
    ("train_labels_multi.csv", y_train),
    ("test_labels_multi.csv", y_test),
):
    np.savetxt(csv_name, array, delimiter=",")

In [2]:
#read the saved features and labels back from their CSV files into numpy arrays
from numpy import genfromtxt
x_train, y_train, x_test, y_test = (
    genfromtxt(csv_name, delimiter=',')
    for csv_name in (
        'train_data_multi.csv',
        'train_labels_multi.csv',
        'test_data_multi.csv',
        'test_labels_multi.csv',
    )
)

In [3]:
#shape check on the arrays reloaded from CSV (features are 2-D, labels are 1-D)
x_train.shape,x_test.shape,y_train.shape,y_test.shape

((7895, 200), (837, 200), (7895,), (837,))

In [4]:
#converting to one hot
# to_categorical is imported from keras.utils rather than keras.utils.np_utils:
# the np_utils module path is not available in newer Keras releases, while
# keras.utils exposes the same function in both old and new versions.
from keras.utils import to_categorical
# NOTE: y_train / y_test are rebound to their one-hot form, so this cell should
# be run only once per fresh load of the label arrays.
y_train = to_categorical(y_train, num_classes=10)
y_test = to_categorical(y_test, num_classes=10)
y_train.shape,y_test.shape

Using TensorFlow backend.
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])
  _np_qint8 = np.dtype([("qint8", np.int8, 1)])
  _np_quint8 = np.dtype([("quint8", np.uint8, 1)])
  _np_qint16 = np.dtype([("qint16", np.int16, 1)])
  _np_quint16 = np.dtype([("quint16", np.uint16, 1)])
  _np_qint32 = np.dtype([("qint32", np.int32, 1)])
  np_resource = np.dtype([("resource", np.ubyte, 1)])


((7895, 10), (837, 10))

In [19]:
#shapes of the feature matrices that are fed to the model (samples x features)
x_train.shape,x_test.shape

((7895, 200), (837, 200))

In [5]:
from keras import Sequential
from keras.layers import Dense,Dropout,Activation

In [7]:
#forming model
#three identical hidden stages (Dense 256 with ReLU, then 50% dropout) followed by
#a 10-unit softmax output; the first layer takes the 200-value feature vector
model=Sequential([
    Dense(units=256,activation='relu',input_dim=200),
    Dropout(0.5),
    Dense(units=256,activation='relu'),
    Dropout(0.5),
    Dense(units=256,activation='relu'),
    Dropout(0.5),
    Dense(units=10,activation='softmax'),
])

In [8]:
#compiling: Adam optimizer, categorical cross-entropy loss (the labels are one-hot
#encoded), and accuracy reported as an additional metric
model.compile(optimizer='adam',loss='categorical_crossentropy',metrics=['accuracy'])

W0831 20:59:28.733877 16832 deprecation_wrapper.py:119] From C:\Anaconda3\lib\site-packages\keras\optimizers.py:790: The name tf.train.Optimizer is deprecated. Please use tf.compat.v1.train.Optimizer instead.

W0831 20:59:28.775738 16832 deprecation_wrapper.py:119] From C:\Anaconda3\lib\site-packages\keras\backend\tensorflow_backend.py:3295: The name tf.log is deprecated. Please use tf.math.log instead.



In [11]:
from keras.callbacks import ModelCheckpoint 
#fitting
# Create the checkpoint directory up front; without it the HDF5 write performed
# by the checkpoint callback can fail when the folder does not exist yet.
os.makedirs('Saved_models', exist_ok=True)
# Keep only the weights of the epoch with the best monitored validation loss.
checkpointer = ModelCheckpoint(filepath='Saved_models/multi_mlp.hdf5', verbose=1, save_best_only=True)
# NOTE(review): the held-out test split doubles as validation data here, so the
# "best" checkpoint is selected on the same data later used to report accuracy.
model.fit(x_train,y_train,epochs=50,validation_data=(x_test,y_test),batch_size=50,callbacks=[checkpointer], verbose=1)

Train on 7895 samples, validate on 837 samples
Epoch 1/50

Epoch 00001: val_loss improved from inf to 1.20225, saving model to Saved_models/multi_mlp.hdf5
Epoch 2/50

Epoch 00002: val_loss improved from 1.20225 to 1.17137, saving model to Saved_models/multi_mlp.hdf5
Epoch 3/50

Epoch 00003: val_loss did not improve from 1.17137
Epoch 4/50

Epoch 00004: val_loss did not improve from 1.17137
Epoch 5/50

Epoch 00005: val_loss did not improve from 1.17137
Epoch 6/50

Epoch 00006: val_loss did not improve from 1.17137
Epoch 7/50

Epoch 00007: val_loss did not improve from 1.17137
Epoch 8/50

Epoch 00008: val_loss did not improve from 1.17137
Epoch 9/50



Epoch 00009: val_loss did not improve from 1.17137
Epoch 10/50

Epoch 00010: val_loss did not improve from 1.17137
Epoch 11/50

Epoch 00011: val_loss did not improve from 1.17137
Epoch 12/50

Epoch 00012: val_loss did not improve from 1.17137
Epoch 13/50

Epoch 00013: val_loss did not improve from 1.17137
Epoch 14/50

Epoch 00014: val_loss improved from 1.17137 to 1.16821, saving model to Saved_models/multi_mlp.hdf5
Epoch 15/50

Epoch 00015: val_loss did not improve from 1.16821
Epoch 16/50



Epoch 00016: val_loss did not improve from 1.16821
Epoch 17/50

Epoch 00017: val_loss did not improve from 1.16821
Epoch 18/50

Epoch 00018: val_loss did not improve from 1.16821
Epoch 19/50

Epoch 00019: val_loss did not improve from 1.16821
Epoch 20/50

Epoch 00020: val_loss did not improve from 1.16821
Epoch 21/50

Epoch 00021: val_loss improved from 1.16821 to 1.16043, saving model to Saved_models/multi_mlp.hdf5
Epoch 22/50

Epoch 00022: val_loss did not improve from 1.16043
Epoch 23/50



Epoch 00023: val_loss did not improve from 1.16043
Epoch 24/50

Epoch 00024: val_loss did not improve from 1.16043
Epoch 25/50

Epoch 00025: val_loss did not improve from 1.16043
Epoch 26/50

Epoch 00026: val_loss did not improve from 1.16043
Epoch 27/50

Epoch 00027: val_loss did not improve from 1.16043
Epoch 28/50

Epoch 00028: val_loss did not improve from 1.16043
Epoch 29/50

Epoch 00029: val_loss did not improve from 1.16043
Epoch 30/50

Epoch 00030: val_loss did not improve from 1.16043
Epoch 31/50



Epoch 00031: val_loss did not improve from 1.16043
Epoch 32/50

Epoch 00032: val_loss did not improve from 1.16043
Epoch 33/50

Epoch 00033: val_loss did not improve from 1.16043
Epoch 34/50

Epoch 00034: val_loss did not improve from 1.16043
Epoch 35/50

Epoch 00035: val_loss did not improve from 1.16043
Epoch 36/50

Epoch 00036: val_loss did not improve from 1.16043
Epoch 37/50

Epoch 00037: val_loss did not improve from 1.16043
Epoch 38/50

Epoch 00038: val_loss did not improve from 1.16043
Epoch 39/50

Epoch 00039: val_loss did not improve from 1.16043
Epoch 40/50



Epoch 00040: val_loss did not improve from 1.16043
Epoch 41/50

Epoch 00041: val_loss did not improve from 1.16043
Epoch 42/50

Epoch 00042: val_loss did not improve from 1.16043
Epoch 43/50

Epoch 00043: val_loss did not improve from 1.16043
Epoch 44/50

Epoch 00044: val_loss did not improve from 1.16043
Epoch 45/50

Epoch 00045: val_loss did not improve from 1.16043
Epoch 46/50

Epoch 00046: val_loss did not improve from 1.16043
Epoch 47/50

Epoch 00047: val_loss did not improve from 1.16043
Epoch 48/50



Epoch 00048: val_loss did not improve from 1.16043
Epoch 49/50

Epoch 00049: val_loss did not improve from 1.16043
Epoch 50/50

Epoch 00050: val_loss did not improve from 1.16043


<keras.callbacks.History at 0x22521a33f28>

In [12]:
# Print the layer-by-layer architecture summary
model.summary()

# Score the in-memory model on the training and test splits; with
# metrics=['accuracy'] evaluate() returns [loss, accuracy].
score_train = model.evaluate(x_train, y_train, verbose=1)
score_test = model.evaluate(x_test, y_test, verbose=1)
# Index 1 is the accuracy entry; scale both to percentages
accuracy_train, accuracy_test = (100*score[1] for score in (score_train, score_test))

_________________________________________________________________
Layer (type)                 Output Shape              Param #   
dense_5 (Dense)              (None, 256)               51456     
_________________________________________________________________
dropout_4 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_6 (Dense)              (None, 256)               65792     
_________________________________________________________________
dropout_5 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_7 (Dense)              (None, 256)               65792     
_________________________________________________________________
dropout_6 (Dropout)          (None, 256)               0         
_________________________________________________________________
dense_8 (Dense)              (None, 10)                2570      
Total para

In [13]:
# Report both accuracies as percentages with four decimal places
print("Training accuracy: %.4f%%" % accuracy_train)
print("Test accuracy: %.4f%%" % accuracy_test)

Training accuracy: 94.9335%
Test accuracy: 64.8746%


In [72]:
def extract_features(file_path):
    """Turn one audio file into the 200-value feature row expected by the model.

    Five feature families are computed with librosa (MFCC, mel spectrogram,
    chroma STFT, chroma CQT, chroma CENS), each with 40 coefficients. Every
    family is averaged over its frames, and the five 40-value vectors are laid
    out one after another.

    Parameters
    ----------
    file_path : str
        Path of the audio file, loaded with ``librosa.load`` defaults.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(1, 200)``: a single sample ready for ``model.predict``.
    """
    y,sr=librosa.load(file_path)
    # Audio and sample rate are passed by keyword everywhere: recent librosa
    # releases no longer accept them positionally in librosa.feature.mfcc.
    feature_frames=(
        librosa.feature.mfcc(y=y, sr=sr, n_mfcc=40),
        librosa.feature.melspectrogram(y=y, sr=sr, n_mels=40,fmax=8000),
        librosa.feature.chroma_stft(y=y, sr=sr,n_chroma=40),
        librosa.feature.chroma_cqt(y=y, sr=sr,n_chroma=40),
        librosa.feature.chroma_cens(y=y, sr=sr,n_chroma=40),
    )
    # Collapse the frame axis of each family, then stack to shape (5, 40)
    stacked=np.vstack([np.mean(frames.T,axis=0) for frames in feature_frames])
    # Row-major flattening keeps the values ordered family by family
    features=np.reshape(stacked,(1,200))
    return features

In [73]:
# Human-readable class names, indexed by the integer class id the model predicts
class_lable=["air_conditioner","car_horn","children_playing","dog_bark","drilling","engine_idling","gun_shot","jackhammer","siren","street_music"]
def print_prediction(file_path):
    """Classify one audio file with the trained model and print the class name.

    Parameters
    ----------
    file_path : str
        Path of the audio file; it is converted to a feature row by
        ``extract_features`` before being passed to the model.
    """
    features = extract_features(file_path)
    # Take the arg-max over the predicted class probabilities. This replaces
    # model.predict_classes, which newer Keras releases no longer provide, and
    # gives the same result for a softmax output layer.
    predicted_vector = np.argmax(model.predict(features), axis=-1)
    predicted_class = class_lable[predicted_vector[0]] 
    print("The predicted class is:", predicted_class, '\n') 

In [69]:
# Clip from fold5 (part of the training split); the metadata preview lists this
# file as classID 3, dog_bark.
file_path='UrbanSound8K/audio/fold5/100032-3-0-0.wav'
print_prediction(file_path)

(1, 200)
The predicted class is: dog_bark 



In [74]:
# Clip from fold7, which the preprocessing loop places in the training split.
# NOTE(review): the "-9-" in the file name presumably encodes classID 9 - confirm against the metadata.
file_path='UrbanSound8K/audio/fold7/101848-9-0-0.wav'
print_prediction(file_path)

The predicted class is: street_music 



In [75]:
# Clip from fold10, the fold held out as the test split during preprocessing.
# NOTE(review): the "-6-" in the file name presumably encodes classID 6 - confirm against the metadata.
file_path='UrbanSound8K/audio/fold10/200460-6-1-0.wav'
print_prediction(file_path)

The predicted class is: gun_shot 



In [76]:
# Sample stored outside the UrbanSound8K folder; the file name suggests a car horn.
# NOTE(review): the folder is spelled 'Sample Auido' in the path - it must match the name on disk.
file_path='Sample Auido/Car_horn.wav'
print_prediction(file_path)

The predicted class is: street_music 



In [77]:
# Sample stored outside the UrbanSound8K folder; the file name suggests a dog bark.
file_path='Sample Auido/dog_bark.wav'
print_prediction(file_path)

The predicted class is: dog_bark 



In [79]:
# Sample stored outside the UrbanSound8K folder; the file name suggests drilling.
file_path='Sample Auido/drilling.wav'
print_prediction(file_path)

The predicted class is: drilling 



In [80]:
# Sample stored outside the UrbanSound8K folder; the file name suggests an engine sound.
file_path='Sample Auido/engine.wav'
print_prediction(file_path)

The predicted class is: engine_idling 

