In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
import tensorflow.keras as keras
import matplotlib.pyplot as plt
import pandas as pd
import librosa
import librosa.display
import tensorflow as tf

In [2]:
metadata=pd.read_csv(r"C:\Users\Rashiqua Munshi\Downloads\ProjectSchool\UrbanSound8K.csv")
metadata.head(10)

Unnamed: 0,slice_file_name,fsID,start,end,salience,fold,classID,class
0,100032-3-0-0.wav,100032,0.0,0.317551,1,5,3,dog_bark
1,100263-2-0-117.wav,100263,58.5,62.5,1,5,2,children_playing
2,100263-2-0-121.wav,100263,60.5,64.5,1,5,2,children_playing
3,100263-2-0-126.wav,100263,63.0,67.0,1,5,2,children_playing
4,100263-2-0-137.wav,100263,68.5,72.5,1,5,2,children_playing
5,100263-2-0-143.wav,100263,71.5,75.5,1,5,2,children_playing
6,100263-2-0-161.wav,100263,80.5,84.5,1,5,2,children_playing
7,100263-2-0-3.wav,100263,1.5,5.5,1,5,2,children_playing
8,100263-2-0-36.wav,100263,18.0,22.0,1,5,2,children_playing
9,100648-1-0-0.wav,100648,4.823402,5.471927,2,10,1,car_horn


In [3]:
audio_dataset_path=r"C:\Users\Rashiqua Munshi\Downloads\ProjectSchool\archive"
audio_dataset_path


'C:\\Users\\Rashiqua Munshi\\Downloads\\ProjectSchool\\archive'

In [4]:
def features_extractor(file):
    audio, sample_rate = librosa.load(file) 
    mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    mfccs_scaled_features = np.mean(mfccs_features.T,axis=0)
    
    return mfccs_scaled_features 

In [5]:
import numpy as np
from tqdm import tqdm
import os
### Now we iterate through every audio file and extract features 
### using Mel-Frequency Cepstral Coefficients
extracted_features=[]
for index_num,row in tqdm(metadata.iterrows()):
    file_name = os.path.join(os.path.abspath(audio_dataset_path),'fold'+str(row["fold"])+'\\',str(row["slice_file_name"]))
    final_class_labels=row["class"]
    data=features_extractor(file_name)
    extracted_features.append([data,final_class_labels])

8732it [01:56, 74.90it/s] 


In [6]:
### converting extracted_features to Pandas dataframe
extracted_features_df=pd.DataFrame(extracted_features,columns=['feature','class'])
extracted_features_df.head()

Unnamed: 0,feature,class
0,"[-211.93698, 62.581203, -122.81315, -60.745293...",dog_bark
1,"[-417.0052, 99.336624, -42.995586, 51.073326, ...",children_playing
2,"[-452.39316, 112.36253, -37.578068, 43.195866,...",children_playing
3,"[-406.47922, 91.1966, -25.043556, 42.78452, 11...",children_playing
4,"[-439.63873, 103.86224, -42.658787, 50.690277,...",children_playing


In [7]:
x=np.array(extracted_features_df['feature'].tolist())
y=np.array(extracted_features_df['class'].tolist())

In [8]:
x.shape

(8732, 40)

In [9]:
y.shape

(8732,)

In [10]:
x=x.reshape((x.shape[0],1,x.shape[1]))
# x.shape

In [11]:
y

array(['dog_bark', 'children_playing', 'children_playing', ...,
       'car_horn', 'car_horn', 'car_horn'], dtype='<U16')

In [12]:
### Label Encoding
y=np.array(pd.get_dummies(y))
y

array([[False, False, False, ..., False, False, False],
       [False, False,  True, ..., False, False, False],
       [False, False,  True, ..., False, False, False],
       ...,
       [False,  True, False, ..., False, False, False],
       [False,  True, False, ..., False, False, False],
       [False,  True, False, ..., False, False, False]])

In [13]:
### Train Test Split
from sklearn.model_selection import train_test_split
x_train,x_test,y_train,y_test=train_test_split(x,y,test_size=0.2,random_state=1)

In [14]:
x_train, x_valid, y_train, y_valid = train_test_split(x_train, y_train, test_size=0.15, random_state=42)

In [15]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense,Dropout,Activation,Flatten
from tensorflow.keras.optimizers import Adam
from sklearn import metrics

In [16]:
### No of classes
num_labels=y.shape[1]

In [17]:
# build network topology
model = keras.Sequential()

# 2 LSTM layers
model.add(keras.layers.LSTM(100, input_shape=(x_train.shape[1],x_train.shape[2]),return_sequences=True))
model.add(keras.layers.Dropout(0.2))
model.add(keras.layers.LSTM(100))
model.add(keras.layers.Dropout(0.3))

# dense layer
# model.add(keras.layers.Dense(64, activation='relu'))


# output layer
model.add(keras.layers.Dense(10,activation='softmax'))

In [18]:
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 lstm (LSTM)                 (None, 1, 100)            56400     
                                                                 
 dropout (Dropout)           (None, 1, 100)            0         
                                                                 
 lstm_1 (LSTM)               (None, 100)               80400     
                                                                 
 dropout_1 (Dropout)         (None, 100)               0         
                                                                 
 dense (Dense)               (None, 10)                1010      
                                                                 
Total params: 137,810
Trainable params: 137,810
Non-trainable params: 0
_________________________________________________________________


In [19]:

model.compile(loss='categorical_crossentropy',metrics=['accuracy'],optimizer='adam')

In [20]:
x_train.shape

(5937, 1, 40)

In [21]:
x_test.shape

(1747, 1, 40)

In [22]:
y_train.shape

(5937, 10)

In [23]:
y_test.shape

(1747, 10)

In [24]:
## Trianing my model
from tensorflow.keras.callbacks import ModelCheckpoint
from datetime import datetime 

num_epochs = 200
num_batch_size = 32

checkpointer = ModelCheckpoint(filepath='saved_models/audio_classification.hdf5', 
                               verbose=1, save_best_only=True)
start = datetime.now()

model.fit(x_train, y_train, batch_size=num_batch_size, epochs=num_epochs, validation_data=(x_valid, y_valid), callbacks=[checkpointer], verbose=1)


duration = datetime.now() - start
print("Training completed in time: ", duration)

Epoch 1/200
Epoch 1: val_loss improved from inf to 1.37658, saving model to saved_models\audio_classification.hdf5
Epoch 2/200
Epoch 2: val_loss improved from 1.37658 to 1.09690, saving model to saved_models\audio_classification.hdf5
Epoch 3/200
Epoch 3: val_loss improved from 1.09690 to 0.94259, saving model to saved_models\audio_classification.hdf5
Epoch 4/200
Epoch 4: val_loss improved from 0.94259 to 0.86297, saving model to saved_models\audio_classification.hdf5
Epoch 5/200
Epoch 5: val_loss improved from 0.86297 to 0.76090, saving model to saved_models\audio_classification.hdf5
Epoch 6/200
Epoch 6: val_loss improved from 0.76090 to 0.72426, saving model to saved_models\audio_classification.hdf5
Epoch 7/200
Epoch 7: val_loss improved from 0.72426 to 0.71346, saving model to saved_models\audio_classification.hdf5
Epoch 8/200
Epoch 8: val_loss improved from 0.71346 to 0.69776, saving model to saved_models\audio_classification.hdf5
Epoch 9/200
Epoch 9: val_loss improved from 0.69776 

Epoch 27/200
Epoch 27: val_loss improved from 0.50962 to 0.50799, saving model to saved_models\audio_classification.hdf5
Epoch 28/200
Epoch 28: val_loss did not improve from 0.50799
Epoch 29/200
Epoch 29: val_loss improved from 0.50799 to 0.48797, saving model to saved_models\audio_classification.hdf5
Epoch 30/200
Epoch 30: val_loss did not improve from 0.48797
Epoch 31/200
Epoch 31: val_loss did not improve from 0.48797
Epoch 32/200
Epoch 32: val_loss did not improve from 0.48797
Epoch 33/200
Epoch 33: val_loss did not improve from 0.48797
Epoch 34/200
Epoch 34: val_loss did not improve from 0.48797
Epoch 35/200
Epoch 35: val_loss did not improve from 0.48797
Epoch 36/200
Epoch 36: val_loss did not improve from 0.48797
Epoch 37/200
Epoch 37: val_loss did not improve from 0.48797
Epoch 38/200
Epoch 38: val_loss did not improve from 0.48797
Epoch 39/200
Epoch 39: val_loss did not improve from 0.48797
Epoch 40/200
Epoch 40: val_loss did not improve from 0.48797
Epoch 41/200
Epoch 41: val

Epoch 56/200
Epoch 56: val_loss did not improve from 0.45911
Epoch 57/200
Epoch 57: val_loss did not improve from 0.45911
Epoch 58/200
Epoch 58: val_loss did not improve from 0.45911
Epoch 59/200
Epoch 59: val_loss did not improve from 0.45911
Epoch 60/200
Epoch 60: val_loss improved from 0.45911 to 0.45156, saving model to saved_models\audio_classification.hdf5
Epoch 61/200
Epoch 61: val_loss did not improve from 0.45156
Epoch 62/200
Epoch 62: val_loss did not improve from 0.45156
Epoch 63/200
Epoch 63: val_loss did not improve from 0.45156
Epoch 64/200
Epoch 64: val_loss did not improve from 0.45156
Epoch 65/200
Epoch 65: val_loss did not improve from 0.45156
Epoch 66/200
Epoch 66: val_loss did not improve from 0.45156
Epoch 67/200
Epoch 67: val_loss did not improve from 0.45156
Epoch 68/200
Epoch 68: val_loss did not improve from 0.45156
Epoch 69/200
Epoch 69: val_loss did not improve from 0.45156
Epoch 70/200
Epoch 70: val_loss did not improve from 0.45156
Epoch 71/200
Epoch 71: va

Epoch 85/200
Epoch 85: val_loss did not improve from 0.45156
Epoch 86/200
Epoch 86: val_loss did not improve from 0.45156
Epoch 87/200
Epoch 87: val_loss did not improve from 0.45156
Epoch 88/200
Epoch 88: val_loss did not improve from 0.45156
Epoch 89/200
Epoch 89: val_loss did not improve from 0.45156
Epoch 90/200
Epoch 90: val_loss did not improve from 0.45156
Epoch 91/200
Epoch 91: val_loss did not improve from 0.45156
Epoch 92/200
Epoch 92: val_loss did not improve from 0.45156
Epoch 93/200
Epoch 93: val_loss did not improve from 0.45156
Epoch 94/200
Epoch 94: val_loss did not improve from 0.45156
Epoch 95/200
Epoch 95: val_loss did not improve from 0.45156
Epoch 96/200
Epoch 96: val_loss did not improve from 0.45156
Epoch 97/200
Epoch 97: val_loss did not improve from 0.45156
Epoch 98/200
Epoch 98: val_loss did not improve from 0.45156
Epoch 99/200
Epoch 99: val_loss did not improve from 0.45156
Epoch 100/200
Epoch 100: val_loss did not improve from 0.45156
Epoch 101/200
Epoch 10

Epoch 115/200
Epoch 115: val_loss did not improve from 0.45156
Epoch 116/200
Epoch 116: val_loss did not improve from 0.45156
Epoch 117/200
Epoch 117: val_loss did not improve from 0.45156
Epoch 118/200
Epoch 118: val_loss did not improve from 0.45156
Epoch 119/200
Epoch 119: val_loss did not improve from 0.45156
Epoch 120/200
Epoch 120: val_loss did not improve from 0.45156
Epoch 121/200
Epoch 121: val_loss did not improve from 0.45156
Epoch 122/200
Epoch 122: val_loss did not improve from 0.45156
Epoch 123/200
Epoch 123: val_loss did not improve from 0.45156
Epoch 124/200
Epoch 124: val_loss did not improve from 0.45156
Epoch 125/200
Epoch 125: val_loss did not improve from 0.45156
Epoch 126/200
Epoch 126: val_loss did not improve from 0.45156
Epoch 127/200
Epoch 127: val_loss did not improve from 0.45156
Epoch 128/200
Epoch 128: val_loss did not improve from 0.45156
Epoch 129/200
Epoch 129: val_loss did not improve from 0.45156
Epoch 130/200
Epoch 130: val_loss did not improve from 

Epoch 144/200
Epoch 144: val_loss did not improve from 0.45156
Epoch 145/200
Epoch 145: val_loss did not improve from 0.45156
Epoch 146/200
Epoch 146: val_loss did not improve from 0.45156
Epoch 147/200
Epoch 147: val_loss did not improve from 0.45156
Epoch 148/200
Epoch 148: val_loss did not improve from 0.45156
Epoch 149/200
Epoch 149: val_loss did not improve from 0.45156
Epoch 150/200
Epoch 150: val_loss did not improve from 0.45156
Epoch 151/200
Epoch 151: val_loss did not improve from 0.45156
Epoch 152/200
Epoch 152: val_loss did not improve from 0.45156
Epoch 153/200
Epoch 153: val_loss did not improve from 0.45156
Epoch 154/200
Epoch 154: val_loss did not improve from 0.45156
Epoch 155/200
Epoch 155: val_loss did not improve from 0.45156
Epoch 156/200
Epoch 156: val_loss did not improve from 0.45156
Epoch 157/200
Epoch 157: val_loss did not improve from 0.45156
Epoch 158/200
Epoch 158: val_loss did not improve from 0.45156
Epoch 159/200
Epoch 159: val_loss did not improve from 

Epoch 174/200
Epoch 174: val_loss did not improve from 0.45156
Epoch 175/200
Epoch 175: val_loss did not improve from 0.45156
Epoch 176/200
Epoch 176: val_loss did not improve from 0.45156
Epoch 177/200
Epoch 177: val_loss did not improve from 0.45156
Epoch 178/200
Epoch 178: val_loss did not improve from 0.45156
Epoch 179/200
Epoch 179: val_loss did not improve from 0.45156
Epoch 180/200
Epoch 180: val_loss did not improve from 0.45156
Epoch 181/200
Epoch 181: val_loss did not improve from 0.45156
Epoch 182/200
Epoch 182: val_loss did not improve from 0.45156
Epoch 183/200
Epoch 183: val_loss did not improve from 0.45156
Epoch 184/200
Epoch 184: val_loss did not improve from 0.45156
Epoch 185/200
Epoch 185: val_loss did not improve from 0.45156
Epoch 186/200
Epoch 186: val_loss did not improve from 0.45156
Epoch 187/200
Epoch 187: val_loss did not improve from 0.45156
Epoch 188/200
Epoch 188: val_loss did not improve from 0.45156
Epoch 189/200
Epoch 189: val_loss did not improve from 

In [26]:
train_accuracy=model.evaluate(x_train,y_train,verbose=0)
print(train_accuracy[1])

0.9954522252082825


In [25]:
test_accuracy=model.evaluate(x_test,y_test,verbose=0)
print(test_accuracy[1])

0.9055523872375488


In [27]:
!pip install flask

In [28]:
model.save("audio_model")