# Importing libraries and loading the metadata that lists the location and class of each sound file

In [2]:
import numpy as np
from tqdm import tqdm
import pandas as pd
import os
import librosa
import librosa.display
import IPython.display as ipd

# Single place to edit when the project folder moves; both paths below are
# derived from it, so the audio folder and the metadata file cannot drift apart.
project_dir = 'C:/Post Graduate Course in Data Analytics/CAPSTONE PROJECT 2/'

# Folder holding the fold1 ... foldN sub-directories with the .wav clips.
audio_dataset_path = project_dir + 'audio/'

# One row per clip: file name, fold number and class label (among others).
metadata = pd.read_csv(project_dir + 'metadata/UrbanSound8K.csv')
metadata.head()

Unnamed: 0,slice_file_name,fsID,start,end,salience,fold,classID,class
0,100032-3-0-0.wav,100032,0.0,0.317551,1,5,3,dog_bark
1,100263-2-0-117.wav,100263,58.5,62.5,1,5,2,children_playing
2,100263-2-0-121.wav,100263,60.5,64.5,1,5,2,children_playing
3,100263-2-0-126.wav,100263,63.0,67.0,1,5,2,children_playing
4,100263-2-0-137.wav,100263,68.5,72.5,1,5,2,children_playing


# Extract Features

We will be using Mel-Frequency Cepstral Coefficients (MFCC) from the audio samples. The MFCC summarizes the frequency distribution across the window size, so it is possible to analyse both the frequency and time characteristics of the sound. These audio representations will allow us to identify features for classification.

# Creating a user-defined function that extracts MFCC features, representing each audio signal as a numeric array

In [2]:
def features_extractor(file, n_mfcc=40):
    """Return a fixed-length MFCC feature vector for one audio file.

    Parameters
    ----------
    file : str or path-like
        Path of the audio file to load.
    n_mfcc : int, optional
        Number of MFCC coefficients to compute (default 40).

    Returns
    -------
    numpy.ndarray
        1-D array of length ``n_mfcc``: each coefficient averaged over all
        time frames of the clip.
    """
    # Load the file that was passed in, not a global variable that happens to
    # exist in the notebook session.
    audio, sample_rate = librosa.load(file, res_type='kaiser_fast')
    mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=n_mfcc)
    # mfcc() yields one row per coefficient and one column per frame; transposing
    # and averaging over axis 0 collapses the time axis.
    mfccs_scaled_features = np.mean(mfccs_features.T, axis=0)
    return mfccs_scaled_features

# Iterating over every row of the metadata (with a tqdm progress bar) to load each audio file from its fold folder and file name

In [3]:
# Collect one [mfcc_vector, class_label] pair per clip listed in the metadata.
extracted_features = []

# The absolute audio root does not change between rows, so resolve it once.
audio_root = os.path.abspath(audio_dataset_path)

# iterrows() is a generator with no length; passing total lets tqdm show a
# percentage and a time estimate instead of a bare iteration counter.
for index_num, row in tqdm(metadata.iterrows(), total=len(metadata)):
    # Clips are stored as <audio root>/fold<N>/<slice_file_name>.
    file_name = os.path.join(audio_root, 'fold' + str(row["fold"]), str(row["slice_file_name"]))
    final_class_labels = row["class"]
    data = features_extractor(file_name)
    extracted_features.append([data, final_class_labels])

8731it [18:37,  7.81it/s]


# Transforming the list of extracted features and class labels into a DataFrame

In [4]:
# Each entry of extracted_features is a [mfcc_vector, label] pair, so the
# resulting table has one row per clip and two columns.
feature_columns = ['feature', 'class']
extracted_features_df = pd.DataFrame(data=extracted_features, columns=feature_columns)
extracted_features_df.head()

Unnamed: 0,feature,class
0,"[-215.79301, 71.66612, -131.81377, -52.091328,...",dog_bark
1,"[-424.68677, 110.56227, -54.148235, 62.01073, ...",children_playing
2,"[-459.56467, 122.800354, -47.92471, 53.265694,...",children_playing
3,"[-414.55377, 102.896904, -36.66495, 54.180405,...",children_playing
4,"[-447.397, 115.0954, -53.809113, 61.608585, 1....",children_playing


# Splitting the dataframe into independent and dependent variables

In [5]:
# Independent variable: the per-clip MFCC vectors stacked into one array.
feature_rows = extracted_features_df['feature'].tolist()
x = np.array(feature_rows)

# Dependent variable: the class name of every clip, as a NumPy string array.
class_labels = extracted_features_df['class'].tolist()
y = np.array(class_labels)

In [6]:
# Sanity check: one row per clip and one column per MFCC coefficient.
x.shape

(8731, 40)

In [7]:
# Inspect the label array (class names stored as strings).
y

array(['dog_bark', 'children_playing', 'children_playing', ...,
       'car_horn', 'car_horn', 'car_horn'], dtype='<U16')

# Further splitting the dataset into train and test parts

In [8]:
from sklearn.model_selection import train_test_split

# Hold out 25% of the clips for evaluation; the fixed random_state makes the
# split reproducible across runs.
xtrain, xtest, ytrain, ytest = train_test_split(
    x,
    y,
    test_size=0.25,
    random_state=123,
)

# Training the model through Extreme Gradient Boosting Classifier model

In [4]:
from xgboost import XGBClassifier
# Classifier created with the library's default hyperparameters.
# NOTE(review): the variable name `xgb` is also the conventional alias for the
# xgboost module itself; here it refers to the estimator instance.
xgb=XGBClassifier()

In [10]:
# Fit on the training split; ytrain holds the class names as strings.
# NOTE(review): newer XGBoost releases may reject non-integer class labels and
# require them to be encoded first - confirm against the installed version.
xgbmodel=xgb.fit(xtrain,ytrain)





# Making predictions on the train data

In [11]:
# Predictions on the data the model was fitted on (useful only as a reference
# point for spotting overfitting, not as a performance estimate).
ypred_trainxgb=xgbmodel.predict(xtrain)
ypred_trainxgb

array(['air_conditioner', 'drilling', 'dog_bark', ..., 'engine_idling',
       'air_conditioner', 'children_playing'], dtype='<U16')

# Making predictions on the test data

In [12]:
# Predictions on the held-out test split; these feed the metrics below.
ypred_xgb=xgbmodel.predict(xtest)
ypred_xgb

array(['air_conditioner', 'street_music', 'street_music', ..., 'drilling',
       'street_music', 'gun_shot'], dtype='<U16')

# Checking the accuracy on the test data

In [13]:
from sklearn.metrics import classification_report,accuracy_score

In [15]:
# Per-class precision, recall and F1 on the held-out test split.
test_report = classification_report(ytest, ypred_xgb)
print(test_report)

                  precision    recall  f1-score   support

 air_conditioner       0.97      0.96      0.96       253
        car_horn       0.98      0.78      0.87       105
children_playing       0.83      0.84      0.84       270
        dog_bark       0.85      0.82      0.83       253
        drilling       0.89      0.94      0.91       231
   engine_idling       0.93      0.96      0.94       231
        gun_shot       0.99      0.86      0.92        99
      jackhammer       0.93      0.94      0.94       256
           siren       0.96      0.95      0.95       239
    street_music       0.77      0.84      0.81       246

        accuracy                           0.90      2183
       macro avg       0.91      0.89      0.90      2183
    weighted avg       0.90      0.90      0.90      2183



In [16]:
# Overall test accuracy expressed as a percentage, rounded to two decimals.
accuracy = round(accuracy_score(ytest, ypred_xgb) * 100, 2)
print(accuracy, '%')

89.6 %


# Saving the Model

In [17]:
# Persist the fitted model to disk so it can be reloaded without retraining.
# NOTE(review): XGBoost chooses the serialization format from the file
# extension (e.g. .json / .ubj); confirm which format a '.txt' name produces
# with the installed version.
xgbmodel.save_model('C:/Post Graduate Course in Data Analytics/CAPSTONE PROJECT 2/Extreme Gradient Boosting Model.txt')

# Loading the Model to make predictions

In [5]:
# Create an empty classifier and restore the trained state from the file
# written by save_model above; pred_model is what predict_audio uses.
pred_model=XGBClassifier()
pred_model.load_model('C:/Post Graduate Course in Data Analytics/CAPSTONE PROJECT 2/Extreme Gradient Boosting Model.txt')

# Making a user-defined function to predict the source of audio signals

In [6]:
def predict_audio(file):
    """Predict the sound class of a single audio file.

    The clip is converted to the same 40-coefficient, time-averaged MFCC vector
    used for training and passed to the loaded ``pred_model``.

    Parameters
    ----------
    file : str or path-like
        Path of the audio file to classify.

    Returns
    -------
    The predicted class for the clip (first element of ``pred_model.predict``).
    The value is also printed, so existing calls that ignore the return value
    show the same output as before.
    """
    audio, sample_rate = librosa.load(file, res_type='kaiser_fast')
    mfccs_features = librosa.feature.mfcc(y=audio, sr=sample_rate, n_mfcc=40)
    # Average over time frames, then reshape to a single-row 2-D array because
    # predict() expects one row per sample.
    mfccs_scaled_features = np.mean(mfccs_features.T, axis=0).reshape(1, -1)
    predicted_label = pred_model.predict(mfccs_scaled_features)[0]
    print(predicted_label)
    # Return the label instead of the None that `return print(...)` produced,
    # so callers can use the prediction programmatically.
    return predicted_label

# Loading the sound file and getting the predicted source

In [12]:
# Clip to classify.
filename='C:/Post Graduate Course in Data Analytics/CAPSTONE PROJECT 2/audio/fold10/25037-6-0-0.wav'
# predict_audio prints the predicted class for the clip.
predict_audio(filename)
# Last expression of the cell: shows an inline audio player so the clip can be
# listened to alongside the prediction.
ipd.Audio(filename)

gun_shot
