In [1]:
import os
import sys
from os import listdir
from os.path import isfile, join
import IPython.display as ipd
import librosa 
import librosa.display
%matplotlib inline
import matplotlib.pyplot as plt
import pandas as pd
from scipy.io import wavfile as wav
import numpy as np
from timeit import default_timer as timer

In [2]:
def extract_MElandChroma_stft(filename):
    audio,sample_rate=librosa.load(filename, res_type='kaiser_fast')
    chroma_stft = np.mean(librosa.feature.chroma_stft(y=audio, sr=sample_rate,n_chroma=50).T,axis=0)
    mel=np.mean(librosa.feature.melspectrogram(audio ,sr=sample_rate, n_mels=50,fmax=8000).T, axis=0)
    
    return chroma_stft, mel

In [3]:
a=extract_MElandChroma_stft("UrbanSound8K/audio/fold1/103074-7-0-1.wav")

In [4]:
metadata=pd.read_csv("UrbanSound8k/metadata/UrbanSound8k.csv")
metadata.head()

Unnamed: 0,slice_file_name,fsID,start,end,salience,fold,classID,class
0,100032-3-0-0.wav,100032,0.0,0.317551,1,5,3,dog_bark
1,100263-2-0-117.wav,100263,58.5,62.5,1,5,2,children_playing
2,100263-2-0-121.wav,100263,60.5,64.5,1,5,2,children_playing
3,100263-2-0-126.wav,100263,63.0,67.0,1,5,2,children_playing
4,100263-2-0-137.wav,100263,68.5,72.5,1,5,2,children_playing


In [5]:
fold_list = ['fold1', 'fold2', 'fold3', 'fold4', 'fold5', 'fold6', 'fold7', 'fold8', 'fold9', 'fold10']

In [None]:
stacked_features = []
exceptions=0

start_time = timer()
for i in range(10):
    # get file names
    mypath = 'UrbanSound8K/audio/'+ fold_list[i] + '/'
    files = [mypath + f for f in listdir(mypath) if isfile(join(mypath, f))]
    
    for fn in files:
        try: # extract features
            mfccs,stft = extract_MElandChroma_stft(fn)
            features=np.reshape((np.hstack([mfccs,stft])),(20,5))
    
            
            
        except: # else exception (.ds_store files are part of mac file systems)
            print(fn)
            exceptions += 1
            continue
            
        l_row = metadata.loc[metadata['slice_file_name']==fn.split('/')[-1]].values.tolist()
        label = l_row[0][-1]
        fold = i+1
    
        stacked_features.append([features, features.shape, label, fold])
        
            #print(f,old_samplerate,ss)
        
print("Exceptions: ", exceptions)
end_time = timer()
print(print("time taken: {0} minutes {1:.1f} seconds".format((end_time - start_time)//60, (end_time - start_time)%60)))
print('Finished feature extraction from all folder')



UrbanSound8K/audio/fold1/.DS_Store




In [None]:
cols=['Stacked_Features', 'Matrix_Shape', 'Label', 'Fold']
Stacked_feature_pd=pd.DataFrame(data=stacked_features , columns=cols)
Stacked_feature_pd.head()

In [7]:
Stacked_feature_pd.Label.unique()

array(['dog_bark', 'gun_shot', 'jackhammer', 'engine_idling',
       'children_playing', 'siren', 'street_music', 'air_conditioner',
       'drilling', 'car_horn'], dtype=object)

In [8]:
from sklearn.preprocessing import LabelEncoder

X = np.reshape(np.array(Stacked_feature_pd.Stacked_Features.tolist()), (8732, 100))
y = np.array(Stacked_feature_pd.Label.tolist())

le = LabelEncoder()
yy = le.fit_transform(y)
print(yy.shape)

(8732,)


In [9]:
from sklearn.preprocessing import StandardScaler
scaler= StandardScaler()

In [10]:
from sklearn.model_selection import train_test_split 

x_train, x_test, y_train, y_test = train_test_split(scaler.fit_transform(X), yy, test_size=0.2,random_state=42)

In [11]:
print(x_train.shape, x_test.shape, y_train.shape , y_test.shape)

(6985, 100) (1747, 100) (6985,) (1747,)


In [12]:
print(x_train[0][:10])

[-0.6894514  -0.04691766 -1.0600822  -0.08705591 -0.7620535  -0.6229462
  0.16864659 -1.5024855   0.5692391  -1.4576662 ]


### Classification using KNN

In [13]:
from sklearn.neighbors import KNeighborsClassifier

In [14]:
knn_model=KNeighborsClassifier(n_neighbors=6) 

In [15]:
x_train=np.reshape(x_train,(6985,100))
x_test=np.reshape(x_test,(1747,100))
x_train.shape

(6985, 100)

In [16]:
start_time=timer()
knn_model.fit(x_train, y_train)
end_time=timer()
print("Total time required is {:.2f} mins and {:.2f} secs".format( (end_time-start_time)//60, (end_time-start_time)%60 ) )

Total time required is 0.00 mins and 1.10 secs


In [17]:
print("Training accuracy is {:.2f}".format(knn_model.score(x_train, y_train)))
print("Testing accuracy is {:.2f}".format(knn_model.score(x_test, y_test)))

Training accuracy is 0.91
Testing accuracy is 0.85


**Not much of a difference between Training and Testing accuracy so the model is not overfitted**

### Classification using Decision Tree

In [18]:
from sklearn.tree import DecisionTreeClassifier
tree_model=DecisionTreeClassifier()

In [19]:
start_time=timer()
tree_model.fit(x_train, y_train)
end_time=timer()
print("Total time required is {:.2f} mins and {:.2f} secs".format( (end_time-start_time)//60, (end_time-start_time)%60 ) )

Total time required is 0.00 mins and 3.97 secs


In [20]:
print("Training accuracy is {}".format(tree_model.score(x_train, y_train)))
print("Testing accuracy is {:.2f}".format(tree_model.score(x_test, y_test)))

Training accuracy is 1.0
Testing accuracy is 0.67


### Classification using SVM

In [21]:
from sklearn.svm import SVC

In [22]:
svc= SVC(kernel="rbf")

In [23]:
start_time=timer()
svc.fit(x_train, y_train)
end_time=timer()
print("Total time required is {:.2f} mins and {:.2f} secs".format( (end_time-start_time)//60, (end_time-start_time)%60 ) )

Total time required is 0.00 mins and 17.00 secs


In [24]:
print("Training accuracy is {}".format(svc.score(x_train, y_train)))
print("Testing accuracy is {:.2f}".format(svc.score(x_test, y_test)))

Training accuracy is 0.9291338582677166
Testing accuracy is 0.89


**There is a clear sign of overfitting**

### Classification using Ensemble Classifiers

In [25]:
from sklearn.ensemble import RandomForestClassifier

In [26]:
rf=RandomForestClassifier(n_estimators=500)

In [27]:
start_time=timer()
rf.fit(x_train, y_train)
end_time=timer()
print("Total time required is {:.2f} mins and {:.2f} secs".format( (end_time-start_time)//60, (end_time-start_time)%60 ) )

Total time required is 2.00 mins and 8.11 secs


In [28]:
print("Training accuracy is {}".format(rf.score(x_train, y_train)))
print("Testing accuracy is {:.2f}".format(rf.score(x_test, y_test)))

Training accuracy is 1.0
Testing accuracy is 0.92


**Overfitting using RandomForestClassifiers**

In [29]:
from sklearn.ensemble import BaggingClassifier, AdaBoostClassifier

In [30]:
bg=BaggingClassifier()

In [31]:
start_time = timer()
bg.fit(x_train, y_train)
end_time = timer()
print("Total time required is {:.2f} mins and {:.2f} secs".format( (end_time-start_time)//60, (end_time-start_time)%60 ) )

Total time required is 0.00 mins and 21.62 secs


In [32]:
print("Training accuracy of the Bagging Classifier {:.2f}".format(bg.score(x_train, y_train)) )
print("Testing accuracy of the Bagging Classifier {:.2f}".format(bg.score(x_test, y_test)) )

Training accuracy of the Bagging Classifier 1.00
Testing accuracy of the Bagging Classifier 0.82


In [33]:
ada= AdaBoostClassifier()

In [34]:
start_time= timer()
ada.fit(x_train, y_train)
end_time= timer()
print("Total time required is {:.2f} mins and {:.2f} secs".format( (end_time-start_time)//60, (end_time-start_time)%60 ) )

Total time required is 0.00 mins and 17.83 secs


In [35]:
print("Training accuracy of the AdaBoost Classifier {:.2f}".format(ada.score(x_train, y_train)) )
print("Testing accuracy of the AdaBoost Classifier {:.2f}".format(ada.score(x_test, y_test)) )

Training accuracy of the AdaBoost Classifier 0.42
Testing accuracy of the AdaBoost Classifier 0.42


In [36]:
from sklearn.ensemble import GradientBoostingClassifier

In [37]:
gbc = GradientBoostingClassifier()
start_time= timer()
gbc.fit(x_train, y_train)
end_time= timer()
print("Total time required is {:.2f} mins and {:.2f} secs".format( (end_time-start_time)//60, (end_time-start_time)%60 ) )

Total time required is 12.00 mins and 54.99 secs


In [38]:
print("Training accuracy of the Gradient Boosting Classifier {:.2f}".format(gbc.score(x_train, y_train)) )
print("Testing accuracy of the Gradient Boosting Classifier {:.2f}".format(gbc.score(x_test, y_test)) )

Training accuracy of the Gradient Boosting Classifier 0.96
Testing accuracy of the Gradient Boosting Classifier 0.85


In [39]:
from sklearn.linear_model import LogisticRegression
log =LogisticRegression(max_iter=1000)

In [40]:
log.fit(x_train, y_train)

LogisticRegression(max_iter=1000)

In [41]:
print("Training accuracy of the Gradient Boosting Classifier {:.2f}".format(log.score(x_train, y_train)) )
print("Testing accuracy of the Gradient Boosting Classifier {:.2f}".format(log.score(x_test, y_test)) )

Training accuracy of the Gradient Boosting Classifier 0.74
Testing accuracy of the Gradient Boosting Classifier 0.69


In [42]:
### Unsupervised Learning 
from sklearn.cluster import KMeans
kms= KMeans(n_clusters=10)

In [43]:
kms.fit(x_train)

KMeans(n_clusters=10)

In [44]:
pd.crosstab(kms.predict(x_train), y_train)

col_0,0,1,2,3,4,5,6,7,8,9
row_0,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1
0,190,28,68,35,373,204,19,308,24,75
1,0,82,0,0,16,0,0,0,1,5
2,0,21,0,0,0,0,0,0,0,0
3,299,51,362,198,59,321,30,14,240,218
4,0,3,0,0,15,0,0,54,0,0
5,88,10,95,67,17,90,26,60,223,149
6,30,20,126,254,56,66,103,2,160,201
7,187,74,62,46,216,109,6,345,55,132
8,11,13,89,210,26,13,133,0,22,26
9,0,54,0,0,0,0,0,0,0,0


In [1]:
### Visualizing the clusters formed with the MFCC features 
from sklearn.decomposition import PCA

In [2]:
cols=["mfcc_"+str(i) for i in range(1,101)]

In [3]:
X.shape

NameError: name 'X' is not defined

In [None]:
mfcc_pd=pd.DataFrame(data=scaler.fit_transform(X), columns=cols)
mfcc_pd["labels"]=y
mfcc_pd.head()


In [None]:
pca=PCA(n_components=2)

In [None]:
pcas=pca.fit_transform(mfcc_pd.drop("labels", axis=1))

In [None]:
pca_cols=["pca_"+str(i) for i in range(1,3)]

In [None]:
main_df=pd.DataFrame(data=pcas, columns=pca_cols)
main_df["labels"]=y
main_df.head()

In [None]:
main_df.labels.unique()

In [None]:
fig = plt.figure(figsize = (8,8))
ax = fig.add_subplot(1,1, 1) 
ax.set_xlabel('Principal Component 1', fontsize = 15)
ax.set_ylabel('Principal Component 2', fontsize = 15)
ax.set_title('2 component PCA', fontsize = 20)
targets = ['dog_bark', 'gun_shot', 'jackhammer', 'engine_idling',
       'children_playing', 'siren', 'street_music', 'air_conditioner',
       'drilling', 'car_horn']
colors = ['#1f77b4', '#ff7f0e', '#2ca02c', '#d62728', '#9467bd', '#8c564b', '#e377c2', '#7f7f7f', '#bcbd22', '#17becf']
for target, color in zip(targets,colors):
    indicesToKeep = main_df['labels']== target
    #print(indicesToKeep[:10])
    ax.scatter(list(main_df.loc[indicesToKeep, 'PCA_1'])[:100]
               ,list(main_df.loc[indicesToKeep, 'PCA_2'])[:100]
               , c = color
               , s = 50)
ax.legend(['dog_bark', 'gun_shot', 'jackhammer', 'engine_idling',
       'children_playing', 'siren', 'street_music', 'air_conditioner',
       'drilling', 'car_horn'])
ax.grid()

In [4]:
pcas=pca.explained_variance_ratio_
print("Total information ", sum(pcas))

NameError: name 'pca' is not defined

In [None]:
x_train, x_test, y_train, y_test = train_test_split(main_df.drop("labels", axis=1), main_df["labels"], test_size=0.2, random_state=42)

In [None]:
print(x_train.shape, x_test.shape, y_train.shape , y_test.shape)

In [None]:
svc_2=SVC()
svc_2.fit(x_train, y_train)

In [None]:
svc_2.score(x_test, y_test)

In [None]:
x_test.shape[1]

In [None]:
# Predictions using SVC on 50 features
predictions=svc_2.predict(x_test)


In [None]:
from sklearn.metrics import confusion_matrix

In [None]:
 mat=confusion_matrix(predictions, y_test)

In [None]:
def acc_per_class(np_probs_array):    
    accs = []
    for idx in range(0, np_probs_array.shape[0]):
        correct = np_probs_array[idx][idx].astype(int)
        total = np_probs_array[idx].sum().astype(int)
        acc = (correct / total) * 100
        accs.append(acc)
    return accs

In [None]:
accuracies=acc_per_class(mat)
labels = [
        'Air Conditioner',
        'Car Horn',
        'Children Playing',
        'Dog bark',
        'Drilling',
        'Engine Idling',
        'Gun Shot',
        'Jackhammer',
        'Siren',
        'Street Music'
    ]

pd.DataFrame({'CLASS':labels, 'ACCURACY': accuracies}).sort_values(by='ACCURACY', ascending=False)