<h1 style = "font-size:3rem;color:DarkCyan">Feature Comparison between audio files</h1>

In [120]:
#import libraries
import numpy as np
from mpl_toolkits import mplot3d
import matplotlib.pyplot as plt
import librosa, librosa.display 
import IPython.display as ipd
import scipy
import os
import pandas as pd

import sklearn

In [121]:
# import dataset, in this case different percussion sounds

filenames = os.listdir('./data')
print(f'Number of audio files: {len(filenames)} ')


Number of audio files: 167 


<h2 style = "font-size:2rem;color:DarkCyan">Extracting features</h2>

- Extracting features (Bandwidth, central spectroid, and zero-crossing rate) for each audio file, and store in a NxF matrix
- Giving a label

In [174]:
def extract_features(data):
    return [
        np.mean(librosa.feature.spectral_centroid(y = data)),
        np.mean(librosa.feature.spectral_bandwidth(y = data)),
        np.mean(librosa.feature.spectral_flatness(y = data)),
        np.mean(librosa.feature.spectral_rolloff(y = data)),
        np.mean(librosa.feature.mfcc(y = data)),
        np.mean(librosa.feature.spectral_contrast(y = data)),
        np.mean(librosa.feature.zero_crossing_rate(y = data)),
        np.mean(librosa.feature.rms(y = data)),
        np.mean(librosa.feature.tempogram(y=data))
    ]

In [124]:
def get_label(name):
    if name.find('clap') != -1:
        return 0
    elif name.find('cymbal') != -1:
        return 1
    elif name.find('kick') != -1:
        return 2
    else:
        return 3

In [187]:
sr = 48000
feature_names = ['sc','sbw','sf', 'sr', 'mfcc', 'scon', 'zcr', 'rms', 'tg']
amount_of_features = len(feature_names)

features = np.zeros((len(filenames), amount_of_features))
labels = np.zeros(len(filenames))

idx = 0

for name in filenames:
    data, dummy = librosa.load('./data/'+name, sr = sr, mono=True)
    
    # fill prediction array with correct label 
    labels[idx] = get_label(name)
    
    # extract features
    features[idx] = extract_features(data)
    
    idx +=1
    

<h2 style = "font-size:2rem;color:DarkCyan">Plotting features</h2>

In [185]:
f_dict = {'sc' : 0, 'sbw' : 1, 'sf': 2, 'sr' : 3, 'mfcc' : 4, 'scon' : 5,'zcr' : 6, 'rms': 7, 'tg' : 8}
# choose three features

f1 = 'mfcc'
f2 = 'tg'
f3 = 'sbw'

# plot the features
%matplotlib notebook
#fig = plt.figure(figsize = (15, 9))
ax = plt.axes(projection ="3d")
ax.scatter3D(features[np.where(labels == 0),f_dict[f1]], features[np.where(labels == 0),f_dict[f2]], features[np.where(labels == 0),f_dict[f3]], label = 'clap')
ax.scatter3D(features[np.where(labels == 1),f_dict[f1]], features[np.where(labels == 1),f_dict[f2]], features[np.where(labels == 1),f_dict[f3]], label = 'cymbal')
ax.scatter3D(features[np.where(labels == 2),f_dict[f1]], features[np.where(labels == 2),f_dict[f2]], features[np.where(labels == 2),f_dict[f3]], label = 'kick')
ax.scatter3D(features[np.where(labels == 3),f_dict[f1]], features[np.where(labels == 3),f_dict[f2]], features[np.where(labels == 3),f_dict[f3]], label = 'snare')


#plt.scatter(feature2[:,0], feature2[:,1])
plt.title('Scatter plot of computed means')
ax.set_xlabel(f1)
ax.set_ylabel(f2)
ax.set_zlabel(f3)
ax.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x250317a49a0>

<h2 style = "font-size:2rem;color:DarkCyan">Scale features & plot again</h2>

- scale features for better comparison (after this step features will present zero mean and unit variance)

In [176]:
print('Mean and std before scaling',np.mean(features, axis=0),np.std(features, axis=0))

features = sklearn.preprocessing.scale(features)

print('Mean and std after scaling',np.mean(features, axis=0),np.std(features, axis=0))

Mean and std before scaling [ 1.60794752e+03  1.46614746e+03  1.78297782e-01  3.14417057e+03
 -1.81131847e+01  2.06562447e+01  8.94566901e-02  5.07136850e-02
  8.48584916e-02] [8.95756004e+02 7.02382780e+02 2.59673601e-01 1.77189515e+03
 9.59610701e+00 2.78300000e+00 6.32586899e-02 8.92786962e-02
 8.92904814e-02]
Mean and std after scaling [-9.67954924e-16 -3.90240069e-16  7.97765048e-18  4.21485867e-16
  2.88857428e-16  1.58090440e-15 -5.63753967e-16  4.48742839e-17
 -5.24530519e-16] [1. 1. 1. 1. 1. 1. 1. 1. 1.]


In [130]:
# plot the features
%matplotlib notebook
#fig = plt.figure(figsize = (15, 9))
ax = plt.axes(projection ="3d")
ax.scatter3D(features[np.where(labels == 0),f_dict[f1]], features[np.where(labels == 0),f_dict[f2]], features[np.where(labels == 0),f_dict[f3]], label = 'clap')
ax.scatter3D(features[np.where(labels == 1),f_dict[f1]], features[np.where(labels == 1),f_dict[f2]], features[np.where(labels == 1),f_dict[f3]], label = 'cymbal')
ax.scatter3D(features[np.where(labels == 2),f_dict[f1]], features[np.where(labels == 2),f_dict[f2]], features[np.where(labels == 2),f_dict[f3]], label = 'kick')
ax.scatter3D(features[np.where(labels == 3),f_dict[f1]], features[np.where(labels == 3),f_dict[f2]], features[np.where(labels == 3),f_dict[f3]], label = 'snare')


#plt.scatter(feature2[:,0], feature2[:,1])
plt.title('Scatter plot of computed means')
ax.set_xlabel(f1)
ax.set_ylabel(f2)
ax.set_zlabel(f3)
ax.legend()

<IPython.core.display.Javascript object>

<matplotlib.legend.Legend at 0x2502de3d580>

<h2 style = "font-size:2rem;color:DarkCyan">Correlation Matrix</h2>

In [191]:
df = pd.DataFrame(features, columns=feature_names)
corr = df.corr()
corr.style.format(na_rep='MISS', precision=3)  
corr.style.background_gradient(cmap='coolwarm')
# 'RdBu_r', 'BrBG_r', & PuOr_r are other good diverging colormaps

# Fill diagonal and upper half with NaNs
mask = np.zeros_like(corr, dtype=bool)
mask[np.triu_indices_from(mask)] = True
corr[mask] = np.nan
corr.style.format(na_rep='MISS', precision=3) 
(corr
 .style
 .background_gradient(cmap='coolwarm', axis=None, vmin=-1, vmax=1)
 .highlight_null(null_color='#f1f1f1')  # Color NaNs grey
 )

Unnamed: 0,sc,sbw,sf,sr,mfcc,scon,zcr,rms,tg
sc,,,,,,,,,
sbw,0.915217,,,,,,,,
sf,-0.687226,-0.721321,,,,,,,
sr,0.979052,0.967693,-0.681367,,,,,,
mfcc,-0.285682,-0.346253,-0.10081,-0.318451,,,,,
scon,0.397573,0.333714,-0.547529,0.3571,-0.08436,,,,
zcr,0.900415,0.689934,-0.548818,0.812657,-0.273633,0.403553,,,
rms,-0.178906,-0.229261,-0.116176,-0.19601,0.479864,0.184491,-0.160377,,
tg,0.370354,0.515988,-0.375817,0.428853,-0.486292,0.258185,0.240433,-0.29737,


<h2 style = "font-size:2rem;color:DarkCyan">Dimensionality Reduction</h2>

In [194]:
# from the correlation matrix can be seen that there are 
# spectral centroid, bandwidth, and roll-off are strongly correrelated 
# with other features, these can be discarded to simplify the model
features_to_exclude = ['sc', 'sbw', 'sr']
print(np.shape(features))
indexes = np.where(np.isin(feature_names,features_to_exclude))
print(indexes)

selected_features = []
new_feature_names =[]
for i in range(0,amount_of_features):
    if not np.isin(i,indexes):
        selected_features.append(features[:,i])
        new_feature_names.append(feature_names[i])
        
#features = features[:,2:]
selected_features= np.array(selected_features).T
print(np.shape(selected_features))
print(new_feature_names)
#feature_names = feature_names[2:]

(167, 9)
(array([0, 1, 3], dtype=int64),)
(167, 6)
['sf', 'mfcc', 'scon', 'zcr', 'rms', 'tg']


<h2 style = "font-size:2rem;color:DarkCyan">Save Features</h2>

- scale features for better comparison (after this step features will present zero mean and unit variance)

In [195]:
#merging everything into a single data structure
dataset = pd.DataFrame(selected_features)
dataset.columns = new_feature_names #naming the features
dataset['label'] = labels
dataset['filename'] = filenames

#we can save the dataset to a file to resume working without re-computing the features (unless you want to change them)
dataset.to_csv('dataset.csv')
dataset

Unnamed: 0,sf,mfcc,scon,zcr,rms,tg,label,filename
0,0.009266,-16.176497,20.150281,0.238694,0.042907,0.122623,0.0,clap00.wav
1,0.060073,-15.045925,20.220686,0.181085,0.091807,0.012034,0.0,clap01.wav
2,0.027080,-18.072697,20.386491,0.186275,0.060785,0.035546,0.0,clap02.wav
3,0.053378,-29.048000,23.282097,0.261476,0.059042,0.148375,0.0,clap03.wav
4,0.002444,-28.880560,21.106727,0.120479,0.012474,0.073228,0.0,clap04.wav
...,...,...,...,...,...,...,...,...
162,0.035142,-18.951557,21.133549,0.178874,0.022656,0.015246,3.0,snare_37.wav
163,0.022006,-19.046724,20.261291,0.146732,0.024521,0.038750,3.0,snare_38.wav
164,0.015060,-21.076578,22.140637,0.103575,0.019697,0.017071,3.0,snare_39.wav
165,0.021603,-20.621740,20.365652,0.131215,0.022071,0.027501,3.0,snare_40.wav
