In [8]:
import numpy as np
import librosa
import os
import matplotlib.pyplot as plt
import sklearn.svm
import IPython.display as ipd
import scipy as sp
%matplotlib inline
from sklearn.mixture import BayesianGaussianMixture
from sklearn.model_selection import train_test_split
import seaborn as sns
import pandas as pd





In [9]:
def compute_mfcc(audio, fs, n_mfcc):
    # Compute the spectrogram of the audio signal
    X = np.abs(librosa.stft(
        audio,
        window='hamming',
        n_fft=1024,
        hop_length=512,)
        )
    
    # Find the weights of the mel filters
    mel = librosa.filters.mel(
        sr=fs,
        n_fft=1024,
        n_mels=40,
        fmin=133.33,
        fmax=6853.8,
    )
    
    # Apply the filters to spectrogram
    melspectrogram = np.dot(mel, X)
    # Take the logarithm
    log_melspectrogram = np.log10(melspectrogram + 1e-16)
    
    # Apply the DCT to log melspectrogram to obtain the coefficients
    mfcc = sp.fftpack.dct(log_melspectrogram, axis=0, norm='ortho')[1:n_mfcc+1]
    return mfcc

In [12]:
# define the dictionary containing the 10 classes
class_train_files = {'zero': [], 'one': [], 'two': [], 'three': [], 'four': [], 'five': [], 'six': [], 'seven': [], 'eight': [], 'nine': []}

root = '../free-spoken-digit-dataset/recordings'
for f in os.listdir(root):
    for index, c in enumerate(class_train_files):
        if int(f[0]) == index:
            class_train_files[c].append(f)

In [13]:
# listen to an audio file
audio, fs = librosa.load(os.path.join(root, class_train_files['zero'][3]), sr=None)
ipd.Audio(audio, rate=fs) # load the file

In [14]:
df = pd.DataFrame(class_train_files)
# split the data into train and test set (for now 10% test)
train, test = train_test_split(df, test_size=0.1, random_state=42, shuffle=True) #0.1 perchè abbiamo fatto una divisione del 10%
print(train.shape[0])
print(test.shape[0])

x = np.empty(train.shape[0], object)# serve dopo

270
30


In [15]:
df

Unnamed: 0,zero,one,two,three,four,five,six,seven,eight,nine
0,0_theo_26.wav,1_jackson_37.wav,2_jackson_41.wav,3_nicolas_6.wav,4_yweweler_43.wav,5_yweweler_45.wav,6_lucas_14.wav,7_nicolas_24.wav,8_jackson_31.wav,9_george_7.wav
1,0_nicolas_31.wav,1_jackson_33.wav,2_yweweler_18.wav,3_george_44.wav,4_theo_45.wav,5_jackson_32.wav,6_george_4.wav,7_jackson_38.wav,8_theo_0.wav,9_theo_34.wav
2,0_yweweler_6.wav,1_lucas_49.wav,2_nicolas_29.wav,3_theo_7.wav,4_jackson_20.wav,5_jackson_31.wav,6_yweweler_40.wav,7_theo_4.wav,8_lucas_35.wav,9_nicolas_49.wav
3,0_george_37.wav,1_jackson_19.wav,2_jackson_24.wav,3_jackson_44.wav,4_theo_9.wav,5_nicolas_28.wav,6_nicolas_32.wav,7_lucas_12.wav,8_theo_16.wav,9_yweweler_47.wav
4,0_jackson_49.wav,1_nicolas_29.wav,2_lucas_11.wav,3_nicolas_41.wav,4_nicolas_39.wav,5_george_7.wav,6_lucas_33.wav,7_george_37.wav,8_george_35.wav,9_jackson_18.wav
...,...,...,...,...,...,...,...,...,...,...
295,0_jackson_3.wav,1_theo_6.wav,2_theo_23.wav,3_george_24.wav,4_lucas_6.wav,5_theo_31.wav,6_lucas_47.wav,7_nicolas_21.wav,8_yweweler_4.wav,9_george_13.wav
296,0_theo_21.wav,1_george_41.wav,2_george_18.wav,3_jackson_30.wav,4_theo_22.wav,5_jackson_18.wav,6_jackson_14.wav,7_yweweler_15.wav,8_yweweler_32.wav,9_george_41.wav
297,0_george_24.wav,1_george_4.wav,2_lucas_36.wav,3_jackson_16.wav,4_yweweler_44.wav,5_lucas_8.wav,6_george_20.wav,7_yweweler_23.wav,8_george_18.wav,9_nicolas_44.wav
298,0_nicolas_15.wav,1_theo_39.wav,2_theo_31.wav,3_yweweler_49.wav,4_lucas_13.wav,5_jackson_28.wav,6_yweweler_3.wav,7_lucas_35.wav,8_nicolas_36.wav,9_nicolas_27.wav


In [25]:
# initialize dictionaries
dict_train_mfcc = {'zero': [], 'one': [], 'two': [], 'three': [], 'four': [], 'five': [], 'six': [], 'seven': [], 'eight': [], 'nine': []}
dict_test_mfcc = {'zero': [], 'one': [], 'two': [], 'three': [], 'four': [], 'five': [], 'six': [], 'seven': [], 'eight': [], 'nine': []}


# why 13? try different values. check warning
n_mfcc = 13

df_list = [train, test]

# for train and test
for index, data in enumerate(df_list):
    # for each class
    for c in data:
        # set up array for storing mfcc
        if index == 0:
            dict_train_mfcc[c] = np.zeros(((len(data[c])), n_mfcc))
        else:
            dict_test_mfcc[c] = np.zeros(((len(data[c])), n_mfcc))
        #temp_features = np.zeros(((len(data[c])), n_mfcc))
        # compute mfcc
        for index2, f in enumerate(data[c]):
            audio, fs = librosa.load(os.path.join(root, f), sr=None)
            mfcc = librosa.feature.mfcc(y=audio, sr=fs, n_mfcc=n_mfcc, n_fft=1024, n_mels=40)
            #temp_features[index2, :] = np.mean(mfcc, axis=1)
            # store results for current class and file
            if index == 0:
                dict_train_mfcc[c][index2, :] = np.mean(mfcc, axis=1)
            else:
                dict_test_mfcc[c][index2, :] = np.mean(mfcc, axis=1)
print('finished')

finished


array([[-1.37122299e+02,  7.75059967e+01,  1.59639025e+01, ...,
        -3.98335844e-01,  2.91935176e-01,  1.79731810e+00],
       [-1.64826126e+02,  2.86775570e+01,  1.12079573e+01, ...,
        -1.27427940e+01, -3.38828683e+00, -7.15866280e+00],
       [-1.43726212e+02,  5.05516357e+01,  2.87539406e+01, ...,
        -1.54553497e+00,  1.16297913e+00, -1.39292940e-01],
       ...,
       [-1.61628998e+02,  3.95840721e+01,  3.59555168e+01, ...,
        -1.01582158e+00, -2.45307803e+00, -5.82932591e-01],
       [-2.19602661e+02,  5.76333122e+01,  9.97616482e+00, ...,
        -7.58781195e+00,  2.59803271e+00, -6.61766768e-01],
       [-2.32003708e+02,  4.84568176e+01,  1.16291227e+01, ...,
        -5.44475126e+00,  3.70439202e-01, -9.12064791e-01]])

# GMM

In [27]:
n_components = 3
gmm=np.empty(10,object) #define gmmlike an empty array with shape =10 and type: object

In [36]:
#define the with all the coeffifients of all the classes 
dict_train_mfcc_all = ([dict_train_mfcc['zero'], dict_train_mfcc['one'],dict_train_mfcc['two'],dict_train_mfcc['three'],
                                    dict_train_mfcc['four'],dict_train_mfcc['five'], dict_train_mfcc['six'], dict_train_mfcc['seven'],
                                     dict_train_mfcc['eight'], dict_train_mfcc['nine'] ])
dict_test_mfcc_all= np.concatenate((dict_test_mfcc['zero'],dict_test_mfcc['one'],dict_test_mfcc['two'],
                                   dict_test_mfcc['three'],dict_test_mfcc['four'],dict_test_mfcc['five'],
                                   dict_test_mfcc['six'],dict_test_mfcc['seven'],dict_test_mfcc['eight'],dict_test_mfcc['nine']),axis=0)

In [29]:
#define gmm for each class
for i in range(10):
    gmm[i]= BayesianGaussianMixture(n_components=n_components, random_state=2)
    gmm[i].fit(dict_train_mfcc_all[i])

In [30]:
gmm[3].means_ #check

array([[-1.44910736e+02,  3.88439279e+01,  2.83160166e+01,
         1.38913025e+01, -4.84675010e+00, -1.11519734e+01,
        -2.52919338e+00, -8.58233635e+00, -9.24556467e+00,
        -3.04577086e+00, -3.03724606e-01, -1.81399409e-01,
         1.94662462e-01],
       [-2.34141393e+02,  3.27109682e+01,  2.07834491e+01,
         1.83042776e+01, -1.19040635e+01, -2.43336645e-01,
        -9.17205059e+00, -5.99853755e+00, -6.19740996e+00,
        -3.15762593e+00, -1.40679870e+00,  3.62884660e-01,
        -1.61640461e+00],
       [-1.56741054e+02,  6.46092533e+00,  2.21074562e+01,
         1.12050416e+01, -1.90003012e+01, -2.12884099e+01,
        -1.37429518e+01, -1.79434917e+01, -1.70335265e+01,
        -4.38517978e+00, -1.36297798e+01, -4.67634393e+00,
        -8.80783350e+00]])

In [31]:
mixt_pdf_0 = []
mixt_pdf_1 = []
mixt_pdf_2 = []
mixt_pdf_3 = []
mixt_pdf_4 = []
mixt_pdf_5 = []
mixt_pdf_6 = []
mixt_pdf_7 = []
mixt_pdf_8 = []
mixt_pdf_9 = []


sample_0 = []
sample_1 = []
sample_2 = []
sample_3 = []
sample_4 = []
sample_5 = []
sample_6 = []
sample_7 = []
sample_8 = []
sample_9 = []


for n in np.arange(n_components):
    mixt_gauss_0 = sp.stats.multivariate_normal(gmm[0].means_[n, :], gmm[0].covariances_[n, :], allow_singular=True)
    mixt_gauss_1 = sp.stats.multivariate_normal(gmm[1].means_[n, :], gmm[1].covariances_[n, :], allow_singular=True)
    mixt_gauss_2 = sp.stats.multivariate_normal(gmm[2].means_[n, :], gmm[2].covariances_[n, :], allow_singular=True)
    mixt_gauss_3 = sp.stats.multivariate_normal(gmm[3].means_[n, :], gmm[3].covariances_[n, :], allow_singular=True)
    mixt_gauss_4 = sp.stats.multivariate_normal(gmm[4].means_[n, :], gmm[4].covariances_[n, :], allow_singular=True)
    mixt_gauss_5 = sp.stats.multivariate_normal(gmm[5].means_[n, :], gmm[5].covariances_[n, :], allow_singular=True)
    mixt_gauss_6 = sp.stats.multivariate_normal(gmm[6].means_[n, :], gmm[6].covariances_[n, :], allow_singular=True)
    mixt_gauss_7 = sp.stats.multivariate_normal(gmm[7].means_[n, :], gmm[7].covariances_[n, :], allow_singular=True)
    mixt_gauss_8 = sp.stats.multivariate_normal(gmm[8].means_[n, :], gmm[8].covariances_[n, :], allow_singular=True)
    mixt_gauss_9 = sp.stats.multivariate_normal(gmm[9].means_[n, :], gmm[9].covariances_[n, :], allow_singular=True)
    
    
    # fix pdf
    mixt_pdf_0.append(gmm[0].weights_[n] * mixt_gauss_0.pdf(dict_test_mfcc_all))
    mixt_pdf_1.append(gmm[1].weights_[n] * mixt_gauss_1.pdf(dict_test_mfcc_all))
    mixt_pdf_2.append(gmm[2].weights_[n] * mixt_gauss_2.pdf(dict_test_mfcc_all))
    mixt_pdf_3.append(gmm[3].weights_[n] * mixt_gauss_3.pdf(dict_test_mfcc_all))
    mixt_pdf_4.append(gmm[4].weights_[n] * mixt_gauss_4.pdf(dict_test_mfcc_all))
    mixt_pdf_5.append(gmm[5].weights_[n] * mixt_gauss_5.pdf(dict_test_mfcc_all))
    mixt_pdf_6.append(gmm[6].weights_[n] * mixt_gauss_6.pdf(dict_test_mfcc_all))
    mixt_pdf_7.append(gmm[7].weights_[n] * mixt_gauss_7.pdf(dict_test_mfcc_all))
    mixt_pdf_8.append(gmm[8].weights_[n] * mixt_gauss_8.pdf(dict_test_mfcc_all))
    mixt_pdf_9.append(gmm[9].weights_[n] * mixt_gauss_9.pdf(dict_test_mfcc_all))
    
    
    sample_0.append(mixt_gauss_0.rvs(np.int(500 * gmm[0].weights_[n])))
    sample_1.append(mixt_gauss_1.rvs(np.int(500 * gmm[1].weights_[n])))
    sample_2.append(mixt_gauss_2.rvs(np.int(500 * gmm[2].weights_[n])))
    sample_3.append(mixt_gauss_3.rvs(np.int(500 * gmm[3].weights_[n])))
    sample_4.append(mixt_gauss_4.rvs(np.int(500 * gmm[4].weights_[n])))
    sample_5.append(mixt_gauss_5.rvs(np.int(500 * gmm[5].weights_[n])))
    sample_6.append(mixt_gauss_6.rvs(np.int(500 * gmm[6].weights_[n])))
    sample_7.append(mixt_gauss_7.rvs(np.int(500 * gmm[7].weights_[n])))
    sample_8.append(mixt_gauss_8.rvs(np.int(500 * gmm[8].weights_[n])))
    sample_9.append(mixt_gauss_9.rvs(np.int(500 * gmm[9].weights_[n])))
print("finished")

finished


In [40]:
pdf_0 = np.sum(mixt_pdf_0, axis=0).reshape(-1, 1)
pdf_1 = np.sum(mixt_pdf_1, axis=0).reshape(-1, 1)
pdf_2 = np.sum(mixt_pdf_2, axis=0).reshape(-1, 1)
pdf_3 = np.sum(mixt_pdf_3, axis=0).reshape(-1, 1)
pdf_4 = np.sum(mixt_pdf_4, axis=0).reshape(-1, 1)
pdf_5 = np.sum(mixt_pdf_5, axis=0).reshape(-1, 1)
pdf_6 = np.sum(mixt_pdf_6, axis=0).reshape(-1, 1)
pdf_7 = np.sum(mixt_pdf_7, axis=0).reshape(-1, 1)
pdf_8 = np.sum(mixt_pdf_8, axis=0).reshape(-1, 1)
pdf_9 = np.sum(mixt_pdf_9, axis=0).reshape(-1, 1)
#concatenate all the pdf 
pdf = np.concatenate((pdf_0, pdf_1, pdf_2, pdf_3, pdf_4, pdf_5,pdf_6, pdf_7, pdf_8,pdf_9), axis=1)
predicted_test_labels = np.argmax(pdf, axis=1)

Y_train = np.concatenate((np.zeros(dict_train_mfcc['zero'].shape[0]), np.ones(dict_train_mfcc['one'].shape[0]), np.ones(dict_train_mfcc['two'].shape[0])*2, np.ones(dict_train_mfcc['three'].shape[0])*3, np.ones(dict_train_mfcc['four'].shape[0])*4, np.ones(dict_train_mfcc['five'].shape[0])*5, np.ones(dict_train_mfcc['six'].shape[0])*6, np.ones(dict_train_mfcc['seven'].shape[0])*7, np.ones(dict_train_mfcc['eight'].shape[0])*8, np.ones(dict_train_mfcc['nine'].shape[0])*9), axis=0)
Y_test = np.concatenate((np.zeros(dict_test_mfcc['zero'].shape[0]), np.ones(dict_test_mfcc['one'].shape[0]), np.ones(dict_test_mfcc['two'].shape[0])*2, np.ones(dict_test_mfcc['three'].shape[0])*3, np.ones(dict_test_mfcc['four'].shape[0])*4, np.ones(dict_test_mfcc['five'].shape[0])*5, np.ones(dict_test_mfcc['six'].shape[0])*6, np.ones(dict_test_mfcc['seven'].shape[0])*7, np.ones(dict_test_mfcc['eight'].shape[0])*8, np.ones(dict_test_mfcc['nine'].shape[0])*9), axis=0)

## plot stats

In [41]:
from sklearn import metrics
accuracy = metrics.accuracy_score(Y_test, predicted_test_labels, normalize=True)
precision = metrics.precision_score(Y_test, predicted_test_labels, average='weighted')
recall = metrics.recall_score(Y_test, predicted_test_labels, average='weighted')
F1_score = metrics.f1_score(Y_test, predicted_test_labels, average='weighted')
print("Results: \n accuracy = {} \n precision = {} \n recall = {} \n F1 score = {}".format(
        accuracy, precision, recall, F1_score))

Results: 
 accuracy = 0.9333333333333333 
 precision = 0.9359178187403994 
 recall = 0.9333333333333333 
 F1 score = 0.9334502264756281


# PLOT SAMPLES

In [33]:
#plot the 3D figure
from mpl_toolkits.mplot3d import Axes3D
%matplotlib
fig = plt.figure(figsize=(16, 8))
ax = fig.add_subplot(111, projection='3d')

markers = ['x', 'o', '*']

for n in np.arange(n_components):
    x = sample_0[n][:, 0]
    y = sample_0[n][:, 1]
    z = sample_0[n][:, 2]
    ax.scatter(x, y, z, c='red', marker=markers[n])
    
    x = sample_1[n][:, 0]
    y = sample_1[n][:, 1]
    z = sample_1[n][:, 2]
    ax.scatter(x, y, z, c='blue', marker=markers[n])
    
    x = sample_2[n][:, 0]
    y = sample_2[n][:, 1]
    z = sample_2[n][:, 2]
    ax.scatter(x, y, z, c='yellow', marker=markers[n])
    
    x = sample_3[n][:, 0]
    y = sample_3[n][:, 1]
    z = sample_3[n][:, 2]
    ax.scatter(x, y, z, c='green', marker=markers[n])
    
    x = sample_4[n][:, 0]
    y = sample_4[n][:, 1]
    z = sample_4[n][:, 2]
    ax.scatter(x, y, z, c='purple', marker=markers[n])
    
    x = sample_5[n][:, 0]
    y = sample_5[n][:, 1]
    z = sample_5[n][:, 2]
    ax.scatter(x, y, z, c='orange', marker=markers[n])
    
    x = sample_6[n][:, 0]
    y = sample_6[n][:, 1]
    z = sample_6[n][:, 2]
    ax.scatter(x, y, z, c='black', marker=markers[n])
    
    x = sample_7[n][:, 0]
    y = sample_7[n][:, 1]
    z = sample_7[n][:, 2]
    ax.scatter(x, y, z, c='grey', marker=markers[n])
    
    x = sample_8[n][:, 0]
    y = sample_8[n][:, 1]
    z = sample_8[n][:, 2]
    ax.scatter(x, y, z, c='brown', marker=markers[n])
    
    x = sample_9[n][:, 0]
    y = sample_9[n][:, 1]
    z = sample_9[n][:, 2]
    ax.scatter(x, y, z, c='pink', marker=markers[n])
    
plt.show()

Using matplotlib backend: Qt5Agg


## plot metrics