In [2]:
import numpy as np
import librosa
import os
import matplotlib.pyplot as plt
import sklearn.svm
import IPython.display as ipd
import scipy as sp
%matplotlib inline
from sklearn.mixture import BayesianGaussianMixture
from sklearn.model_selection import train_test_split
import seaborn as sns
import pandas as pd





In [3]:
def compute_mfcc(audio, fs, n_mfcc):
    # Compute the spectrogram of the audio signal
    X = np.abs(librosa.stft(
        audio,
        window='hamming',
        n_fft=1024,
        hop_length=512,)
        )
    
    # Find the weights of the mel filters
    mel = librosa.filters.mel(
        sr=fs,
        n_fft=1024,
        n_mels=40,
        fmin=133.33,
        fmax=6853.8,
    )
    
    # Apply the filters to spectrogram
    melspectrogram = np.dot(mel, X)
    # Take the logarithm
    log_melspectrogram = np.log10(melspectrogram + 1e-16)
    
    # Apply the DCT to log melspectrogram to obtain the coefficients
    mfcc = sp.fftpack.dct(log_melspectrogram, axis=0, norm='ortho')[1:n_mfcc+1]
    return mfcc

In [4]:
# define the dictionary containing the 10 classes
class_train_files = {'zero': [], 'one': [], 'two': [], 'three': [], 'four': [], 'five': [], 'six': [], 'seven': [], 'eight': [], 'nine': []}

root = '../HW1/free-spoken-digit-dataset-master/recordings'
for f in os.listdir(root):
    for index, c in enumerate(class_train_files):
        if int(f[0]) == index:
            class_train_files[c].append(f)

In [5]:
# listen to an audio file
audio, fs = librosa.load(os.path.join(root, class_train_files['zero'][3]), sr=None)
ipd.Audio(audio, rate=fs) # load the file

In [6]:
df = pd.DataFrame(class_train_files)
# split the data into train and test set (for now 10% test)
train, test = train_test_split(df, test_size=0.1, random_state=42, shuffle=True) #0.1 perchè abbiamo fatto una divisione del 10%
print(train.shape[0])
print(test.shape[0])

x = np.empty(train.shape[0], object)# serve dopo

270
30


In [7]:
df

Unnamed: 0,zero,one,two,three,four,five,six,seven,eight,nine
0,0_george_0.wav,1_george_0.wav,2_george_0.wav,3_george_0.wav,4_george_0.wav,5_george_0.wav,6_george_0.wav,7_george_0.wav,8_george_0.wav,9_george_0.wav
1,0_george_1.wav,1_george_1.wav,2_george_1.wav,3_george_1.wav,4_george_1.wav,5_george_1.wav,6_george_1.wav,7_george_1.wav,8_george_1.wav,9_george_1.wav
2,0_george_10.wav,1_george_10.wav,2_george_10.wav,3_george_10.wav,4_george_10.wav,5_george_10.wav,6_george_10.wav,7_george_10.wav,8_george_10.wav,9_george_10.wav
3,0_george_11.wav,1_george_11.wav,2_george_11.wav,3_george_11.wav,4_george_11.wav,5_george_11.wav,6_george_11.wav,7_george_11.wav,8_george_11.wav,9_george_11.wav
4,0_george_12.wav,1_george_12.wav,2_george_12.wav,3_george_12.wav,4_george_12.wav,5_george_12.wav,6_george_12.wav,7_george_12.wav,8_george_12.wav,9_george_12.wav
...,...,...,...,...,...,...,...,...,...,...
295,0_yweweler_5.wav,1_yweweler_5.wav,2_yweweler_5.wav,3_yweweler_5.wav,4_yweweler_5.wav,5_yweweler_5.wav,6_yweweler_5.wav,7_yweweler_5.wav,8_yweweler_5.wav,9_yweweler_5.wav
296,0_yweweler_6.wav,1_yweweler_6.wav,2_yweweler_6.wav,3_yweweler_6.wav,4_yweweler_6.wav,5_yweweler_6.wav,6_yweweler_6.wav,7_yweweler_6.wav,8_yweweler_6.wav,9_yweweler_6.wav
297,0_yweweler_7.wav,1_yweweler_7.wav,2_yweweler_7.wav,3_yweweler_7.wav,4_yweweler_7.wav,5_yweweler_7.wav,6_yweweler_7.wav,7_yweweler_7.wav,8_yweweler_7.wav,9_yweweler_7.wav
298,0_yweweler_8.wav,1_yweweler_8.wav,2_yweweler_8.wav,3_yweweler_8.wav,4_yweweler_8.wav,5_yweweler_8.wav,6_yweweler_8.wav,7_yweweler_8.wav,8_yweweler_8.wav,9_yweweler_8.wav


In [8]:
# initialize dictionaries
dict_train_mfcc = {'zero': [], 'one': [], 'two': [], 'three': [], 'four': [], 'five': [], 'six': [], 'seven': [], 'eight': [], 'nine': []}
dict_test_mfcc = {'zero': [], 'one': [], 'two': [], 'three': [], 'four': [], 'five': [], 'six': [], 'seven': [], 'eight': [], 'nine': []}


# why 13? try different values. check warning
n_mfcc = 13

df_list = [train, test]

# for train and test
for index, data in enumerate(df_list):
    # for each class
    for c in data:
        # set up array for storing mfcc
        if index == 0:
            dict_train_mfcc[c] = np.zeros(((len(data[c])), n_mfcc))
        else:
            dict_test_mfcc[c] = np.zeros(((len(data[c])), n_mfcc))
        #temp_features = np.zeros(((len(data[c])), n_mfcc))
        # compute mfcc
        for index2, f in enumerate(data[c]):
            audio, fs = librosa.load(os.path.join(root, f), sr=None)
            mfcc = compute_mfcc(audio, fs, n_mfcc)
            #temp_features[index2, :] = np.mean(mfcc, axis=1)
            # store results for current class and file
            if index == 0:
                dict_train_mfcc[c][index2, :] = np.mean(mfcc, axis=1)
            else:
                dict_test_mfcc[c][index2, :] = np.mean(mfcc, axis=1)
print('finished')

  "Empty filters detected in mel frequency basis. "


finished


In [9]:
dict_train_mfcc['zero']

array([[ 1.98860245e+01, -1.59650784e+01,  1.43149490e+01, ...,
        -2.59056950e+00,  1.00118470e+00, -3.77721265e-02],
       [ 2.00689468e+01, -1.52239933e+01,  1.39640274e+01, ...,
        -3.23854208e+00,  1.55321765e+00, -1.45731062e-01],
       [ 2.01498814e+01, -1.65284481e+01,  1.45636511e+01, ...,
        -2.60201788e+00,  7.87259996e-01, -9.46022570e-03],
       ...,
       [ 2.02024574e+01, -1.56604271e+01,  1.29856453e+01, ...,
        -2.61145973e+00,  1.35453379e+00, -1.63168117e-01],
       [ 2.02127972e+01, -1.48988028e+01,  1.27807207e+01, ...,
        -2.64844584e+00,  1.22878456e+00, -4.22044158e-01],
       [ 2.07306061e+01, -1.59874058e+01,  1.33329201e+01, ...,
        -2.65481067e+00,  1.47558928e+00, -1.86434969e-01]])

# GMM

In [10]:
n_components = 3
gmm=np.empty(10,object) #define gmmlike an empty array with shape =10 and type: object

In [11]:
#define the with all the coeffifients of all the classes 
dict_train_mfcc_all = ([dict_train_mfcc['zero'], dict_train_mfcc['one'],dict_train_mfcc['two'],dict_train_mfcc['three'],
                                    dict_train_mfcc['four'],dict_train_mfcc['five'], dict_train_mfcc['six'], dict_train_mfcc['seven'],
                                     dict_train_mfcc['eight'], dict_train_mfcc['nine'] ])
dict_test_mfcc_all= np.concatenate((dict_test_mfcc['zero'],dict_test_mfcc['one'],dict_test_mfcc['two'],
                                   dict_test_mfcc['three'],dict_test_mfcc['four'],dict_test_mfcc['five'],
                                   dict_test_mfcc['six'],dict_test_mfcc['seven'],dict_test_mfcc['eight'],dict_test_mfcc['nine']),axis=0)

In [12]:
#define gmm for each class
for i in range(10):
    gmm[i]= BayesianGaussianMixture(n_components=n_components, random_state=2)
    gmm[i].fit(dict_train_mfcc_all[i])

In [None]:
gmm[3].means_ #check

In [None]:
mixt_pdf_0 = []
mixt_pdf_1 = []
mixt_pdf_2 = []
mixt_pdf_3 = []
mixt_pdf_4 = []
mixt_pdf_5 = []
mixt_pdf_6 = []
mixt_pdf_7 = []
mixt_pdf_8 = []
mixt_pdf_9 = []


sample_0 = []
sample_1 = []
sample_2 = []
sample_3 = []
sample_4 = []
sample_5 = []
sample_6 = []
sample_7 = []
sample_8 = []
sample_9 = []


for n in np.arange(n_components):
    mixt_gauss_0 = sp.stats.multivariate_normal(gmm[0].means_[n, :], gmm[0].covariances_[n, :], allow_singular=True)
    mixt_gauss_1 = sp.stats.multivariate_normal(gmm[1].means_[n, :], gmm[1].covariances_[n, :], allow_singular=True)
    mixt_gauss_2 = sp.stats.multivariate_normal(gmm[2].means_[n, :], gmm[2].covariances_[n, :], allow_singular=True)
    mixt_gauss_3 = sp.stats.multivariate_normal(gmm[3].means_[n, :], gmm[3].covariances_[n, :], allow_singular=True)
    mixt_gauss_4 = sp.stats.multivariate_normal(gmm[4].means_[n, :], gmm[4].covariances_[n, :], allow_singular=True)
    mixt_gauss_5 = sp.stats.multivariate_normal(gmm[5].means_[n, :], gmm[5].covariances_[n, :], allow_singular=True)
    mixt_gauss_6 = sp.stats.multivariate_normal(gmm[6].means_[n, :], gmm[6].covariances_[n, :], allow_singular=True)
    mixt_gauss_7 = sp.stats.multivariate_normal(gmm[7].means_[n, :], gmm[7].covariances_[n, :], allow_singular=True)
    mixt_gauss_8 = sp.stats.multivariate_normal(gmm[8].means_[n, :], gmm[8].covariances_[n, :], allow_singular=True)
    mixt_gauss_9 = sp.stats.multivariate_normal(gmm[9].means_[n, :], gmm[9].covariances_[n, :], allow_singular=True)
    
    
    # fix pdf
    mixt_pdf_0.append(gmm[0].weights_[n] * mixt_gauss_0.pdf(dict_test_mfcc_all))
    mixt_pdf_1.append(gmm[1].weights_[n] * mixt_gauss_1.pdf(dict_test_mfcc_all))
    mixt_pdf_2.append(gmm[2].weights_[n] * mixt_gauss_2.pdf(dict_test_mfcc_all))
    mixt_pdf_3.append(gmm[3].weights_[n] * mixt_gauss_3.pdf(dict_test_mfcc_all))
    mixt_pdf_4.append(gmm[4].weights_[n] * mixt_gauss_4.pdf(dict_test_mfcc_all))
    mixt_pdf_5.append(gmm[5].weights_[n] * mixt_gauss_5.pdf(dict_test_mfcc_all))
    mixt_pdf_6.append(gmm[6].weights_[n] * mixt_gauss_6.pdf(dict_test_mfcc_all))
    mixt_pdf_7.append(gmm[7].weights_[n] * mixt_gauss_7.pdf(dict_test_mfcc_all))
    mixt_pdf_8.append(gmm[8].weights_[n] * mixt_gauss_8.pdf(dict_test_mfcc_all))
    mixt_pdf_9.append(gmm[9].weights_[n] * mixt_gauss_9.pdf(dict_test_mfcc_all))
    
    
    sample_0.append(mixt_gauss_0.rvs(np.int(500 * gmm[0].weights_[n])))
    sample_1.append(mixt_gauss_1.rvs(np.int(500 * gmm[1].weights_[n])))
    sample_2.append(mixt_gauss_2.rvs(np.int(500 * gmm[2].weights_[n])))
    sample_3.append(mixt_gauss_3.rvs(np.int(500 * gmm[3].weights_[n])))
    sample_4.append(mixt_gauss_4.rvs(np.int(500 * gmm[4].weights_[n])))
    sample_5.append(mixt_gauss_5.rvs(np.int(500 * gmm[5].weights_[n])))
    sample_6.append(mixt_gauss_6.rvs(np.int(500 * gmm[6].weights_[n])))
    sample_7.append(mixt_gauss_7.rvs(np.int(500 * gmm[7].weights_[n])))
    sample_8.append(mixt_gauss_8.rvs(np.int(500 * gmm[8].weights_[n])))
    sample_9.append(mixt_gauss_9.rvs(np.int(500 * gmm[9].weights_[n])))
print("finished")

In [None]:
pdf_0 = np.sum(mixt_pdf_0, axis=0).reshape(-1, 1)
pdf_1 = np.sum(mixt_pdf_1, axis=0).reshape(-1, 1)
pdf_2 = np.sum(mixt_pdf_2, axis=0).reshape(-1, 1)
pdf_3 = np.sum(mixt_pdf_3, axis=0).reshape(-1, 1)
pdf_4 = np.sum(mixt_pdf_4, axis=0).reshape(-1, 1)
pdf_5 = np.sum(mixt_pdf_5, axis=0).reshape(-1, 1)
pdf_6 = np.sum(mixt_pdf_6, axis=0).reshape(-1, 1)
pdf_7 = np.sum(mixt_pdf_7, axis=0).reshape(-1, 1)
pdf_8 = np.sum(mixt_pdf_8, axis=0).reshape(-1, 1)
pdf_9 = np.sum(mixt_pdf_9, axis=0).reshape(-1, 1)
#concatenate all the pdf 
pdf = np.concatenate((pdf_0, pdf_1, pdf_2, pdf_3, pdf_4, pdf_5,pdf_6, pdf_7, pdf_8,pdf_9), axis=1)
predicted_test_labels = np.argmax(pdf, axis=1)

# PLOT SAMPLES

In [None]:
#plot the 3D figure
from mpl_toolkits.mplot3d import Axes3D
%matplotlib
fig = plt.figure(figsize=(16, 8))
ax = fig.add_subplot(111, projection='3d')

markers = ['x', 'o', '*']

for n in np.arange(n_components):
    x = sample_0[n][:, 0]
    y = sample_0[n][:, 1]
    z = sample_0[n][:, 2]
    ax.scatter(x, y, z, c='red', marker=markers[n])
    
    x = sample_1[n][:, 0]
    y = sample_1[n][:, 1]
    z = sample_1[n][:, 2]
    ax.scatter(x, y, z, c='blue', marker=markers[n])
    
    x = sample_2[n][:, 0]
    y = sample_2[n][:, 1]
    z = sample_2[n][:, 2]
    ax.scatter(x, y, z, c='yellow', marker=markers[n])
    
    x = sample_3[n][:, 0]
    y = sample_3[n][:, 1]
    z = sample_3[n][:, 2]
    ax.scatter(x, y, z, c='green', marker=markers[n])
    
    x = sample_4[n][:, 0]
    y = sample_4[n][:, 1]
    z = sample_4[n][:, 2]
    ax.scatter(x, y, z, c='purple', marker=markers[n])
    
    x = sample_5[n][:, 0]
    y = sample_5[n][:, 1]
    z = sample_5[n][:, 2]
    ax.scatter(x, y, z, c='orange', marker=markers[n])
    
    x = sample_6[n][:, 0]
    y = sample_6[n][:, 1]
    z = sample_6[n][:, 2]
    ax.scatter(x, y, z, c='black', marker=markers[n])
    
    x = sample_7[n][:, 0]
    y = sample_7[n][:, 1]
    z = sample_7[n][:, 2]
    ax.scatter(x, y, z, c='grey', marker=markers[n])
    
    x = sample_8[n][:, 0]
    y = sample_8[n][:, 1]
    z = sample_8[n][:, 2]
    ax.scatter(x, y, z, c='brown', marker=markers[n])
    
    x = sample_9[n][:, 0]
    y = sample_9[n][:, 1]
    z = sample_9[n][:, 2]
    ax.scatter(x, y, z, c='pink', marker=markers[n])
    
plt.show()