In [12]:
from google.colab import drive
drive.mount('/content/gdrive')

Drive already mounted at /content/gdrive; to attempt to forcibly remount, call drive.mount("/content/gdrive", force_remount=True).


In [13]:
cd /content/gdrive/My Drive/

/content/gdrive/My Drive


In [14]:
!pip install hmmlearn



In [15]:
import os
import time
import pickle
import librosa
import numpy as np
from hmmlearn import hmm
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix, accuracy_score

In [16]:
words = ['down','go','left','no','off','on','right','stop','up','yes']

In [17]:
def apply_pre_emphasis(data):
    alpha = 0.95
    zero = np.zeros(1)
    orig_sig = np.append(data, zero)
    delay_sig = np.append(zero, data)
    output = (orig_sig - alpha*delay_sig)[:-1]
    return output

In [18]:
def endpointing(sig, fs, win_dur, win_overlap):
    win_len = int(win_dur*fs*0.001)
    overlap = int(win_overlap*fs*0.001)
    rmse = librosa.feature.rmse(sig, frame_length=win_len, 
                                hop_length=overlap, center=True)
    threshold = 0.01
    flag = 0
    
    frame_idx = 0
    while rmse[0][frame_idx] < threshold:
        frame_idx += 1
        if frame_idx == len(rmse[0]):
            flag = 1
            break
    start_idx = librosa.frames_to_samples(frame_idx, hop_length=overlap)
    
    # rmse = librosa.feature.rmse(sig[::-1], frame_length=win_len,
    #                             hop_length=overlap, center=True)
    # frame_idx = 0
    # while rmse[0][frame_idx] < threshold:
    #     frame_idx += 1
    #     if frame_idx == len(rmse[0]):
    #         flag = 1
    #         break
    # end_idx = librosa.frames_to_samples(frame_idx, hop_length=overlap)
    end_idx = 0
    
    if flag or (start_idx>=(len(sig)-end_idx-1)):
        return sig, False
    else:
        return sig[start_idx:-end_idx-1], True

In [19]:
def load_data(path):
    all_words = []
    for i in range(len(words)):
        data_path = path + "/" + words[i]
        word = []
        for file_name in os.listdir(data_path):
            x, fs = librosa.load(data_path + "/" + file_name)
            word.append(x)
        all_words.append(word)
    return all_words, fs

In [20]:
def data_preprocessing(data, fs, win_dur, win_overlap):
    X = []
    for i in range(len(data)):
        x = []
        for j in range(len(data[i])):
            out, flag = endpointing(data[i][j], fs, win_dur, win_overlap)
            if flag:
                out = apply_pre_emphasis(out)
                x.append(out)
        X.append(x)
    return X

In [21]:
def feature_extraction(data, fs, win_dur, win_overlap):
    all_features = []
    win_len = int(win_dur*fs*0.001)
    overlap = int(win_overlap*fs*0.001)
    for i in range(len(data)):
        features = []
        for j in range(len(data[i])):
            mfcc = librosa.feature.mfcc(data[i][j], sr=fs, n_mfcc=15, 
                                        n_fft=win_len, hop_length=overlap, 
                                        n_mels=64).T
            feature = []
            for k in range(len(mfcc)):
                temp1 = np.append(mfcc[k], 0)
                temp2 = np.append(0, mfcc[k])
                delta = (temp1 - temp2)[1:-1]
                
                temp1 = np.append(delta, 0)
                temp2 = np.append(0, delta)
                deltaDelta = (temp1 - temp2)[1:-1]
                
                temp = np.append(mfcc[k][:13], delta[:13])
                temp = np.append(temp, deltaDelta)
                feature.append(temp)
            features.append(np.array(feature))
        all_features.append(features)
    return all_features

In [22]:
win_dur = 20
win_overlap = 10

In [23]:
print("Loading Data...")
with open('EE679/train_clean.pkl', 'rb') as f:
    database = pickle.load(f)
data, fs = database['data'], database['fs']
del database
print("Processing Data...")
processed_clean_train_data = data_preprocessing(data, fs, win_dur, win_overlap)
print("Extracting Features...")
clean_train_features = feature_extraction(processed_clean_train_data, fs, win_dur, win_overlap)

Loading Data...
Processing Data...
Extracting Features...


In [24]:
print("Loading Data...")
with open('EE679/train_noisy.pkl', 'rb') as f:
    database = pickle.load(f)
data, fs = database['data'], database['fs']
del database
print("Processing Data...")
processed_noisy_train_data = data_preprocessing(data, fs, win_dur, win_overlap)
print("Extracting Features...")
noisy_train_features = feature_extraction(processed_noisy_train_data, fs, win_dur, win_overlap)

Loading Data...
Processing Data...
Extracting Features...


In [25]:
print("Loading Data...")
with open('EE679/test_noisy.pkl', 'rb') as f:
    database = pickle.load(f)
data, fs = database['data'], database['fs']
del database
print("Processing Data...")
processed_noisy_test_data = data_preprocessing(data, fs, win_dur, win_overlap)
print("Extracting Features...")
noisy_test_features = feature_extraction(processed_noisy_test_data, fs, win_dur, win_overlap)

Loading Data...
Processing Data...
Extracting Features...


In [26]:
print("Loading Data...")
with open('EE679/test_clean.pkl', 'rb') as f:
    database = pickle.load(f)
data, fs = database['data'], database['fs']
del database
print("Processing Data...")
processed_clean_test_data = data_preprocessing(data, fs, win_dur, win_overlap)
print("Extracting Features...")
clean_test_features = feature_extraction(processed_clean_test_data, fs, win_dur, win_overlap)

Loading Data...
Processing Data...
Extracting Features...


In [27]:
X_clean = {}
l_clean = {}
for i in range(len(words)):
    temp = clean_train_features[i]
    lengths = np.array([len(x) for x in temp])
    feature = np.concatenate(temp, axis=0)
    X_clean[words[i]] = feature
    l_clean[words[i]] = lengths

In [28]:
start2 = np.append([1], np.zeros(1))
start3 = np.append([1], np.zeros(2))
start4 = np.append([1], np.zeros(3))

modelDown = hmm.GaussianHMM(n_components=4, startprob_prior=start4, covariance_type="full", n_iter=40, verbose=True)
modelGo = hmm.GaussianHMM(n_components=2, startprob_prior=start2, covariance_type="full", n_iter=40, verbose=True)
modelLeft = hmm.GaussianHMM(n_components=4, startprob_prior=start4, covariance_type="full", n_iter=40, verbose=True)
modelNo = hmm.GaussianHMM(n_components=2, startprob_prior=start2, covariance_type="full", n_iter=40, verbose=True)
modelOff = hmm.GaussianHMM(n_components=2, startprob_prior=start2, covariance_type="full", n_iter=40, verbose=True)
modelOn = hmm.GaussianHMM(n_components=2, startprob_prior=start2, covariance_type="full", n_iter=40, verbose=True)
modelRight = hmm.GaussianHMM(n_components=4, startprob_prior=start4, covariance_type="full", n_iter=40, verbose=True)
modelStop = hmm.GaussianHMM(n_components=4, startprob_prior=start4, covariance_type="full", n_iter=40, verbose=True)
modelUp = hmm.GaussianHMM(n_components=2, startprob_prior=start2, covariance_type="full", n_iter=40, verbose=True)
modelYes = hmm.GaussianHMM(n_components=3, startprob_prior=start3, covariance_type="full", n_iter=40, verbose=True)

In [29]:
modelDown.fit(X_clean['down'], lengths=l_clean['down'])

         1    -2049836.4906             +nan
         2    16575904.1169   +18625740.6074
         3    16727054.1182     +151150.0013
         4    16760393.1530      +33339.0348
         5    16772462.3371      +12069.1842
         6    16779669.2467       +7206.9096
         7    16785331.8835       +5662.6368
         8    16789697.5590       +4365.6755
         9    16793315.3468       +3617.7878
        10    16796254.7888       +2939.4420
        11    16798770.4341       +2515.6452
        12    16800348.7243       +1578.2903
        13    16801916.2451       +1567.5207
        14    16802659.8388        +743.5937
        15    16803533.1735        +873.3347
        16    16804238.7936        +705.6201
        17    16804808.5707        +569.7771
        18    16805152.2146        +343.6439
        19    16805575.4784        +423.2638
        20    16806153.8427        +578.3643
        21    16806112.8718         -40.9710


GaussianHMM(algorithm='viterbi', covariance_type='full', covars_prior=0.01,
            covars_weight=1, init_params='stmc', means_prior=0, means_weight=0,
            min_covar=0.001, n_components=4, n_iter=40, params='stmc',
            random_state=None, startprob_prior=array([1., 0., 0., 0.]),
            tol=0.01, transmat_prior=1.0, verbose=True)

In [30]:
modelGo.fit(X_clean['go'], lengths=l_clean['go'])

         1    -1924591.1348             +nan
         2    15673466.1249   +17598057.2597
         3    15838743.7272     +165277.6024
         4    15865761.3607      +27017.6334
         5    15871946.8995       +6185.5388
         6    15874900.3904       +2953.4908
         7    15875818.3500        +917.9597
         8    15876690.1413        +871.7913
         9    15877435.1257        +744.9844
        10    15876836.4538        -598.6720


GaussianHMM(algorithm='viterbi', covariance_type='full', covars_prior=0.01,
            covars_weight=1, init_params='stmc', means_prior=0, means_weight=0,
            min_covar=0.001, n_components=2, n_iter=40, params='stmc',
            random_state=None, startprob_prior=array([1., 0.]), tol=0.01,
            transmat_prior=1.0, verbose=True)

In [31]:
modelLeft.fit(X_clean['left'], lengths=l_clean['left'])

         1    -2033051.0874             +nan
         2    16875639.8922   +18908690.9796
         3    17028320.9546     +152681.0624
         4    17074313.3498      +45992.3952
         5    17092307.8077      +17994.4579
         6    17101144.1618       +8836.3541
         7    17105169.3029       +4025.1411
         8    17107624.5408       +2455.2379
         9    17108896.0984       +1271.5576
        10    17109714.0072        +817.9088
        11    17110364.9425        +650.9353
        12    17111141.7933        +776.8508
        13    17111433.2515        +291.4582
        14    17111838.6943        +405.4428
        15    17112167.6176        +328.9233
        16    17112699.0364        +531.4189
        17    17112582.8358        -116.2007


GaussianHMM(algorithm='viterbi', covariance_type='full', covars_prior=0.01,
            covars_weight=1, init_params='stmc', means_prior=0, means_weight=0,
            min_covar=0.001, n_components=4, n_iter=40, params='stmc',
            random_state=None, startprob_prior=array([1., 0., 0., 0.]),
            tol=0.01, transmat_prior=1.0, verbose=True)

In [32]:
modelNo.fit(X_clean['no'], lengths=l_clean['no'])

         1    -2031504.1179             +nan
         2    16311716.0000   +18343220.1179
         3    16472389.6412     +160673.6412
         4    16507262.3199      +34872.6787
         5    16515154.2091       +7891.8892
         6    16518970.9070       +3816.6979
         7    16520597.7281       +1626.8211
         8    16520887.1779        +289.4498
         9    16521962.0055       +1074.8276
        10    16522069.4593        +107.4537
        11    16522480.0409        +410.5816
        12    16522323.2759        -156.7650


GaussianHMM(algorithm='viterbi', covariance_type='full', covars_prior=0.01,
            covars_weight=1, init_params='stmc', means_prior=0, means_weight=0,
            min_covar=0.001, n_components=2, n_iter=40, params='stmc',
            random_state=None, startprob_prior=array([1., 0.]), tol=0.01,
            transmat_prior=1.0, verbose=True)

In [33]:
modelOff.fit(X_clean['off'], lengths=l_clean['off'])

         1    -1870885.7548             +nan
         2    16362036.3826   +18232922.1374
         3    16514691.8335     +152655.4509
         4    16564097.4863      +49405.6528
         5    16574631.5457      +10534.0594
         6    16577421.5341       +2789.9883
         7    16578541.8037       +1120.2696
         8    16578448.6182         -93.1855


GaussianHMM(algorithm='viterbi', covariance_type='full', covars_prior=0.01,
            covars_weight=1, init_params='stmc', means_prior=0, means_weight=0,
            min_covar=0.001, n_components=2, n_iter=40, params='stmc',
            random_state=None, startprob_prior=array([1., 0.]), tol=0.01,
            transmat_prior=1.0, verbose=True)

In [34]:
modelOn.fit(X_clean['on'], lengths=l_clean['on'])

         1    -2022415.9761             +nan
         2    16002986.5699   +18025402.5460
         3    16157497.8855     +154511.3155
         4    16186781.4608      +29283.5754
         5    16192916.5192       +6135.0584
         6    16196226.0525       +3309.5333
         7    16197655.4754       +1429.4228
         8    16198717.1711       +1061.6957
         9    16198597.2711        -119.9000


GaussianHMM(algorithm='viterbi', covariance_type='full', covars_prior=0.01,
            covars_weight=1, init_params='stmc', means_prior=0, means_weight=0,
            min_covar=0.001, n_components=2, n_iter=40, params='stmc',
            random_state=None, startprob_prior=array([1., 0.]), tol=0.01,
            transmat_prior=1.0, verbose=True)

In [35]:
modelRight.fit(X_clean['right'], lengths=l_clean['right'])

         1    -2054383.5264             +nan
         2    16455794.8641   +18510178.3905
         3    16605242.9101     +149448.0460
         4    16649001.1198      +43758.2097
         5    16670314.1956      +21313.0758
         6    16683184.6919      +12870.4963
         7    16691042.3534       +7857.6615
         8    16696160.7881       +5118.4347
         9    16699596.1083       +3435.3202
        10    16702063.7094       +2467.6011
        11    16704194.9405       +2131.2311
        12    16706309.4067       +2114.4662
        13    16708953.3651       +2643.9584
        14    16712149.4041       +3196.0390
        15    16714864.3177       +2714.9137
        16    16716094.0253       +1229.7075
        17    16716885.2616        +791.2363
        18    16717167.3635        +282.1019
        19    16717610.5085        +443.1450
        20    16717290.7153        -319.7932


GaussianHMM(algorithm='viterbi', covariance_type='full', covars_prior=0.01,
            covars_weight=1, init_params='stmc', means_prior=0, means_weight=0,
            min_covar=0.001, n_components=4, n_iter=40, params='stmc',
            random_state=None, startprob_prior=array([1., 0., 0., 0.]),
            tol=0.01, transmat_prior=1.0, verbose=True)

In [36]:
modelStop.fit(X_clean['stop'], lengths=l_clean['stop'])

         1    -1963553.5818             +nan
         2    16893415.7553   +18856969.3370
         3    17030119.2394     +136703.4842
         4    17065165.2744      +35046.0349
         5    17085964.7833      +20799.5089
         6    17098317.8529      +12353.0697
         7    17103426.7478       +5108.8948
         8    17105745.8124       +2319.0647
         9    17106511.6574        +765.8450
        10    17107143.5039        +631.8465
        11    17107171.2078         +27.7039
        12    17107241.9680         +70.7601
        13    17107293.0706         +51.1026
        14    17106977.0602        -316.0104


GaussianHMM(algorithm='viterbi', covariance_type='full', covars_prior=0.01,
            covars_weight=1, init_params='stmc', means_prior=0, means_weight=0,
            min_covar=0.001, n_components=4, n_iter=40, params='stmc',
            random_state=None, startprob_prior=array([1., 0., 0., 0.]),
            tol=0.01, transmat_prior=1.0, verbose=True)

In [37]:
modelUp.fit(X_clean['up'], lengths=l_clean['up'])

         1    -1640864.2516             +nan
         2    15475414.2652   +17116278.5168
         3    15623689.0185     +148274.7533
         4    15651956.0127      +28266.9941
         5    15659154.3375       +7198.3248
         6    15662179.2075       +3024.8701
         7    15663154.2850        +975.0774
         8    15663390.3023        +236.0174
         9    15664249.5468        +859.2445
        10    15664210.8531         -38.6937


GaussianHMM(algorithm='viterbi', covariance_type='full', covars_prior=0.01,
            covars_weight=1, init_params='stmc', means_prior=0, means_weight=0,
            min_covar=0.001, n_components=2, n_iter=40, params='stmc',
            random_state=None, startprob_prior=array([1., 0.]), tol=0.01,
            transmat_prior=1.0, verbose=True)

In [38]:
modelYes.fit(X_clean['yes'], lengths=l_clean['yes'])

         1    -2138969.1664             +nan
         2    16881488.5866   +19020457.7530
         3    17043164.1857     +161675.5990
         4    17064531.2546      +21367.0690
         5    17070621.2211       +6089.9665
         6    17073284.9140       +2663.6928
         7    17074937.0502       +1652.1362
         8    17075345.3978        +408.3476
         9    17076337.1142        +991.7164
        10    17076149.8287        -187.2854


GaussianHMM(algorithm='viterbi', covariance_type='full', covars_prior=0.01,
            covars_weight=1, init_params='stmc', means_prior=0, means_weight=0,
            min_covar=0.001, n_components=3, n_iter=40, params='stmc',
            random_state=None, startprob_prior=array([1., 0., 0.]), tol=0.01,
            transmat_prior=1.0, verbose=True)

In [39]:
def test_clean_data(testing_features):
    preds = []
    labels = []
    for i in range(len(testing_features)):
        for j in range(len(testing_features[i])):
            x_test = testing_features[i][j]
            res1 = modelDown.score(x_test)
            res2 = modelGo.score(x_test)
            res3 = modelLeft.score(x_test)
            res4 = modelNo.score(x_test)
            res5 = modelOff.score(x_test)
            res6 = modelOn.score(x_test)
            res7 = modelRight.score(x_test)
            res8 = modelStop.score(x_test)
            res9 = modelUp.score(x_test)
            res10 = modelYes.score(x_test)
            res = [res1, res2, res3, res4, res5, res6, res7, res8, res9, res10]
            pred = np.argmax(res)
            preds.append(pred)
            labels.append(i)
    accu = accuracy_score(labels, preds)
    print("Accuracy = {}".format(accu))
    print("Confusion Matrix")
    print(confusion_matrix(labels, preds))

In [40]:
test_clean_data(clean_test_features)

Accuracy = 0.5481807277089165
Confusion Matrix
[[180   1  22   6   0   6  23   5   1   3]
 [ 26  89  21  22   1   1  41  29   6   2]
 [ 13   6 162   7   2   3  53   2   2  11]
 [ 59  20  42  57   1  11  34  16   3   2]
 [  8   7  10   0 139   7  16  66   4   0]
 [ 74   4  14  19   8  77  14  27   3   0]
 [ 17   2  21   3   0   0 197   4   0   7]
 [  5   2   5   1   5   2  13 203   7   2]
 [ 17   5  30   6   3   4  16 113  73   1]
 [  4   0  10   0   3   0  35   3   0 194]]


In [41]:
test_clean_data(noisy_test_features)

Accuracy = 0.3732
Confusion Matrix
[[ 78   2  22   4  96   4  11  20   0  10]
 [  6  55  20  11  75   1  25  38   5   2]
 [  6   4  98   3  82   0  32  15   1  20]
 [ 34  13  30  27  89   6  17  25   2   2]
 [  4   5   5   0 185   1   8  47   2   0]
 [ 38   2  10   6 103  46   7  24   2   1]
 [ 11   1  20   2  60   0 112  22   0  23]
 [  1   2   8   1  95   1   6 128   2   1]
 [  5   0  25   4 108   3   9  73  40   1]
 [  3   0  18   0  48   0   9   7   0 164]]


In [42]:
X_noisy = {}
l_noisy = {}
for i in range(len(words)):
    temp = noisy_train_features[i]
    lengths = np.array([len(x) for x in temp])
    feature = np.concatenate(temp, axis=0)
    X_noisy[words[i]] = feature
    l_noisy[words[i]] = lengths

In [43]:
modelDownNoisy = hmm.GaussianHMM(n_components=4, startprob_prior=start4, covariance_type="full", n_iter=40, verbose=True)
modelGoNoisy = hmm.GaussianHMM(n_components=2, startprob_prior=start2, covariance_type="full", n_iter=40, verbose=True)
modelLeftNoisy = hmm.GaussianHMM(n_components=4, startprob_prior=start4, covariance_type="full", n_iter=40, verbose=True)
modelNoNoisy = hmm.GaussianHMM(n_components=2, startprob_prior=start2, covariance_type="full", n_iter=40, verbose=True)
modelOffNoisy = hmm.GaussianHMM(n_components=2, startprob_prior=start2, covariance_type="full", n_iter=40, verbose=True)
modelOnNoisy = hmm.GaussianHMM(n_components=2, startprob_prior=start2, covariance_type="full", n_iter=40, verbose=True)
modelRightNoisy = hmm.GaussianHMM(n_components=4, startprob_prior=start4, covariance_type="full", n_iter=40, verbose=True)
modelStopNoisy = hmm.GaussianHMM(n_components=4, startprob_prior=start4, covariance_type="full", n_iter=40, verbose=True)
modelUpNoisy = hmm.GaussianHMM(n_components=2, startprob_prior=start2, covariance_type="full", n_iter=40, verbose=True)
modelYesNoisy = hmm.GaussianHMM(n_components=3, startprob_prior=start3, covariance_type="full", n_iter=40, verbose=True)

In [44]:
modelDownNoisy.fit(X_noisy['down'], lengths=l_noisy['down'])

         1    -1910899.0993             +nan
         2    19005521.1728   +20916420.2721
         3    19182924.7736     +177403.6008
         4    19236607.1468      +53682.3732
         5    19265951.7802      +29344.6334
         6    19287614.1686      +21662.3884
         7    19299910.7186      +12296.5500
         8    19307162.0604       +7251.3419
         9    19312388.8618       +5226.8013
        10    19315435.1186       +3046.2568
        11    19318252.8388       +2817.7201
        12    19322947.3773       +4694.5385
        13    19323861.7027        +914.3254
        14    19323787.5917         -74.1109


GaussianHMM(algorithm='viterbi', covariance_type='full', covars_prior=0.01,
            covars_weight=1, init_params='stmc', means_prior=0, means_weight=0,
            min_covar=0.001, n_components=4, n_iter=40, params='stmc',
            random_state=None, startprob_prior=array([1., 0., 0., 0.]),
            tol=0.01, transmat_prior=1.0, verbose=True)

In [45]:
modelGoNoisy.fit(X_noisy['go'], lengths=l_noisy['go'])

         1    -1807876.6088             +nan
         2    18315897.4871   +20123774.0959
         3    18516019.7366     +200122.2495
         4    18540095.9834      +24076.2468
         5    18546632.2659       +6536.2825
         6    18550821.7041       +4189.4382
         7    18555912.0755       +5090.3714
         8    18568300.5601      +12388.4847
         9    18577778.6279       +9478.0678
        10    18579492.9483       +1714.3204
        11    18579969.7039        +476.7556
        12    18580511.6401        +541.9362
        13    18579748.9230        -762.7171


GaussianHMM(algorithm='viterbi', covariance_type='full', covars_prior=0.01,
            covars_weight=1, init_params='stmc', means_prior=0, means_weight=0,
            min_covar=0.001, n_components=2, n_iter=40, params='stmc',
            random_state=None, startprob_prior=array([1., 0.]), tol=0.01,
            transmat_prior=1.0, verbose=True)

In [46]:
modelLeftNoisy.fit(X_noisy['left'], lengths=l_noisy['left'])

         1    -1814318.8435             +nan
         2    19083140.6531   +20897459.4966
         3    19257074.8697     +173934.2166
         4    19312296.9539      +55222.0842
         5    19332503.7329      +20206.7790
         6    19344288.9818      +11785.2489
         7    19351672.4181       +7383.4363
         8    19356000.0555       +4327.6373
         9    19358983.9536       +2983.8981
        10    19361523.9243       +2539.9708
        11    19363842.9138       +2318.9895
        12    19365425.6034       +1582.6896
        13    19366217.5992        +791.9959
        14    19366779.3359        +561.7367
        15    19367364.1762        +584.8403
        16    19367778.8349        +414.6588
        17    19367932.3086        +153.4736
        18    19368457.2706        +524.9620
        19    19368779.8362        +322.5656
        20    19369049.1456        +269.3094
        21    19369663.3778        +614.2322
        22    19370136.0117        +472.6340
        23

GaussianHMM(algorithm='viterbi', covariance_type='full', covars_prior=0.01,
            covars_weight=1, init_params='stmc', means_prior=0, means_weight=0,
            min_covar=0.001, n_components=4, n_iter=40, params='stmc',
            random_state=None, startprob_prior=array([1., 0., 0., 0.]),
            tol=0.01, transmat_prior=1.0, verbose=True)

In [47]:
modelNoNoisy.fit(X_noisy['no'], lengths=l_noisy['no'])

         1    -1866566.2977             +nan
         2    18702189.2463   +20568755.5440
         3    18898660.4890     +196471.2427
         4    18920356.8993      +21696.4103
         5    18927148.3436       +6791.4443
         6    18931658.6574       +4510.3137
         7    18936452.4987       +4793.8414
         8    18945953.0973       +9500.5986
         9    18957864.7344      +11911.6371
        10    18961571.0181       +3706.2837
        11    18962276.6318        +705.6137
        12    18962350.1471         +73.5153
        13    18962483.6951        +133.5480
        14    18962274.1446        -209.5505


GaussianHMM(algorithm='viterbi', covariance_type='full', covars_prior=0.01,
            covars_weight=1, init_params='stmc', means_prior=0, means_weight=0,
            min_covar=0.001, n_components=2, n_iter=40, params='stmc',
            random_state=None, startprob_prior=array([1., 0.]), tol=0.01,
            transmat_prior=1.0, verbose=True)

In [48]:
modelOffNoisy.fit(X_noisy['off'], lengths=l_noisy['off'])

         1    -1707083.6647             +nan
         2    18915144.6081   +20622228.2728
         3    19086294.8843     +171150.2762
         4    19107920.0431      +21625.1589
         5    19112725.4470       +4805.4039
         6    19114553.8105       +1828.3635
         7    19116154.2473       +1600.4367
         8    19117071.6616        +917.4144
         9    19118398.8112       +1327.1496
        10    19120831.2283       +2432.4171
        11    19126376.3375       +5545.1092
        12    19135477.6193       +9101.2819
        13    19142451.6924       +6974.0730
        14    19145587.1268       +3135.4345
        15    19147458.9862       +1871.8593
        16    19148815.3519       +1356.3657
        17    19149158.0773        +342.7254
        18    19149329.6620        +171.5847
        19    19149726.2574        +396.5954
        20    19149444.6475        -281.6099


GaussianHMM(algorithm='viterbi', covariance_type='full', covars_prior=0.01,
            covars_weight=1, init_params='stmc', means_prior=0, means_weight=0,
            min_covar=0.001, n_components=2, n_iter=40, params='stmc',
            random_state=None, startprob_prior=array([1., 0.]), tol=0.01,
            transmat_prior=1.0, verbose=True)

In [49]:
modelOnNoisy.fit(X_noisy['on'], lengths=l_noisy['on'])

         1    -1915473.3809             +nan
         2    18565065.7827   +20480539.1635
         3    18762635.9006     +197570.1179
         4    18778300.8462      +15664.9456
         5    18781573.1284       +3272.2822
         6    18783428.3987       +1855.2702
         7    18786857.3055       +3428.9068
         8    18794088.8111       +7231.5056
         9    18805890.5557      +11801.7446
        10    18810384.1249       +4493.5692
        11    18810886.8199        +502.6949
        12    18811099.9356        +213.1158
        13    18811491.0323        +391.0967
        14    18811277.1439        -213.8884


GaussianHMM(algorithm='viterbi', covariance_type='full', covars_prior=0.01,
            covars_weight=1, init_params='stmc', means_prior=0, means_weight=0,
            min_covar=0.001, n_components=2, n_iter=40, params='stmc',
            random_state=None, startprob_prior=array([1., 0.]), tol=0.01,
            transmat_prior=1.0, verbose=True)

In [50]:
modelRightNoisy.fit(X_noisy['right'], lengths=l_noisy['right'])

         1    -1878381.0994             +nan
         2    18847129.4044   +20725510.5038
         3    19022784.7872     +175655.3828
         4    19082139.6748      +59354.8876
         5    19105982.3023      +23842.6275
         6    19124035.1552      +18052.8529
         7    19140314.3511      +16279.1960
         8    19151499.6694      +11185.3183
         9    19158593.7570       +7094.0876
        10    19163240.1110       +4646.3540
        11    19165996.0213       +2755.9103
        12    19168621.1473       +2625.1260
        13    19171783.0894       +3161.9421
        14    19174190.9682       +2407.8789
        15    19175756.6996       +1565.7314
        16    19178764.5958       +3007.8962
        17    19183512.5396       +4747.9438
        18    19184784.4444       +1271.9049
        19    19185146.8942        +362.4497
        20    19185199.4301         +52.5360
        21    19185272.0492         +72.6190
        22    19185165.9843        -106.0649


GaussianHMM(algorithm='viterbi', covariance_type='full', covars_prior=0.01,
            covars_weight=1, init_params='stmc', means_prior=0, means_weight=0,
            min_covar=0.001, n_components=4, n_iter=40, params='stmc',
            random_state=None, startprob_prior=array([1., 0., 0., 0.]),
            tol=0.01, transmat_prior=1.0, verbose=True)

In [51]:
modelStopNoisy.fit(X_noisy['stop'], lengths=l_noisy['stop'])

         1    -1791657.0694             +nan
         2    19086102.0063   +20877759.0757
         3    19225258.5682     +139156.5618
         4    19295616.0450      +70357.4768
         5    19355272.1786      +59656.1337
         6    19395164.1677      +39891.9891
         7    19406455.8903      +11291.7226
         8    19410694.2218       +4238.3315
         9    19414628.9475       +3934.7257
        10    19415804.3206       +1175.3731
        11    19416127.1524        +322.8318
        12    19416352.6647        +225.5123
        13    19416326.5895         -26.0752


GaussianHMM(algorithm='viterbi', covariance_type='full', covars_prior=0.01,
            covars_weight=1, init_params='stmc', means_prior=0, means_weight=0,
            min_covar=0.001, n_components=4, n_iter=40, params='stmc',
            random_state=None, startprob_prior=array([1., 0., 0., 0.]),
            tol=0.01, transmat_prior=1.0, verbose=True)

In [52]:
modelUpNoisy.fit(X_noisy['up'], lengths=l_noisy['up'])

         1    -1531872.2797             +nan
         2    18121663.4076   +19653535.6873
         3    18307332.9762     +185669.5686
         4    18336403.5884      +29070.6122
         5    18345792.1282       +9388.5398
         6    18352442.3632       +6650.2350
         7    18356985.7160       +4543.3528
         8    18359765.3662       +2779.6503
         9    18360713.9764        +948.6102
        10    18361467.3501        +753.3737
        11    18361047.9606        -419.3895


GaussianHMM(algorithm='viterbi', covariance_type='full', covars_prior=0.01,
            covars_weight=1, init_params='stmc', means_prior=0, means_weight=0,
            min_covar=0.001, n_components=2, n_iter=40, params='stmc',
            random_state=None, startprob_prior=array([1., 0.]), tol=0.01,
            transmat_prior=1.0, verbose=True)

In [53]:
modelYesNoisy.fit(X_noisy['yes'], lengths=l_noisy['yes'])

         1    -1929582.5640             +nan
         2    18810429.4815   +20740012.0456
         3    18981829.1152     +171399.6337
         4    19022795.2598      +40966.1446
         5    19045042.7875      +22247.5277
         6    19056689.6465      +11646.8590
         7    19061983.7309       +5294.0844
         8    19064647.5226       +2663.7917
         9    19066062.4946       +1414.9720
        10    19067292.5149       +1230.0203
        11    19068129.8402        +837.3253
        12    19068750.2773        +620.4371
        13    19069357.0568        +606.7796
        14    19069862.6552        +505.5984
        15    19070104.6770        +242.0218
        16    19070112.0342          +7.3572
        17    19070274.5632        +162.5290
        18    19070353.3646         +78.8013
        19    19070286.2514         -67.1132


GaussianHMM(algorithm='viterbi', covariance_type='full', covars_prior=0.01,
            covars_weight=1, init_params='stmc', means_prior=0, means_weight=0,
            min_covar=0.001, n_components=3, n_iter=40, params='stmc',
            random_state=None, startprob_prior=array([1., 0., 0.]), tol=0.01,
            transmat_prior=1.0, verbose=True)

In [54]:
def test_noisy_data(testing_features):
    preds = []
    labels = []
    for i in range(len(testing_features)):
        for j in range(len(testing_features[i])):
            x_test = testing_features[i][j]
            res1 = modelDownNoisy.score(x_test)
            res2 = modelGoNoisy.score(x_test)
            res3 = modelLeftNoisy.score(x_test)
            res4 = modelNoNoisy.score(x_test)
            res5 = modelOffNoisy.score(x_test)
            res6 = modelOnNoisy.score(x_test)
            res7 = modelRightNoisy.score(x_test)
            res8 = modelStopNoisy.score(x_test)
            res9 = modelUpNoisy.score(x_test)
            res10 = modelYesNoisy.score(x_test)
            res = [res1, res2, res3, res4, res5, res6, res7, res8, res9, res10]
            pred = np.argmax(res)
            preds.append(pred)
            labels.append(i)
    accu = accuracy_score(labels, preds)
    print("Accuracy = {}".format(accu))
    print("Confusion Matrix")
    print(confusion_matrix(labels, preds))

In [55]:
test_noisy_data(noisy_test_features)

Accuracy = 0.444
Confusion Matrix
[[149   3  20  12   2  11  28  14   4   4]
 [ 34  54  41  15   2   7  53  25   7   0]
 [ 14   2 155   5   1   0  44  16   7  17]
 [ 77  15  53  35   2  11  31  16   3   2]
 [ 14   2   5   1  75   9   7 132  12   0]
 [118   3  11   8   9  44   7  32   7   0]
 [ 13   4  22   4   2   1 187   8   2   8]
 [  6   3  13   3   5   3  11 196   3   2]
 [ 20   9  47   6  11   4  12 105  53   1]
 [ 10   0  22   5   1   0  39   9   1 162]]


In [56]:
test_noisy_data(clean_test_features)

Accuracy = 0.5053978408636546
Confusion Matrix
[[155   2  12  29   0  20  17   4   2   6]
 [ 31  83  25  26   0   7  39  21   5   1]
 [ 16   3 159   7   0   0  36  14   1  25]
 [ 67  18  36  67   2  16  30   4   4   1]
 [ 10  10  14   3 104  16   8  87   5   0]
 [102   6  14  15   9  72   6  14   2   0]
 [ 18   3  18   5   0   0 194   6   1   6]
 [ 11   7  12   3   9   4  14 173   6   6]
 [ 19  14  53   4   8   9  13 108  39   1]
 [  3   0   6   4   0   0  15   3   0 218]]
