In [1]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm
from operator import itemgetter, attrgetter
import pickle as pk

def get_mfcc(file_path):
    """Extract mean-normalised MFCC, delta and delta-delta features from audio.

    Parameters
    ----------
    file_path : str
        Path of the audio file to read (the notebook feeds it ``.wav`` files).

    Returns
    -------
    numpy.ndarray
        Array of shape ``(T, 36)``: for each of the ``T`` frames, 12 MFCCs
        followed by their first- and second-order deltas. Frames are rows
        because hmmlearn expects ``(n_samples, n_features)`` input.
    """
    y, sr = librosa.load(file_path)  # read the audio file
    hop_length = math.floor(sr * 0.010)  # 10 ms hop
    win_length = math.floor(sr * 0.025)  # 25 ms analysis window
    # mfcc is a 12 x T matrix. The signal and sample rate are passed by
    # keyword because librosa >= 0.10 no longer accepts them positionally.
    mfcc = librosa.feature.mfcc(
        y=y, sr=sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # subtract the per-coefficient mean over time --> normalised mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1, 1))
    # first- and second-order delta features
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0)
    # return T x 36 (transpose of X): hmmlearn uses one row per frame
    return X.T

def get_class_data(data_dir):
    """Load the feature matrix of every ``.wav`` file directly inside ``data_dir``.

    Files are visited in sorted name order: ``os.listdir`` returns entries in
    arbitrary order, which would otherwise let the order of the returned list
    (and anything fitted on it) vary between machines and runs.

    Parameters
    ----------
    data_dir : str
        Directory holding the recordings of one class.

    Returns
    -------
    list
        One ``get_mfcc`` result per ``.wav`` file, in sorted file-name order.
        Empty when the directory contains no ``.wav`` file.
    """
    wav_files = sorted(f for f in os.listdir(data_dir) if f.endswith(".wav"))
    return [get_mfcc(os.path.join(data_dir, f)) for f in wav_files]

def clustering(X, n_clusters=15):
    """Fit a K-Means codebook on ``X`` and return the fitted estimator.

    The estimator is built with ``n_init=50`` and ``random_state=0``; the shape
    of the learned centres is printed before returning.
    """
    codebook = KMeans(
        n_clusters=n_clusters,
        n_init=50,
        random_state=0,
        verbose=0,
    ).fit(X)
    print("centers", codebook.cluster_centers_.shape)
    return codebook

In [2]:
if __name__ == "__main__":
    # Sub-directories of "Data": one per word for training, plus one
    # "test_"-prefixed directory per word for evaluation.
    train_names = ["Nguoi","Duoc","Cothe","Trong","Dang"]
    test_names = ["test_Cothe","test_Nguoi","test_Duoc","test_Dang","test_Trong"]
    train_dataset, test_dataset = {}, {}
    for names, dataset in ((train_names, train_dataset), (test_names, test_dataset)):
        for cname in names:
            print(f"Load {cname} dataset")
            dataset[cname] = get_class_data(os.path.join("Data", cname))

    # Stack the frames of every training utterance into one 2-D array
    all_vectors = np.concatenate(
        [np.concatenate(utterances, axis=0) for utterances in train_dataset.values()],
        axis=0)
    print("vectors", all_vectors.shape)
    # Fit the K-Means codebook on all training frames
    kmeans = clustering(all_vectors)
    print("centers", kmeans.cluster_centers_.shape)

    # Replace each utterance by a column vector of codebook indices, the
    # discrete observation sequence used by the HMMs in the cells below
    for dataset in (train_dataset, test_dataset):
        for cname, utterances in dataset.items():
            dataset[cname] = [kmeans.predict(feats).reshape(-1, 1) for feats in utterances]

    # Trained models, filled in by the per-word training cells
    models = {}

Load Nguoi dataset
Load Duoc dataset
Load Cothe dataset
Load Trong dataset
Load Dang dataset
Load test_Cothe dataset
Load test_Nguoi dataset
Load test_Duoc dataset
Load test_Dang dataset
Load test_Trong dataset
vectors (11440, 36)
centers (15, 36)
centers (15, 36)


In [3]:

    cname = 'Nguoi'
    n_states = 12
    # hmmlearn >= 0.2.8 moved the discrete-symbol model to CategoricalHMM and
    # gave MultinomialHMM different semantics; use the categorical model when
    # it exists and fall back to MultinomialHMM on older releases.
    hmm_cls = getattr(hmmlearn.hmm, "CategoricalHMM", hmmlearn.hmm.MultinomialHMM)
    # init_params='e': only the emissions are initialised by hmmlearn, so the
    # start/transition values assigned below are kept as the starting point;
    # params='ste': start, transition and emission are all re-estimated by fit.
    hmm = hmm_cls(n_components=n_states, random_state=0, n_iter=1000, verbose=True, init_params='e', params='ste')
    # Left-to-right topology: a sequence starts in one of the first three
    # states, and each state may stay (0.7), advance one state (0.2) or skip
    # one state (0.1); the last two rows are truncated at the final state.
    startprob = np.zeros(n_states)
    startprob[:3] = (0.7, 0.2, 0.1)
    transmat = np.zeros((n_states, n_states))
    for state in range(n_states - 2):
        transmat[state, state:state + 3] = (0.7, 0.2, 0.1)
    transmat[-2, -2:] = (0.7, 0.3)
    transmat[-1, -1] = 1.0
    hmm.startprob_ = startprob
    hmm.transmat_ = transmat

    # hmmlearn takes all sequences concatenated, plus the length of each one
    X = np.concatenate(train_dataset[cname])
    lengths = [len(x) for x in train_dataset[cname]]
    print("training class", cname)
    print(X.shape, lengths, len(lengths))
    hmm.fit(X, lengths=lengths)
    models[cname] = hmm
    print("Training done")

    

training class Nguoi
(3536, 1) [21, 18, 22, 15, 21, 30, 20, 28, 19, 16, 12, 14, 27, 23, 21, 17, 33, 14, 27, 23, 22, 15, 12, 16, 16, 41, 20, 46, 16, 39, 20, 26, 16, 15, 20, 19, 27, 17, 16, 22, 28, 20, 23, 25, 20, 19, 16, 39, 17, 26, 28, 28, 31, 22, 38, 18, 40, 18, 28, 28, 16, 20, 15, 19, 1282, 12, 17, 18, 46, 17, 21, 14, 19, 26, 39, 18, 16, 24, 22, 27, 15, 22, 17, 26, 36, 32, 25, 19, 30, 21, 25, 20, 21, 23, 19, 24, 36, 26, 21, 16] 100


         1       -9800.7004             +nan
         2       -7284.1672       +2516.5332
         3       -6709.9937        +574.1735
         4       -6446.9167        +263.0771
         5       -6271.0384        +175.8783
         6       -6156.7046        +114.3338
         7       -6088.0902         +68.6143
         8       -6034.8680         +53.2222
         9       -5988.1145         +46.7535
        10       -5949.7823         +38.3322
        11       -5918.3506         +31.4317
        12       -5896.8425         +21.5081
        13       -5887.3353          +9.5073
        14       -5881.8793          +5.4560
        15       -5877.7975          +4.0818
        16       -5872.2955          +5.5020
        17       -5862.0215         +10.2740
        18       -5854.5170          +7.5045
        19       -5848.3087          +6.2083
        20       -5845.1491          +3.1596
        21       -5842.7820          +2.3671
        22       -5837.5404          +5.2416
        23

Training done


        50       -5827.5390          +0.0153
        51       -5827.5281          +0.0109
        52       -5827.5202          +0.0079


In [4]:
    cname = 'Duoc'
    n_states = 12
    # hmmlearn >= 0.2.8 moved the discrete-symbol model to CategoricalHMM and
    # gave MultinomialHMM different semantics; use the categorical model when
    # it exists and fall back to MultinomialHMM on older releases.
    hmm_cls = getattr(hmmlearn.hmm, "CategoricalHMM", hmmlearn.hmm.MultinomialHMM)
    # init_params='e': only the emissions are initialised by hmmlearn, so the
    # start/transition values assigned below are kept as the starting point;
    # params='ste': start, transition and emission are all re-estimated by fit.
    hmm = hmm_cls(n_components=n_states, random_state=0, n_iter=1000, verbose=True, init_params='e', params='ste')
    # Left-to-right topology: a sequence starts in one of the first three
    # states, and each state may stay (0.7), advance one state (0.2) or skip
    # one state (0.1); the last two rows are truncated at the final state.
    startprob = np.zeros(n_states)
    startprob[:3] = (0.7, 0.2, 0.1)
    transmat = np.zeros((n_states, n_states))
    for state in range(n_states - 2):
        transmat[state, state:state + 3] = (0.7, 0.2, 0.1)
    transmat[-2, -2:] = (0.7, 0.3)
    transmat[-1, -1] = 1.0
    hmm.startprob_ = startprob
    hmm.transmat_ = transmat

    # hmmlearn takes all sequences concatenated, plus the length of each one
    X = np.concatenate(train_dataset[cname])
    lengths = [len(x) for x in train_dataset[cname]]
    print("training class", cname)
    print(X.shape, lengths, len(lengths))
    hmm.fit(X, lengths=lengths)
    models[cname] = hmm
    print("Training done")

         1       -3189.0778             +nan
         2       -2401.0292        +788.0486


training class Duoc
(1192, 1) [20, 15, 13, 17, 36, 16, 16, 20, 23, 18, 16, 20, 26, 24, 17, 17, 34, 21, 21, 21, 16, 15, 13, 16, 17, 16, 11, 12, 17, 21, 20, 11, 17, 16, 11, 13, 15, 16, 14, 14, 17, 17, 13, 10, 15, 23, 14, 11, 18, 17, 24, 14, 18, 9, 11, 14, 16, 15, 20, 14, 15, 19, 17, 14, 16, 21, 15, 14, 24, 15] 70


         3       -2103.1305        +297.8987
         4       -1930.6888        +172.4418
         5       -1857.6208         +73.0679
         6       -1817.7862         +39.8346
         7       -1790.2542         +27.5320
         8       -1772.0638         +18.1904
         9       -1760.2779         +11.7859
        10       -1752.5964          +7.6814
        11       -1747.7040          +4.8924
        12       -1743.7605          +3.9435
        13       -1740.9119          +2.8486
        14       -1739.3868          +1.5251
        15       -1738.4053          +0.9815
        16       -1737.6451          +0.7602
        17       -1736.9959          +0.6492
        18       -1736.4101          +0.5858
        19       -1735.8592          +0.5509
        20       -1735.3234          +0.5358
        21       -1734.7890          +0.5344
        22       -1734.2434          +0.5455
        23       -1733.6714          +0.5720
        24       -1733.0659          +0.6055
        25

Training done


        80       -1721.9029          +0.0111
        81       -1721.8922          +0.0107
        82       -1721.8818          +0.0104
        83       -1721.8716          +0.0102
        84       -1721.8615          +0.0101
        85       -1721.8516          +0.0099


In [5]:
    cname = 'Cothe'
    n_states = 12
    # hmmlearn >= 0.2.8 moved the discrete-symbol model to CategoricalHMM and
    # gave MultinomialHMM different semantics; use the categorical model when
    # it exists and fall back to MultinomialHMM on older releases.
    hmm_cls = getattr(hmmlearn.hmm, "CategoricalHMM", hmmlearn.hmm.MultinomialHMM)
    # init_params='e': only the emissions are initialised by hmmlearn, so the
    # start/transition values assigned below are kept as the starting point;
    # params='ste': start, transition and emission are all re-estimated by fit.
    hmm = hmm_cls(n_components=n_states, random_state=0, n_iter=1000, verbose=True, init_params='e', params='ste')
    # Left-to-right topology: a sequence starts in one of the first three
    # states, and each state may stay (0.7), advance one state (0.2) or skip
    # one state (0.1); the last two rows are truncated at the final state.
    startprob = np.zeros(n_states)
    startprob[:3] = (0.7, 0.2, 0.1)
    transmat = np.zeros((n_states, n_states))
    for state in range(n_states - 2):
        transmat[state, state:state + 3] = (0.7, 0.2, 0.1)
    transmat[-2, -2:] = (0.7, 0.3)
    transmat[-1, -1] = 1.0
    hmm.startprob_ = startprob
    hmm.transmat_ = transmat

    # hmmlearn takes all sequences concatenated, plus the length of each one
    X = np.concatenate(train_dataset[cname])
    lengths = [len(x) for x in train_dataset[cname]]
    print("training class", cname)
    print(X.shape, lengths, len(lengths))
    hmm.fit(X, lengths=lengths)
    models[cname] = hmm
    print("Training done")

training class Cothe
(3860, 1) [51, 31, 23, 36, 24, 35, 44, 33, 48, 32, 51, 32, 52, 42, 39, 27, 54, 46, 37, 36, 30, 46, 41, 34, 47, 37, 29, 40, 31, 29, 33, 37, 46, 48, 36, 34, 32, 30, 44, 61, 36, 39, 35, 40, 74, 37, 60, 70, 35, 49, 40, 35, 54, 45, 40, 32, 31, 48, 83, 36, 56, 31, 40, 43, 46, 44, 38, 30, 32, 29, 39, 29, 36, 35, 34, 34, 35, 31, 39, 32, 32, 41, 45, 46, 29, 35, 33, 40, 47, 36, 31, 28, 26, 33, 26, 31, 29, 25, 23, 34] 100


         1      -10201.7360             +nan
         2       -7630.9186       +2570.8175
         3       -7020.8507        +610.0679
         4       -6709.1055        +311.7452
         5       -6543.6459        +165.4597
         6       -6410.9923        +132.6536
         7       -6301.0216        +109.9707
         8       -6240.0817         +60.9399
         9       -6209.0559         +31.0257
        10       -6187.7501         +21.3058
        11       -6166.7902         +20.9599
        12       -6153.1540         +13.6362
        13       -6146.2773          +6.8766
        14       -6141.3147          +4.9626
        15       -6134.2389          +7.0759
        16       -6118.0924         +16.1465
        17       -6099.5889         +18.5035
        18       -6092.2988          +7.2901
        19       -6089.4114          +2.8875
        20       -6086.9057          +2.5056
        21       -6084.2218          +2.6839
        22       -6081.2716          +2.9502
        23

Training done


In [6]:
    cname = 'Trong'
    n_states = 9
    # hmmlearn >= 0.2.8 moved the discrete-symbol model to CategoricalHMM and
    # gave MultinomialHMM different semantics; use the categorical model when
    # it exists and fall back to MultinomialHMM on older releases.
    hmm_cls = getattr(hmmlearn.hmm, "CategoricalHMM", hmmlearn.hmm.MultinomialHMM)
    # init_params='e': only the emissions are initialised by hmmlearn, so the
    # start/transition values assigned below are kept as the starting point;
    # params='ste': start, transition and emission are all re-estimated by fit.
    hmm = hmm_cls(n_components=n_states, random_state=0, n_iter=1000, verbose=True, init_params='e', params='ste')
    # Left-to-right topology: a sequence starts in one of the first three
    # states, and each state may stay (0.7), advance one state (0.2) or skip
    # one state (0.1); the last two rows are truncated at the final state.
    startprob = np.zeros(n_states)
    startprob[:3] = (0.7, 0.2, 0.1)
    transmat = np.zeros((n_states, n_states))
    for state in range(n_states - 2):
        transmat[state, state:state + 3] = (0.7, 0.2, 0.1)
    transmat[-2, -2:] = (0.7, 0.3)
    transmat[-1, -1] = 1.0
    hmm.startprob_ = startprob
    hmm.transmat_ = transmat

    # hmmlearn takes all sequences concatenated, plus the length of each one
    X = np.concatenate(train_dataset[cname])
    lengths = [len(x) for x in train_dataset[cname]]
    print("training class", cname)
    print(X.shape, lengths, len(lengths))
    hmm.fit(X, lengths=lengths)
    models[cname] = hmm
    print("Training done")

training class Trong
(1506, 1) [20, 20, 21, 28, 23, 25, 24, 25, 23, 16, 15, 16, 24, 20, 19, 22, 26, 23, 14, 12, 18, 23, 14, 18, 34, 24, 17, 14, 19, 15, 18, 29, 22, 22, 21, 23, 18, 24, 34, 33, 30, 23, 19, 12, 19, 23, 25, 22, 19, 16, 26, 21, 20, 18, 24, 28, 20, 20, 28, 21, 21, 21, 25, 25, 25, 21, 21, 19, 22, 16] 70


         1       -3850.9506             +nan
         2       -2414.9228       +1436.0277
         3       -2164.9793        +249.9435
         4       -2097.0745         +67.9047
         5       -2065.4967         +31.5778
         6       -2042.8151         +22.6816
         7       -2023.9844         +18.8307
         8       -2008.9652         +15.0192
         9       -1996.7911         +12.1741
        10       -1985.4853         +11.3058
        11       -1972.5161         +12.9692
        12       -1958.1518         +14.3643
        13       -1946.9727         +11.1791
        14       -1941.4164          +5.5563
        15       -1935.8556          +5.5608
        16       -1931.8103          +4.0452
        17       -1930.5232          +1.2871
        18       -1929.9518          +0.5715
        19       -1929.6153          +0.3365
        20       -1929.3433          +0.2720
        21       -1929.0695          +0.2738
        22       -1928.7583          +0.3111
        23

Training done


        86       -1904.6281          +0.0099


In [7]:
    cname = 'Dang'
    n_states = 9
    # hmmlearn >= 0.2.8 moved the discrete-symbol model to CategoricalHMM and
    # gave MultinomialHMM different semantics; use the categorical model when
    # it exists and fall back to MultinomialHMM on older releases.
    hmm_cls = getattr(hmmlearn.hmm, "CategoricalHMM", hmmlearn.hmm.MultinomialHMM)
    # init_params='e': only the emissions are initialised by hmmlearn, so the
    # start/transition values assigned below are kept as the starting point;
    # params='ste': start, transition and emission are all re-estimated by fit.
    hmm = hmm_cls(n_components=n_states, random_state=0, n_iter=1000, verbose=True, init_params='e', params='ste')
    # Left-to-right topology: a sequence starts in one of the first three
    # states, and each state may stay (0.7), advance one state (0.2) or skip
    # one state (0.1); the last two rows are truncated at the final state.
    startprob = np.zeros(n_states)
    startprob[:3] = (0.7, 0.2, 0.1)
    transmat = np.zeros((n_states, n_states))
    for state in range(n_states - 2):
        transmat[state, state:state + 3] = (0.7, 0.2, 0.1)
    transmat[-2, -2:] = (0.7, 0.3)
    transmat[-1, -1] = 1.0
    hmm.startprob_ = startprob
    hmm.transmat_ = transmat

    # hmmlearn takes all sequences concatenated, plus the length of each one
    X = np.concatenate(train_dataset[cname])
    lengths = [len(x) for x in train_dataset[cname]]
    print("training class", cname)
    print(X.shape, lengths, len(lengths))
    hmm.fit(X, lengths=lengths)
    models[cname] = hmm
    print("Training done")

         1       -3496.3919             +nan
         2       -2606.7222        +889.6697
         3       -2303.6548        +303.0674
         4       -2170.3805        +133.2743
         5       -2111.0008         +59.3797


training class Dang
(1346, 1) [22, 23, 13, 16, 21, 15, 24, 15, 22, 16, 26, 17, 11, 25, 24, 25, 18, 14, 18, 17, 18, 18, 14, 18, 25, 25, 21, 14, 13, 20, 20, 33, 17, 22, 14, 26, 29, 20, 21, 13, 18, 22, 22, 18, 17, 24, 18, 27, 15, 21, 18, 16, 16, 18, 18, 23, 20, 19, 19, 12, 20, 24, 17, 14, 12, 19, 19, 13, 22, 22] 70


         6       -2065.7451         +45.2557
         7       -2036.5147         +29.2303
         8       -2007.6358         +28.8789
         9       -1981.1931         +26.4427
        10       -1962.2811         +18.9120
        11       -1950.4810         +11.8001
        12       -1942.2529          +8.2281
        13       -1937.5599          +4.6930
        14       -1934.8968          +2.6631
        15       -1933.0750          +1.8218
        16       -1931.6002          +1.4748
        17       -1930.2435          +1.3567
        18       -1928.9208          +1.3227
        19       -1927.7044          +1.2163
        20       -1926.7430          +0.9615
        21       -1926.0823          +0.6607
        22       -1925.6526          +0.4297
        23       -1925.3674          +0.2852
        24       -1925.1705          +0.1969
        25       -1925.0318          +0.1387
        26       -1924.9336          +0.0982
        27       -1924.8634          +0.0702
        28

Training done


        37       -1924.6230          +0.0096


In [8]:
    print("Testing")
    test_number = {}
    test_correct = {}
    for true_cname in test_names:
        # Test directories are named "test_<word>"; strip the prefix to get
        # the key under which the matching model is stored.
        label = true_cname[len("test_"):]
        total = 0
        # Deliberately not called `sum`: that name would shadow the builtin
        # for every cell executed afterwards in the same kernel.
        correct = 0
        print(true_cname)
        for O in test_dataset[true_cname]:
            total += 1
            # Log-likelihood of the utterance under every word model
            scores = {}
            for cname, model in models.items():
                scores[cname] = model.score(O, [len(O)])
            # Best-scoring model first
            srt = sorted(scores.items(), key=lambda x: x[1], reverse=True)
            print(srt)
            best_cname, _ = srt[0]
            if best_cname == label:
                correct += 1
        test_number[label] = total
        test_correct[label] = correct
    for cname in train_names:
        # An empty test directory yields NaN instead of a ZeroDivisionError
        n_utterances = test_number[cname]
        accuracy = test_correct[cname] / n_utterances if n_utterances else float("nan")
        print('Accuracy:', cname, accuracy)

    print(test_number)

Testing
test_Cothe
[('Cothe', -39.1810220433168), ('Nguoi', -91.83945324280968), ('Trong', -100.12842935541282), ('Dang', -252.51904548661366), ('Duoc', -1946.620265853872)]
[('Cothe', -37.55109692531185), ('Duoc', -47.790971735920934), ('Trong', -70.34063036493018), ('Nguoi', -99.48720189309608), ('Dang', -164.60203198113967)]
[('Cothe', -36.469824454148934), ('Nguoi', -46.56941958192726), ('Duoc', -55.23839590515513), ('Trong', -78.82191397500317), ('Dang', -89.04205378948036)]
[('Cothe', -54.388293757554514), ('Nguoi', -106.214973132289), ('Dang', -442.85646178468437), ('Duoc', -4194.850052462988), ('Trong', -inf)]
[('Cothe', -49.63688120709736), ('Nguoi', -52.56659069099317), ('Duoc', -63.25404171111571), ('Trong', -70.26472378481958), ('Dang', -91.72572042336485)]
[('Nguoi', -35.29937392047264), ('Cothe', -46.397207735224825), ('Duoc', -56.17414858151827), ('Trong', -92.10771169114352), ('Dang', -100.42884039277929)]
[('Cothe', -33.26170746242029), ('Nguoi', -36.7745954080314), ('

In [9]:
# Transition matrix of the trained 'Nguoi' model, rounded to two decimals
models['Nguoi'].transmat_.round(2)

array([[0.69, 0.02, 0.29, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  ],
       [0.  , 0.84, 0.15, 0.01, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  ],
       [0.  , 0.  , 0.65, 0.01, 0.34, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  ],
       [0.  , 0.  , 0.  , 0.58, 0.  , 0.42, 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.7 , 0.17, 0.13, 0.  , 0.  , 0.  , 0.  ,
        0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.8 , 0.02, 0.18, 0.  , 0.  , 0.  ,
        0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.57, 0.43, 0.  , 0.  , 0.  ,
        0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.78, 0.  , 0.22, 0.  ,
        0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.99, 0.01, 0.  ,
        0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.76, 0.23,
        0.01],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.99,
        0.01],
       [0.  , 0.  , 0

In [10]:
# Transition matrix of the trained 'Cothe' model, rounded to two decimals
models['Cothe'].transmat_.round(2)

array([[0.77, 0.02, 0.21, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  ],
       [0.  , 0.64, 0.36, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  ],
       [0.  , 0.  , 0.61, 0.02, 0.38, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  ],
       [0.  , 0.  , 0.  , 0.99, 0.  , 0.01, 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.52, 0.4 , 0.08, 0.  , 0.  , 0.  , 0.  ,
        0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.83, 0.17, 0.  , 0.  , 0.  , 0.  ,
        0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.81, 0.18, 0.01, 0.  , 0.  ,
        0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.78, 0.14, 0.08, 0.  ,
        0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.86, 0.14, 0.01,
        0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.81, 0.19,
        0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.8 ,
        0.2 ],
       [0.  , 0.  , 0

In [11]:
# Transition matrix of the trained 'Trong' model, rounded to two decimals
models['Trong'].transmat_.round(2)

array([[0.36, 0.64, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.43, 0.47, 0.1 , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.67, 0.33, 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.3 , 0.69, 0.01, 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.65, 0.27, 0.08, 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.69, 0.08, 0.23, 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.7 , 0.3 , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.8 , 0.2 ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 1.  ]])

In [12]:
# Transition matrix of the trained 'Duoc' model, rounded to two decimals
models['Duoc'].transmat_.round(2)

array([[0.56, 0.21, 0.23, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  ],
       [0.  , 0.49, 0.22, 0.29, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  ],
       [0.  , 0.  , 0.43, 0.51, 0.06, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  ],
       [0.  , 0.  , 0.  , 0.63, 0.25, 0.12, 0.  , 0.  , 0.  , 0.  , 0.  ,
        0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.66, 0.13, 0.21, 0.  , 0.  , 0.  , 0.  ,
        0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.42, 0.5 , 0.08, 0.  , 0.  , 0.  ,
        0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.63, 0.16, 0.21, 0.  , 0.  ,
        0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.4 , 0.4 , 0.2 , 0.  ,
        0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.61, 0.16, 0.23,
        0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.75, 0.1 ,
        0.15],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.97,
        0.03],
       [0.  , 0.  , 0

In [13]:
# Transition matrix of the trained 'Dang' model, rounded to two decimals
models['Dang'].transmat_.round(2)

array([[0.71, 0.08, 0.21, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.61, 0.39, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.48, 0.2 , 0.32, 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.81, 0.14, 0.05, 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.77, 0.01, 0.23, 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.46, 0.13, 0.41, 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.59, 0.41, 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.71, 0.29],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 1.  ]])

In [14]:
print("Exporting models")
# Create the output directory first: open() does not create missing parent
# directories and would otherwise fail on a fresh checkout.
os.makedirs("Models", exist_ok=True)
# One pickle per word, named "<word>.pkl"
for word in train_names:
    with open(os.path.join("Models", word + ".pkl"), "wb") as file:
        pk.dump(models[word], file)

Exporting models


In [15]:
    # Second evaluation set, read from the "MicTest" directory with the same
    # "test_<word>" sub-directory layout as the test set under "Data".
    mic_names = ["test_Cothe","test_Nguoi","test_Duoc","test_Dang","test_Trong"]
    mic_dataset = {}
    for cname in mic_names:
        print(f"Load {cname} dataset")
        mic_dir = os.path.join("MicTest", cname)
        mic_dataset[cname] = get_class_data(mic_dir)

    # Quantise every utterance with the codebook fitted on the training data
    for cname, utterances in mic_dataset.items():
        mic_dataset[cname] = [kmeans.predict(feats).reshape(-1, 1) for feats in utterances]

Load test_Cothe dataset
Load test_Nguoi dataset
Load test_Duoc dataset
Load test_Dang dataset
Load test_Trong dataset


In [16]:
    print("Testing mic")
    mic_number = {}
    mic_correct = {}
    for true_cname in mic_names:
        # Directories are named "test_<word>"; strip the prefix to get the
        # key under which the matching model is stored.
        label = true_cname[len("test_"):]
        total = 0
        # Deliberately not called `sum`: that name would shadow the builtin
        # for every cell executed afterwards in the same kernel.
        correct = 0
        print(true_cname)
        for O in mic_dataset[true_cname]:
            total += 1
            # Log-likelihood of the utterance under every word model
            scores = {}
            for cname, model in models.items():
                scores[cname] = model.score(O, [len(O)])
            # Best-scoring model first
            srt = sorted(scores.items(), key=lambda x: x[1], reverse=True)
            print(srt)
            best_cname, _ = srt[0]
            if best_cname == label:
                correct += 1
        mic_number[label] = total
        mic_correct[label] = correct
    for cname in train_names:
        # An empty directory yields NaN instead of a ZeroDivisionError
        n_utterances = mic_number[cname]
        accuracy = mic_correct[cname] / n_utterances if n_utterances else float("nan")
        print('Accuracy:', cname, accuracy)

    print(mic_number)

Testing mic
test_Cothe
[('Cothe', -69.03411657084484), ('Nguoi', -164.34203271668113), ('Trong', -227.00595925369095), ('Dang', -349.44998542102667), ('Duoc', -3953.3402506687416)]
[('Cothe', -137.12686379702487), ('Nguoi', -213.97507940108747), ('Trong', -263.92008554408716), ('Dang', -687.6851879364388), ('Duoc', -9244.824433804652)]
[('Cothe', -110.43577968042646), ('Nguoi', -215.16004674157702), ('Trong', -280.86243848632813), ('Dang', -934.4184169696232), ('Duoc', -11682.744489392571)]
[('Cothe', -73.5553906715363), ('Nguoi', -169.51429065650626), ('Trong', -220.60053580954963), ('Dang', -371.1867733834266), ('Duoc', -6841.246040598831)]
[('Cothe', -105.0493165612015), ('Nguoi', -164.46837876139375), ('Trong', -239.88314807726778), ('Dang', -546.4342453207256), ('Duoc', -6990.957234689692)]
[('Cothe', -107.34999004516412), ('Nguoi', -217.07314729521957), ('Trong', -712.125382746118), ('Dang', -743.1693575013269), ('Duoc', -8677.899330053075)]
[('Cothe', -85.89332758449027), ('Nguo