In [2]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm
from sklearn.utils import check_random_state
import joblib

def get_mfcc(file_path):
    """Load an audio file and return its MFCC + delta feature matrix.

    The signal is read with ``librosa.load`` (library defaults), analysed with
    a 25 ms window and a 10 ms hop, and described by 12 MFCCs per frame. Each
    coefficient has its per-utterance mean subtracted, then the first- and
    second-order deltas are stacked underneath.

    Parameters
    ----------
    file_path : str or path-like
        Path of the audio (.wav) file to read.

    Returns
    -------
    numpy.ndarray
        Array of shape (T, 36): one row per frame, columns are
        12 MFCC + 12 delta + 12 delta-delta (hmmlearn expects T x N).
    """
    y, sr = librosa.load(file_path)  # read .wav file
    hop_length = math.floor(sr * 0.010)  # 10 ms hop
    win_length = math.floor(sr * 0.025)  # 25 ms frame

    # MFCC features. ``y`` and ``sr`` are passed by keyword: librosa >= 0.10
    # made them keyword-only, and the keyword form also works on older releases.
    mfcc = librosa.feature.mfcc(
        y=y, sr=sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # Subtract the per-coefficient mean over time --> normalize mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1, keepdims=True)
    # Delta features, 1st and 2nd order
    delta1_mfcc = librosa.feature.delta(mfcc, order=1)
    delta2_mfcc = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1_mfcc, delta2_mfcc], axis=0)  # O^r
    # Return T x 36 (transpose of X)
    return X.T

def get_class_data(data_dir):
    """Extract MFCC features for every ``.wav`` file directly inside ``data_dir``.

    Parameters
    ----------
    data_dir : str
        Directory that holds the recordings of one word class.

    Returns
    -------
    list of numpy.ndarray
        One feature matrix per ``.wav`` file, as returned by ``get_mfcc``.
        The list is empty when the directory contains no ``.wav`` file.
    """
    # os.listdir returns entries in arbitrary order; sorting keeps the order of
    # utterances (and everything derived from it) reproducible across runs.
    wav_files = sorted(f for f in os.listdir(data_dir) if f.endswith(".wav"))
    mfcc = [get_mfcc(os.path.join(data_dir, f)) for f in wav_files]
    print('data_dir: ', data_dir)
    # Utterances have different numbers of frames, so np.array(mfcc) would be a
    # ragged array, which NumPy >= 1.24 rejects with ValueError. Report the
    # number of utterances directly, in the same "(n,)" form.
    print(f'mfcc.shape: {(len(mfcc),)}')
    return mfcc

def clustering(X, n_clusters=10):
    """Fit a K-Means codebook on the feature vectors ``X``.

    Parameters
    ----------
    X : array-like
        Samples to cluster, passed straight to ``KMeans.fit``.
    n_clusters : int, default 10
        Number of centroids to learn.

    Returns
    -------
    sklearn.cluster.KMeans
        The fitted estimator (100 initialisations, fixed seed 0).
    """
    codebook = KMeans(
        n_clusters=n_clusters,
        n_init=100,
        random_state=0,
        verbose=0,
    ).fit(X)
    print("centers", codebook.cluster_centers_.shape)
    return codebook


    
# Sub-folders of ../full_data: the five training words, followed by their
# "test_"-prefixed evaluation counterparts.
class_names = [
    "yte", "dich", "hoc", "cachly", "nha",
    "test_nha", "test_hoc", "test_cachly", "test_dich", "test_yte",
]
dataset = {}
for cname in class_names:
    print(f"Load {cname} dataset")
    class_dir = os.path.join("../full_data", cname)
    dataset[cname] = get_class_data(class_dir)

# Stack the frames of every training utterance (folders whose name does not
# start with "test") into a single matrix, one class at a time.
train_blocks = []
for split_name, utterances in dataset.items():
    if split_name.startswith("test"):
        continue
    train_blocks.append(np.concatenate(utterances, axis=0))
all_vectors = np.concatenate(train_blocks, axis=0)
print("vectors", all_vectors.shape)

# Learn the 20-entry K-Means codebook on the training frames and persist it.
kmeans = clustering(all_vectors, 20)
print("centers", kmeans.cluster_centers_.shape)
joblib.dump(kmeans, "kmeans_1.pkl")
print("Save kmeans_1 success")
    

Load yte dataset
data_dir:  ../full_data/yte
mfcc.shape: (67,)
Load dich dataset
data_dir:  ../full_data/dich
mfcc.shape: (60,)
Load hoc dataset
data_dir:  ../full_data/hoc
mfcc.shape: (76,)
Load cachly dataset
data_dir:  ../full_data/cachly
mfcc.shape: (75,)
Load nha dataset
data_dir:  ../full_data/nha
mfcc.shape: (69,)
Load test_nha dataset
data_dir:  ../full_data/test_nha
mfcc.shape: (23,)
Load test_hoc dataset
data_dir:  ../full_data/test_hoc
mfcc.shape: (23,)
Load test_cachly dataset
data_dir:  ../full_data/test_cachly
mfcc.shape: (23,)
Load test_dich dataset
data_dir:  ../full_data/test_dich
mfcc.shape: (21,)
Load test_yte dataset
data_dir:  ../full_data/test_yte
mfcc.shape: (20,)
vectors (33614, 36)
centers (20, 36)
centers (20, 36)
Save kmeans_1 success


In [3]:
# Hand-written HMM configuration, keyed by word class:
#   num_state[c] : number of hidden states of the model for class c
#   startprob[c] : initial state distribution (length num_state[c])
#   transmat[c]  : initial state-transition matrix (num_state[c] x num_state[c])
# Every transition matrix is upper triangular (a state never moves back to an
# earlier one), each row sums to 1, and the last state only loops on itself.
test_datas = {}
num_state = {}


# Initial state distributions: most of the mass is on the first state, the
# remainder on the next few states.
startprob = {}

startprob['dich'] = np.array([0.7 , 0.2 , 0.05, 0.05, 0.  , 0.  , 0.  , 0.  , 0.  ])
startprob['hoc'] = np.array([0.7 , 0.2 , 0.05, 0.05, 0.  , 0.  , 0.  , 0.  , 0.  ])
startprob['yte'] = np.array([0.7 , 0.1 , 0.1 , 0.05, 0.05, 0.  , 0.  , 0.  , 0.  , 0.  ])
startprob['cachly'] = np.array([0.7 , 0.1 , 0.1 , 0.05, 0.05, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,  0.  , 0.  , 0.  , 0.  , 0.  ])
# |ŋ|-|ʷə|-|j|
startprob['nha'] = np.array([0.7 , 0.2 , 0.05, 0.05, 0.  , 0.   ])

# Number of hidden states per word; must match the sizes of the arrays above
# and of the transition matrices below.
num_state['dich'] = 9
num_state['hoc'] = 9
num_state['yte'] = 10
num_state['cachly'] = 16
num_state['nha'] = 6

# Initial transition matrices. From each state the model may stay (0.7) or
# skip ahead by a few states; near the end the leftover mass is put on the
# final state.
transmat = {}
# 'dich': 9 states, forward jumps of up to 3 states.
transmat['dich'] = np.array(  [[0.7 , 0.2 , 0.05, 0.05, 0.  , 0.  , 0.  , 0.  , 0.  ],
                           [0.  , 0.7 , 0.2 , 0.05, 0.05, 0.  , 0.  , 0.  , 0.  ],
                           [0.  , 0.  , 0.7 , 0.2 , 0.05, 0.05, 0.  , 0.  , 0.  ],
                           [0.  , 0.  , 0.  , 0.7 , 0.2 , 0.05, 0.05, 0.  , 0.  ],
                           [0.  , 0.  , 0.  , 0.  , 0.7 , 0.2 , 0.05, 0.05, 0.  ],
                           [0.  , 0.  , 0.  , 0.  , 0.  , 0.7 , 0.2 , 0.05, 0.05],
                           [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.7 , 0.2 , 0.1 ],
                           [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.7 , 0.3 ],
                           [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 1.  ]])

# 'hoc': same 9-state matrix as 'dich'.
transmat['hoc'] = np.array(  [[0.7 , 0.2 , 0.05, 0.05, 0.  , 0.  , 0.  , 0.  , 0.  ],
                           [0.  , 0.7 , 0.2 , 0.05, 0.05, 0.  , 0.  , 0.  , 0.  ],
                           [0.  , 0.  , 0.7 , 0.2 , 0.05, 0.05, 0.  , 0.  , 0.  ],
                           [0.  , 0.  , 0.  , 0.7 , 0.2 , 0.05, 0.05, 0.  , 0.  ],
                           [0.  , 0.  , 0.  , 0.  , 0.7 , 0.2 , 0.05, 0.05, 0.  ],
                           [0.  , 0.  , 0.  , 0.  , 0.  , 0.7 , 0.2 , 0.05, 0.05],
                           [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.7 , 0.2 , 0.1 ],
                           [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.7 , 0.3 ],
                           [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 1.  ]])

# 'yte': 10 states, forward jumps of up to 4 states.
transmat['yte'] = np.array([[0.7 , 0.1 , 0.1 , 0.05, 0.05, 0.  , 0.  , 0.  , 0.  , 0.  ],
                           [0.  , 0.7 , 0.1 , 0.1 , 0.05, 0.05, 0.  , 0.  , 0.  , 0.  ],
                           [0.  , 0.  , 0.7 , 0.1 , 0.1 , 0.05, 0.05, 0.  , 0.  , 0.  ],
                           [0.  , 0.  , 0.  , 0.7 , 0.1 , 0.1 , 0.05, 0.05, 0.  , 0.  ],
                           [0.  , 0.  , 0.  , 0.  , 0.7 , 0.1 , 0.1 , 0.05, 0.05, 0.  ],
                           [0.  , 0.  , 0.  , 0.  , 0.  , 0.7 , 0.1 , 0.1 , 0.05, 0.05],
                           [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.7 , 0.1 , 0.1 , 0.1 ],
                           [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.7 , 0.1 , 0.2 ],
                           [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.7 , 0.3 ],
                           [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 1.  ]])

# 'cachly': 16 states, same band pattern as 'yte'.
transmat['cachly'] = np.array([[0.7 , 0.1 , 0.1 , 0.05, 0.05, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ,    0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.7 , 0.1 , 0.1 , 0.05, 0.05, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.7 , 0.1 , 0.1 , 0.05, 0.05, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.7 , 0.1 , 0.1 , 0.05, 0.05, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.7 , 0.1 , 0.1 , 0.05, 0.05, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.7 , 0.1 , 0.1 , 0.05, 0.05, 0.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.7 , 0.1 , 0.1 , 0.05, 0.05, 0.  , 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.7 , 0.1 , 0.1 , 0.05, 0.05, 0.  , 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.7 , 0.1 , 0.1 , 0.05, 0.05, 0.  , 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.7 , 0.1 , 0.1 , 0.05, 0.05, 0.  , 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.7 , 0.1 , 0.1 , 0.05, 0.05, 0.  ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.7 , 0.1 , 0.1 , 0.05, 0.05],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.7 , 0.1 , 0.1 , 0.1 ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.7 , 0.1 , 0.2 ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.7 , 0.3 ],
       [0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 0.  , 1.  ]])

# 'nha': 6 states, forward jumps of up to 2 states.
transmat['nha'] = np.array([[0.7, 0.2, 0.1, 0. , 0. , 0. ],
                           [0. , 0.7, 0.2, 0.1, 0. , 0. ],
                           [0. , 0. , 0.7, 0.2, 0.1, 0. ],
                           [0. , 0. , 0. , 0.7, 0.2, 0.1],
                           [0. , 0. , 0. , 0. , 0.7, 0.3],
                           [0. , 0. , 0. , 0. , 0. , 1. ]])

# Word classes that get a trained model; each has a matching "test_<name>"
# entry in `dataset` used for evaluation.
train_class_name = [ "yte", "dich", "hoc", "cachly", "nha"]


# Number of train/evaluate rounds. The loop counts down, and the remaining
# count doubles as the HMM random seed and as the suffix of the saved models.
iteration_ = 1

# The observations are single K-Means codebook indices per frame, i.e. a
# categorical emission model. hmmlearn >= 0.3 provides that model as
# CategoricalHMM (MultinomialHMM changed meaning there); older releases only
# have MultinomialHMM, which is categorical in those versions.
discrete_hmm_cls = getattr(hmmlearn.hmm, "CategoricalHMM", hmmlearn.hmm.MultinomialHMM)

while iteration_ > 0:
    models = {}
    train_datas = {}
    iteration_ -= 1
    for cname in train_class_name:

        # Vector quantization: each utterance becomes a (T, 1) column of
        # codebook indices.
        train_datas[cname] = [kmeans.predict(v).reshape(-1, 1) for v in dataset[cname]]

        # Initialization: only the emission matrix is initialised by fit()
        # (init_params='e'); transitions and emissions are re-estimated
        # (params='te') while the start distribution stays as given.
        hmm = discrete_hmm_cls(
            n_components=num_state[cname], random_state=iteration_, n_iter=1000, verbose=True,
            params='te',
            init_params='e'
        )

        # Copies keep the hand-written configuration tables from being shared
        # with (and possibly altered through) the model objects.
        hmm.startprob_ = startprob[cname].copy()
        # The number of discrete symbols is the codebook size, so read it from
        # the fitted K-Means model instead of repeating the literal.
        hmm.n_features = kmeans.n_clusters
        hmm.transmat_ = transmat[cname].copy()

        # Fit on all utterances of the class, concatenated, with their lengths.
        X = np.concatenate(train_datas[cname])
        lengths = [len(x) for x in train_datas[cname]]
        print("training class", cname)
        print(X.shape, lengths, len(lengths))
        hmm.fit(X, lengths=lengths)
        models[cname] = hmm

        # Save model
        joblib.dump(models[cname], f"{cname}_iter{iteration_}.pkl")
        print(f"Save model {cname}_iter{iteration_} successfully")

    print(f"\nIter {iteration_} : Finish training\n")

    # Quantize the held-out utterances of every trained class.
    testing_set = {
        cname: [kmeans.predict(v).reshape(-1, 1) for v in dataset["test_" + cname]]
        for cname in train_class_name
    }

    # Per-class counters of correct / wrong predictions.
    num_true = {name: 0 for name in train_class_name}
    num_false = {name: 0 for name in train_class_name}

    # Testing: the predicted word is the model with the highest log-likelihood.
    for true_cname in train_class_name:
        print(f"Testing word {true_cname}:")
        for O in testing_set[true_cname]:
            scores = {cname: model.score(O, [len(O)]) for cname, model in models.items()}
            pred_name = max(scores, key=scores.get)
            if pred_name == true_cname:
                num_true[true_cname] += 1
            else:
                num_false[true_cname] += 1
    print("Test Result: ")
    for name in train_class_name:
        total = num_true[name] + num_false[name]
        # An empty test folder would otherwise raise ZeroDivisionError.
        acc = 100 * num_true[name] / total if total else float("nan")
        print(f"Acc on word \"{name}\": {acc}")
        # Note: the separator printed between the two counts is a backslash.
        print(f"Correct Rate: {num_true[name]}\\{total} ")
        print()
    sum_true = sum(num_true.values())
    sum_false = sum(num_false.values())
    sum_total = sum_true + sum_false
    overall_acc = sum_true * 100 / sum_total if sum_total else float("nan")
    print(f"Overall Acc: {overall_acc}")

    print(f"\nIter {iteration_} : Finish testing\n******************************************************\n\n")

training class yte
(2954, 1) [43, 34, 42, 35, 44, 29, 47, 28, 40, 50, 38, 40, 39, 35, 37, 54, 29, 37, 54, 53, 31, 41, 61, 37, 33, 50, 42, 48, 62, 39, 44, 34, 31, 42, 49, 77, 45, 44, 61, 66, 31, 47, 49, 35, 79, 34, 39, 50, 41, 39, 44, 68, 32, 38, 36, 34, 53, 44, 53, 38, 49, 60, 43, 41, 48, 41, 43] 67
Have 20 features
Set n_features success


         1       -8566.6107             +nan
         2       -6316.3932       +2250.2175
         3       -5893.2521        +423.1411
         4       -5696.0123        +197.2398
         5       -5610.5387         +85.4735
         6       -5522.5350         +88.0037
         7       -5406.9065        +115.6285
         8       -5343.2369         +63.6697
         9       -5299.8013         +43.4356
        10       -5276.8663         +22.9350
        11       -5257.2761         +19.5903
        12       -5238.0815         +19.1946
        13       -5223.5035         +14.5780
        14       -5216.9443          +6.5591
        15       -5213.1221          +3.8222
        16       -5209.0781          +4.0440
        17       -5199.8109          +9.2672
        18       -5188.1170         +11.6939
        19       -5182.6579          +5.4591
        20       -5179.4285          +3.2293
        21       -5175.1873          +4.2412
        22       -5172.9195          +2.2678
        23

Save model yte_iter0 successfully
training class dich
(5099, 1) [71, 87, 90, 67, 97, 103, 103, 92, 67, 72, 78, 83, 88, 95, 63, 78, 92, 98, 98, 82, 85, 89, 84, 87, 61, 88, 91, 98, 97, 85, 84, 72, 79, 92, 92, 65, 79, 88, 84, 91, 92, 81, 98, 98, 78, 101, 66, 73, 91, 88, 88, 85, 101, 89, 62, 84, 88, 70, 83, 88] 60
Have 20 features
Set n_features success


         2       -8028.0798       +5371.6272
         3       -7634.9300        +393.1498
         4       -6620.6043       +1014.3257
         5       -6411.1905        +209.4138
         6       -6357.0607         +54.1298
         7       -6326.3862         +30.6745
         8       -6315.5310         +10.8552
         9       -6310.5840          +4.9471
        10       -6307.8592          +2.7248
        11       -6305.4113          +2.4479
        12       -6298.9636          +6.4478
        13       -6281.7408         +17.2228
        14       -6261.2750         +20.4659
        15       -6258.6337          +2.6413
        16       -6257.9476          +0.6860
        17       -6257.5296          +0.4180
        18       -6257.1986          +0.3310
        19       -6256.8984          +0.3002
        20       -6256.6015          +0.2970
        21       -6256.2903          +0.3112
        22       -6255.9525          +0.3378
        23       -6255.5809          +0.3717
        24

Save model dich_iter0 successfully
training class hoc
(10532, 1) [118, 107, 140, 154, 129, 101, 148, 153, 145, 155, 108, 130, 153, 153, 135, 152, 155, 168, 125, 137, 124, 173, 173, 112, 122, 141, 155, 130, 148, 153, 163, 163, 133, 120, 125, 155, 144, 161, 143, 153, 137, 119, 158, 116, 155, 130, 140, 148, 141, 153, 104, 145, 127, 145, 126, 126, 120, 120, 125, 132, 135, 129, 134, 161, 148, 163, 133, 124, 124, 130, 153, 127, 158, 130, 155, 104] 76
Have 20 features
Set n_features success


         1      -28986.9419             +nan
         2      -15612.7827      +13374.1592
         3      -13327.8654       +2284.9173
         4      -12563.2337        +764.6317
         5      -12282.6156        +280.6181
         6      -12185.5031         +97.1125
         7      -12127.3976         +58.1055
         8      -12093.3136         +34.0840
         9      -12080.9243         +12.3893
        10      -12073.9138          +7.0105
        11      -12068.1288          +5.7850
        12      -12063.1188          +5.0100
        13      -12060.6691          +2.4497
        14      -12059.8507          +0.8184
        15      -12059.4211          +0.4296
        16      -12059.1427          +0.2784
        17      -12058.9522          +0.1904
        18      -12058.8176          +0.1347
        19      -12058.7188          +0.0988
        20      -12058.6431          +0.0757
        21      -12058.5824          +0.0607
        22      -12058.5313          +0.0511
        23

Save model hoc_iter0 successfully
training class cachly
(4002, 1) [82, 50, 56, 33, 61, 43, 65, 69, 67, 76, 49, 31, 61, 41, 90, 33, 39, 61, 49, 37, 61, 71, 38, 63, 51, 39, 68, 32, 65, 49, 34, 64, 36, 50, 43, 55, 89, 60, 59, 56, 70, 58, 61, 37, 30, 51, 68, 51, 60, 63, 59, 28, 61, 59, 28, 60, 29, 59, 64, 57, 64, 33, 89, 46, 35, 59, 56, 89, 35, 35, 59, 50, 54, 34, 35] 75
Have 20 features
Set n_features success


         1      -11586.5822             +nan
         2       -7837.5355       +3749.0467
         3       -6959.5878        +877.9478
         4       -6599.5959        +359.9919
         5       -6468.3894        +131.2064
         6       -6397.3936         +70.9958
         7       -6335.2649         +62.1287
         8       -6282.7807         +52.4842
         9       -6237.4276         +45.3531
        10       -6216.8799         +20.5477
        11       -6205.4086         +11.4713
        12       -6197.1763          +8.2323
        13       -6192.4084          +4.7679
        14       -6189.3248          +3.0837
        15       -6187.1768          +2.1479
        16       -6185.3977          +1.7792
        17       -6183.6528          +1.7449
        18       -6182.0468          +1.6060
        19       -6180.8709          +1.1759
        20       -6179.9211          +0.9498
        21       -6179.0942          +0.8269
        22       -6178.3597          +0.7346
        23

Save model cachly_iter0 successfully
training class nha
(11027, 1) [153, 159, 145, 150, 137, 159, 154, 154, 140, 181, 156, 194, 132, 225, 148, 166, 140, 210, 163, 145, 130, 143, 143, 158, 153, 186, 137, 192, 179, 186, 143, 153, 135, 156, 137, 159, 166, 163, 156, 140, 158, 184, 152, 142, 147, 155, 173, 179, 147, 143, 163, 143, 189, 145, 122, 204, 140, 137, 161, 148, 158, 156, 171, 148, 220, 217, 147, 207, 145] 69
Have 20 features
Set n_features success


         1      -33190.8020             +nan
         2      -15122.4250      +18068.3770
         3      -12723.8589       +2398.5661
         4      -12559.9404        +163.9186
         5      -12469.8380         +90.1024
         6      -12411.8179         +58.0201
         7      -12375.1732         +36.6447
         8      -12344.9138         +30.2594
         9      -12328.6958         +16.2180
        10      -12320.4044          +8.2914
        11      -12316.6773          +3.7271
        12      -12314.4616          +2.2157
        13      -12312.1973          +2.2644
        14      -12309.8131          +2.3842
        15      -12308.4776          +1.3355
        16      -12307.9212          +0.5564
        17      -12307.3964          +0.5248
        18      -12306.2059          +1.1905
        19      -12305.1641          +1.0417
        20      -12304.6491          +0.5151
        21      -12304.3762          +0.2729
        22      -12304.2184          +0.1578
        23

Save model nha_iter0 successfully

Iter 0 : Finish training

Testing word yte:
Testing word dich:
Testing word hoc:
Testing word cachly:
Testing word nha:
Test Result: 
Acc on word "yte": 80.0
Correct Rate: 16\20 

Acc on word "dich": 100.0
Correct Rate: 21\21 

Acc on word "hoc": 100.0
Correct Rate: 23\23 

Acc on word "cachly": 95.65217391304348
Correct Rate: 22\23 

Acc on word "nha": 100.0
Correct Rate: 23\23 

Overall Acc: 95.45454545454545

Iter 0 : Finish testing
******************************************************




In [13]:
# Dump the learned parameters of every trained word model, rounded to two
# decimals and without scientific notation.
with np.printoptions(precision=2, suppress=True):
    for cname in train_class_name:
        trained = models[cname]
        print("*"*80)
        print("MODEL : ", cname)
        print()
        sections = (
            ("Start probability:", trained.startprob_),
            ("Transition matrix:", trained.transmat_),
            ("Emission probability matrix:", trained.emissionprob_),
        )
        for heading, values in sections:
            print(heading)
            print(values)
            print()
        


********************************************************************************
MODEL :  yte

Start probability:
[0.7  0.1  0.1  0.05 0.05 0.   0.   0.   0.   0.  ]

Transition matrix:
[[0.71 0.09 0.2  0.   0.   0.   0.   0.   0.   0.  ]
 [0.   0.8  0.15 0.03 0.01 0.   0.   0.   0.   0.  ]
 [0.   0.   0.9  0.1  0.01 0.   0.   0.   0.   0.  ]
 [0.   0.   0.   0.87 0.12 0.   0.   0.   0.   0.  ]
 [0.   0.   0.   0.   0.76 0.19 0.03 0.02 0.   0.  ]
 [0.   0.   0.   0.   0.   0.88 0.12 0.   0.   0.  ]
 [0.   0.   0.   0.   0.   0.   0.86 0.01 0.13 0.  ]
 [0.   0.   0.   0.   0.   0.   0.   0.93 0.05 0.02]
 [0.   0.   0.   0.   0.   0.   0.   0.   0.95 0.05]
 [0.   0.   0.   0.   0.   0.   0.   0.   0.   1.  ]]

Emission probability matrix:
[[0.09 0.   0.01 0.   0.   0.02 0.   0.14 0.02 0.03 0.18 0.   0.01 0.27
  0.   0.   0.   0.22 0.   0.01]
 [0.42 0.   0.   0.   0.03 0.45 0.   0.   0.   0.   0.04 0.   0.   0.
  0.   0.   0.   0.05 0.   0.  ]
 [0.   0.   0.   0.02 0.55 0.13 0.   0.   0. 