In [1]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm

In [2]:
def get_mfcc(file_path):
    y, sr = librosa.load(file_path) # read .wav file
    yt, index = librosa.effects.trim(y) # Trim the beginning and ending silence
    hop_length = math.floor(sr*0.010) # 10ms hop
    win_length = math.floor(sr*0.025) # 25ms frame
    # mfcc is 12 x T matrix
    mfcc = librosa.feature.mfcc(
        yt, sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # substract mean from mfcc --> normalize mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1,1)) 
    # energy feature
    rms = librosa.feature.rms(y, hop_length=hop_length)
    frame_feature = np.concatenate([mfcc, rms], axis=0)
    # delta feature 1st order and 2nd order
    delta1 = librosa.feature.delta(frame_feature, order=1, mode='nearest')
    delta2 = librosa.feature.delta(frame_feature, order=2, mode='nearest')
    # X is 39 x T
    X = np.concatenate([frame_feature, delta1, delta2], axis=0) # O^r
    # return T x 39 (transpose of X)
    return X.T # hmmlearn use T x N matrix

In [57]:
mfcc = get_mfcc("./data/duong/17.wav")
mfcc.shape


(17, 39)

In [45]:
def get_class_data(data_dir):
    files = os.listdir(data_dir)
#     print(files)
    mfcc = [get_mfcc(os.path.join(data_dir,f)) for f in files if f.endswith(".wav")]
    return mfcc

In [4]:
def clustering(X, n_clusters=10):
    kmeans = KMeans(n_clusters=n_clusters, n_init=50, random_state=0, verbose=0)
    kmeans.fit(X)
    print("centers", kmeans.cluster_centers_.shape)
    return kmeans  

In [53]:
class_names = ["hai", "tien", "duong", "y_te", "benh_nhan", "test_hai", "test_tien", "test_duong", "test_y_te", "test_benh_nhan"]
dataset = {}
for cname in class_names:
    print(f"Load {cname} dataset")
    dataset[cname] = get_class_data(os.path.join("data", cname))

# Get all vectors in the datasets
all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in dataset.items()], axis=0)
print("vectors", all_vectors.shape)
# Run K-Means algorithm to get clusters
# kmeans = clustering(all_vectors)
# print("centers", kmeans.cluster_centers_.shape)


Load hai dataset
Load tien dataset
Load duong dataset
Load y_te dataset
Load benh_nhan dataset
Load test_hai dataset
Load test_tien dataset
Load test_duong dataset
Load test_y_te dataset
Load test_benh_nhan dataset
vectors (25089, 39)


In [6]:
def initByBakis(nStates, bakisLevel):
        ''' init start_prob and transmat_prob by Bakis model ''' 
        startprobPrior = np.zeros(nStates)
        startprobPrior[0 : bakisLevel - 1] = 1./ (bakisLevel - 1)
         
        transmatPrior = getTransmatPrior(nStates, bakisLevel)
         
        return startprobPrior, transmatPrior
    
def getTransmatPrior(nStates, bakisLevel):
    ''' get transmat prior '''
    transmatPrior = (1. / bakisLevel) * np.eye(nStates)

    for i in range(nStates - (bakisLevel - 1)):
        for j in range(bakisLevel - 1):
            transmatPrior[i, i + j + 1] = 1. /  bakisLevel

    for i in range(nStates - bakisLevel + 1, nStates):
        for j in range(nStates - i -j):
            transmatPrior[i, i + j] = 1. / (nStates - i)

    return transmatPrior

In [60]:
initByBakis(5, 3)

(array([0.5, 0.5, 0. , 0. , 0. ]),
 array([[0.33333333, 0.33333333, 0.33333333, 0.        , 0.        ],
        [0.        , 0.33333333, 0.33333333, 0.33333333, 0.        ],
        [0.        , 0.        , 0.33333333, 0.33333333, 0.33333333],
        [0.        , 0.        , 0.        , 0.5       , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 1.        ]]))

In [7]:
models = {}

In [57]:
cname = "hai"
n_coms = 11
startprobPrior,transmatPrior = initByBakis(nStates=n_coms,bakisLevel=4)
hmm = hmmlearn.hmm.GMMHMM(
    n_components = n_coms, n_mix = 2, n_iter = 1000,verbose= True,
    params='mctw',
    init_params='mcw',
    startprob_prior = startprobPrior,
    transmat_prior = transmatPrior
)

X = np.concatenate(dataset[cname])
lengths = list([len(x) for x in dataset[cname]])
print("training class", cname)
print(X.shape, lengths, len(lengths))
hmm.fit(X)
models[cname] = hmm

training class hai
(2864, 39) [33, 31, 44, 40, 42, 18, 25, 26, 35, 44, 22, 35, 23, 35, 27, 27, 15, 19, 26, 15, 26, 23, 20, 16, 24, 33, 21, 15, 19, 20, 19, 23, 18, 15, 25, 23, 20, 19, 24, 16, 16, 22, 27, 27, 14, 14, 18, 20, 34, 13, 13, 13, 21, 14, 19, 16, 27, 22, 14, 28, 17, 12, 21, 18, 32, 17, 21, 17, 13, 17, 18, 13, 8, 16, 12, 15, 14, 10, 28, 18, 12, 25, 148, 130, 114, 143, 155, 132, 135, 140] 90


         1     -241816.8972             +nan
         2     -207178.8953      +34638.0019
         3     -200476.1197       +6702.7756
         4     -198561.9008       +1914.2189
         5     -197650.4699        +911.4309
         6     -197316.7516        +333.7183
         7     -197060.2493        +256.5023
         8     -196706.5455        +353.7039
         9     -196348.7183        +357.8272
        10     -196130.2216        +218.4967
        11     -195843.4886        +286.7330
        12     -195619.8445        +223.6441
        13     -195521.1575         +98.6870
        14     -195482.1540         +39.0035
        15     -195444.0988         +38.0553
        16     -195428.1849         +15.9138
        17     -195421.4228          +6.7621
        18     -195414.0070          +7.4158
        19     -195405.6877          +8.3193
        20     -195400.1352          +5.5524
        21     -195395.5577          +4.5775
        22     -195390.7178          +4.8399
        23

In [35]:
cname = "tien"
n_coms = 9
startprobPrior,transmatPrior = initByBakis(nStates=n_coms,bakisLevel=3)
hmm = hmmlearn.hmm.GMMHMM(
    n_components = n_coms, n_mix = 2, n_iter = 1000,verbose= True,
    params='mctw',
    init_params='mcw',
    startprob_prior = startprobPrior,
    transmat_prior = transmatPrior
)

X = np.concatenate(dataset[cname])
lengths = list([len(x) for x in dataset[cname]])
print("training class", cname)
print(X.shape, lengths, len(lengths))
hmm.fit(X)
models[cname] = hmm

training class tien
(3602, 39) [13, 14, 18, 19, 16, 18, 24, 21, 16, 20, 17, 24, 18, 26, 19, 16, 16, 29, 24, 28, 19, 36, 25, 26, 29, 24, 23, 31, 29, 25, 14, 24, 18, 18, 19, 18, 16, 19, 24, 16, 19, 13, 21, 55, 17, 28, 25, 20, 41, 23, 24, 20, 14, 13, 24, 29, 41, 26, 26, 36, 25, 18, 25, 18, 28, 18, 19, 19, 24, 55, 29, 26, 24, 21, 16, 31, 24, 19, 17, 16, 28, 25, 23, 16, 104, 104, 99, 109, 109, 112, 112, 114, 124, 106, 117, 112, 124, 114, 114] 99


         1     -311532.6442             +nan
         2     -261133.1728      +50399.4714
         3     -252666.4948       +8466.6780
         4     -248827.5666       +3838.9282
         5     -247429.5097       +1398.0569
         6     -246982.8152        +446.6945
         7     -246714.6722        +268.1430
         8     -246433.9285        +280.7437
         9     -246078.1939        +355.7347
        10     -245830.9455        +247.2483
        11     -245778.2100         +52.7356
        12     -245744.1644         +34.0455
        13     -245707.6596         +36.5048
        14     -245657.8353         +49.8244
        15     -245634.8292         +23.0061
        16     -245607.3066         +27.5226
        17     -245584.8259         +22.4806
        18     -245577.9783          +6.8476
        19     -245575.1715          +2.8068
        20     -245573.0201          +2.1515
        21     -245570.2104          +2.8097
        22     -245566.5038          +3.7066
        23

In [10]:
cname = "benh_nhan"
n_coms = 18
startprobPrior,transmatPrior = initByBakis(nStates=n_coms,bakisLevel=3)
hmm = hmmlearn.hmm.GMMHMM(
    n_components = n_coms, n_mix = 2, n_iter = 1000,verbose= True,
    params='mctw',
    init_params='mcw',
    startprob_prior = startprobPrior,
    transmat_prior = transmatPrior
)

X = np.concatenate(dataset[cname])
lengths = list([len(x) for x in dataset[cname]])
print("training class", cname)
print(X.shape, lengths, len(lengths))
hmm.fit(X)
models[cname] = hmm

training class benh_nhan
(6393, 39) [51, 39, 41, 47, 41, 65, 39, 52, 43, 48, 59, 68, 38, 50, 32, 40, 66, 50, 44, 41, 102, 44, 38, 49, 56, 44, 48, 33, 35, 54, 37, 38, 58, 32, 42, 61, 51, 47, 49, 40, 52, 31, 62, 56, 47, 35, 36, 58, 49, 39, 35, 40, 36, 42, 32, 34, 49, 53, 42, 40, 39, 45, 39, 40, 54, 50, 40, 58, 37, 46, 41, 33, 27, 27, 50, 45, 50, 52, 35, 45, 56, 37, 42, 65, 55, 42, 50, 37, 37, 45, 47, 269, 189, 155, 163, 145, 143, 114, 145, 130, 130, 140, 119, 153, 130, 122] 106


         1     -588334.5662             +nan
         2     -508378.6066      +79955.9596
         3     -493018.9806      +15359.6259
         4     -488086.3278       +4932.6528
         5     -485039.6500       +3046.6778
         6     -483660.7469       +1378.9032
         7     -482901.1909        +759.5560
         8     -482311.3987        +589.7921
         9     -481746.9361        +564.4626
        10     -481281.6224        +465.3138
        11     -480924.9314        +356.6909
        12     -480604.5054        +320.4260
        13     -480355.2027        +249.3027
        14     -480234.4162        +120.7865
        15     -480121.2666        +113.1496
        16     -479971.5449        +149.7217
        17     -479844.7086        +126.8363
        18     -479767.1000         +77.6086
        19     -479682.4185         +84.6815
        20     -479573.8465        +108.5720
        21     -479409.5941        +164.2524
        22     -479263.7876        +145.8065
        23

In [11]:
cname = "y_te"
n_coms = 9
startprobPrior,transmatPrior = initByBakis(nStates=n_coms,bakisLevel=3)
hmm = hmmlearn.hmm.GMMHMM(
    n_components = n_coms, n_mix = 2, n_iter = 1000,verbose= True,
    params='mctw',
    init_params='mcw',
    startprob_prior = startprobPrior,
    transmat_prior = transmatPrior
)

X = np.concatenate(dataset[cname])
lengths = list([len(x) for x in dataset[cname]])
print("training class", cname)
print(X.shape, lengths, len(lengths))
hmm.fit(X)
models[cname] = hmm

training class y_te
(6673, 39) [32, 44, 37, 50, 39, 44, 50, 39, 43, 63, 40, 62, 41, 47, 42, 37, 32, 44, 37, 50, 42, 49, 31, 46, 45, 34, 64, 58, 49, 72, 60, 59, 58, 39, 37, 49, 49, 39, 44, 50, 39, 60, 56, 52, 44, 61, 54, 39, 40, 76, 63, 37, 35, 51, 42, 41, 49, 41, 58, 59, 57, 38, 44, 53, 46, 54, 50, 46, 38, 50, 45, 47, 55, 57, 43, 38, 44, 35, 41, 58, 53, 38, 51, 61, 57, 48, 47, 63, 49, 34, 59, 43, 259, 173, 143, 155, 143, 135, 148, 143, 140, 140, 145, 135, 148, 132, 148] 107


         1     -602953.7082             +nan
         2     -523607.9579      +79345.7503
         3     -506833.3518      +16774.6061
         4     -501076.1660       +5757.1858
         5     -498573.5139       +2502.6521
         6     -497642.4728        +931.0411
         7     -497037.7578        +604.7150
         8     -496691.1885        +346.5693
         9     -496592.2840         +98.9045
        10     -496539.8678         +52.4162
        11     -496479.6764         +60.1914
        12     -496386.8359         +92.8405
        13     -496365.3436         +21.4923
        14     -496353.7188         +11.6248
        15     -496346.5129          +7.2059
        16     -496342.0716          +4.4413
        17     -496338.8722          +3.1994
        18     -496336.2461          +2.6261
        19     -496333.7857          +2.4603
        20     -496331.1064          +2.6793
        21     -496327.1101          +3.9963
        22     -496315.6478         +11.4623
        23

In [31]:
cname = "duong"
n_coms = 9
startprobPrior,transmatPrior = initByBakis(nStates=n_coms,bakisLevel=3)
hmm = hmmlearn.hmm.GMMHMM(
    n_components = n_coms, n_mix = 2, n_iter = 1000,verbose= True,
    params='mctw',
    init_params='mcw',
    startprob_prior = startprobPrior,
    transmat_prior = transmatPrior
)

X = np.concatenate(dataset[cname])
lengths = list([len(x) for x in dataset[cname]])
print("training class", cname)
print(X.shape, lengths, len(lengths))
hmm.fit(X)
models[cname] = hmm

training class duong
(3201, 39) [19, 47, 23, 17, 19, 16, 36, 19, 19, 19, 21, 14, 17, 15, 35, 22, 36, 22, 16, 15, 24, 18, 16, 19, 26, 36, 38, 38, 35, 27, 24, 19, 47, 14, 32, 18, 15, 24, 27, 33, 17, 23, 19, 18, 17, 28, 20, 19, 16, 32, 21, 285, 114, 158, 124, 150, 106, 119, 132, 106, 114, 124, 109, 122, 101, 130] 66


         1     -268263.9947             +nan
         2     -224835.6012      +43428.3935
         3     -216918.8749       +7916.7263
         4     -214385.9551       +2532.9198
         5     -213377.4732       +1008.4818
         6     -212802.6105        +574.8627
         7     -212587.3615        +215.2490
         8     -212447.3338        +140.0277
         9     -212319.9539        +127.3799
        10     -212220.8676         +99.0863
        11     -212123.0911         +97.7765
        12     -212034.5731         +88.5180
        13     -211946.9150         +87.6581
        14     -211873.2184         +73.6966
        15     -211831.9523         +41.2661
        16     -211807.8871         +24.0653
        17     -211799.4484          +8.4386
        18     -211795.4766          +3.9718
        19     -211790.9047          +4.5719
        20     -211783.6397          +7.2650
        21     -211762.6371         +21.0026
        22     -211740.9028         +21.7343
        23

In [50]:
models

{'hai': GMMHMM(algorithm='viterbi', covariance_type='diag',
        covars_prior=array([[[-1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5,
          -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5,
          -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5,
          -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5],
         [-1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5,
          -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -...
         0.25      , 0.25      , 0.25      , 0.25      , 0.        ,
         0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.25      , 0.25      , 0.25      , 0.25      ,
         0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.25      , 0.25      , 0.25      ,
         0.25      ],
        [0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.        , 0.        ,

In [55]:
print("Testing")
accuracy = {}
test_name = {"test_hai"}
for true_cname in test_name:
    k = 0
    for O in dataset[true_cname]:
        score = {cname : model.score(O, [len(O)]) for cname, model in models.items() if cname[:4] != 'test' }
#         print(score)
        inverse = [(value, key) for key, value in score.items()]
        predict = max(inverse)[1]
        print(true_cname, predict)
        if predict == true_cname[5:]:
            k +=1
    accuracy[true_cname] = k/len(dataset[true_cname])
print(accuracy)

Testing
test_hai hai
test_hai hai
test_hai hai
test_hai hai
test_hai hai
test_hai hai
test_hai hai
test_hai hai
test_hai hai
test_hai hai
test_hai hai
test_hai hai
test_hai hai
test_hai hai
test_hai hai
{'test_hai': 1.0}


In [56]:
import pickle
with open("gmm_hmm3.pkl", "wb") as file:
    pickle.dump(models, file)
print("Saved!")

Saved!
