In [1]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm

In [6]:
def get_mfcc(file_path):
    y, sr = librosa.load(file_path) # read .wav file
    yt, index = librosa.effects.trim(y) # Trim the beginning and ending silence
    hop_length = math.floor(sr*0.010) # 10ms hop
    win_length = math.floor(sr*0.025) # 25ms frame
    # mfcc is 12 x T matrix
    mfcc = librosa.feature.mfcc(
        yt, sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # substract mean from mfcc --> normalize mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1,1)) 
    # energy feature
    rms = librosa.feature.rms(y, hop_length=hop_length)
    frame_feature = np.concatenate([mfcc, rms], axis=0)
    # delta feature 1st order and 2nd order
    delta1 = librosa.feature.delta(frame_feature, order=1, mode='nearest')
    delta2 = librosa.feature.delta(frame_feature, order=2, mode='nearest')
    # X is 39 x T
    X = np.concatenate([frame_feature, delta1, delta2], axis=0) # O^r
    # return T x 39 (transpose of X)
    return X.T # hmmlearn use T x N matrix

In [8]:
# mfcc = get_mfcc("./data/duong/17.wav")
# mfcc
y, sr = librosa.load("./data/duong/17.wav") # read .wav file
yt, index = librosa.effects.trim(y)


In [9]:
y, yt

(array([-0.07823925, -0.1147735 , -0.10022258, ...,  0.07561673,
         0.07219956,  0.07881433], dtype=float32),
 array([-0.07823925, -0.1147735 , -0.10022258, ...,  0.07561673,
         0.07219956,  0.07881433], dtype=float32))

In [3]:
def get_class_data(data_dir):
    files = os.listdir(data_dir)
    mfcc = [get_mfcc(os.path.join(data_dir,f)) for f in files if f.endswith(".wav")]
    return mfcc

In [4]:
def clustering(X, n_clusters=10):
    kmeans = KMeans(n_clusters=n_clusters, n_init=50, random_state=0, verbose=0)
    kmeans.fit(X)
    print("centers", kmeans.cluster_centers_.shape)
    return kmeans  

In [7]:
class_names = ["hai", "tien", "duong", "y_te", "benh_nhan", "test_hai", "test_tien", "test_duong", "test_y_te", "test_benh_nhan"]
dataset = {}
for cname in class_names:
    print(f"Load {cname} dataset")
    dataset[cname] = get_class_data(os.path.join("data", cname))

# Get all vectors in the datasets
all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in dataset.items()], axis=0)
print("vectors", all_vectors.shape)
# Run K-Means algorithm to get clusters
# kmeans = clustering(all_vectors)
# print("centers", kmeans.cluster_centers_.shape)


Load hai dataset
Load tien dataset
Load duong dataset
Load y_te dataset
Load benh_nhan dataset
Load test_hai dataset
Load test_tien dataset
Load test_duong dataset
Load test_y_te dataset
Load test_benh_nhan dataset
vectors (24281, 39)


In [10]:
def initByBakis(nStates, bakisLevel):
        ''' init start_prob and transmat_prob by Bakis model ''' 
        startprobPrior = np.zeros(nStates)
        startprobPrior[0 : bakisLevel - 1] = 1./ (bakisLevel - 1)
         
        transmatPrior = getTransmatPrior(nStates, bakisLevel)
         
        return startprobPrior, transmatPrior
    
def getTransmatPrior(nStates, bakisLevel):
    ''' get transmat prior '''
    transmatPrior = (1. / bakisLevel) * np.eye(nStates)

    for i in range(nStates - (bakisLevel - 1)):
        for j in range(bakisLevel - 1):
            transmatPrior[i, i + j + 1] = 1. /  bakisLevel

    for i in range(nStates - bakisLevel + 1, nStates):
        for j in range(nStates - i -j):
            transmatPrior[i, i + j] = 1. / (nStates - i)

    return transmatPrior

In [11]:
models = {}

In [30]:
cname = "hai"
n_coms = 9
startprobPrior,transmatPrior = initByBakis(nStates=n_coms,bakisLevel=3)
hmm = hmmlearn.hmm.GMMHMM(
    n_components = n_coms, n_mix = 2, n_iter = 1000,verbose= True,
    params='mctw',
    init_params='mcw',
    startprob_prior = startprobPrior,
    transmat_prior = transmatPrior
)

X = np.concatenate(dataset[cname])
lengths = list([len(x) for x in dataset[cname]])
print("training class", cname)
print(X.shape, lengths, len(lengths))
hmm.fit(X)
models[cname] = hmm

training class hai
(2107, 39) [19, 26, 15, 26, 23, 20, 16, 24, 33, 21, 15, 19, 20, 19, 23, 18, 15, 25, 23, 20, 19, 24, 16, 16, 22, 27, 27, 14, 14, 18, 20, 34, 13, 13, 13, 21, 14, 19, 16, 18, 30, 15, 11, 16, 27, 17, 16, 28, 14, 26, 14, 33, 40, 24, 22, 14, 28, 17, 12, 21, 18, 32, 26, 17, 21, 17, 13, 17, 18, 13, 8, 16, 12, 15, 14, 10, 28, 18, 12, 127, 106, 114, 93, 119] 84


         1     -183056.6780             +nan
         2     -161186.2651      +21870.4129
         3     -156622.7877       +4563.4773
         4     -154629.5130       +1993.2747
         5     -153525.3545       +1104.1585
         6     -152966.5886        +558.7659
         7     -152572.5550        +394.0336
         8     -152239.4211        +333.1339
         9     -152088.1209        +151.3002
        10     -152004.0695         +84.0514
        11     -151926.7850         +77.2845
        12     -151847.7277         +79.0573
        13     -151735.4266        +112.3011
        14     -151610.9184        +124.5082
        15     -151540.9472         +69.9712
        16     -151467.3027         +73.6444
        17     -151381.8408         +85.4620
        18     -151341.2566         +40.5842
        19     -151322.9671         +18.2895
        20     -151280.7228         +42.2443
        21     -151243.7288         +36.9940
        22     -151235.5708          +8.1580
        23

In [48]:
cname = "tien"
n_coms = 9
startprobPrior,transmatPrior = initByBakis(nStates=n_coms,bakisLevel=3)
hmm = hmmlearn.hmm.GMMHMM(
    n_components = n_coms, n_mix = 2, n_iter = 1000,verbose= True,
    params='mctw',
    init_params='mcw',
    startprob_prior = startprobPrior,
    transmat_prior = transmatPrior
)

X = np.concatenate(dataset[cname])
lengths = list([len(x) for x in dataset[cname]])
print("training class", cname)
print(X.shape, lengths, len(lengths))
hmm.fit(X)
models[cname] = hmm

training class tien
(3602, 39) [13, 14, 18, 19, 16, 18, 24, 21, 16, 20, 17, 24, 18, 26, 19, 16, 16, 29, 24, 28, 19, 36, 25, 26, 29, 24, 23, 31, 29, 25, 14, 24, 18, 18, 19, 18, 16, 19, 24, 16, 19, 13, 21, 55, 17, 28, 25, 20, 41, 23, 24, 20, 14, 13, 24, 29, 41, 26, 26, 36, 25, 18, 25, 18, 28, 18, 19, 19, 24, 55, 29, 26, 24, 21, 16, 31, 24, 19, 17, 16, 28, 25, 23, 16, 104, 104, 99, 109, 109, 112, 112, 114, 124, 106, 117, 112, 124, 114, 114] 99


         1     -311725.8752             +nan
         2     -260966.8961      +50758.9791
         3     -253172.6310       +7794.2651
         4     -250574.1550       +2598.4760
         5     -249130.9083       +1443.2467
         6     -248506.1776        +624.7306
         7     -248279.3399        +226.8377
         8     -248102.1351        +177.2048
         9     -247891.9669        +210.1682
        10     -247676.7523        +215.2146
        11     -247549.0576        +127.6947
        12     -247509.5944         +39.4632
        13     -247462.5272         +47.0672
        14     -247414.0364         +48.4908
        15     -247347.3075         +66.7289
        16     -247298.2668         +49.0407
        17     -247272.9826         +25.2842
        18     -247232.0615         +40.9211
        19     -247200.7552         +31.3062
        20     -247007.1272        +193.6281
        21     -246939.4264         +67.7008
        22     -246904.2709         +35.1555
        23

In [50]:
cname = "benh_nhan"
n_coms = 18
startprobPrior,transmatPrior = initByBakis(nStates=n_coms,bakisLevel=3)
hmm = hmmlearn.hmm.GMMHMM(
    n_components = n_coms, n_mix = 2, n_iter = 1000,verbose= True,
    params='mctw',
    init_params='mcw',
    startprob_prior = startprobPrior,
    transmat_prior = transmatPrior
)

X = np.concatenate(dataset[cname])
lengths = list([len(x) for x in dataset[cname]])
print("training class", cname)
print(X.shape, lengths, len(lengths))
hmm.fit(X)
models[cname] = hmm

training class benh_nhan
(6393, 39) [51, 39, 41, 47, 41, 65, 39, 52, 43, 48, 59, 68, 38, 50, 32, 40, 66, 50, 44, 41, 102, 44, 38, 49, 56, 44, 48, 33, 35, 54, 37, 38, 58, 32, 42, 61, 51, 47, 49, 40, 52, 31, 62, 56, 47, 35, 36, 58, 49, 39, 35, 40, 36, 42, 32, 34, 49, 53, 42, 40, 39, 45, 39, 40, 54, 50, 40, 58, 37, 46, 41, 33, 27, 27, 50, 45, 50, 52, 35, 45, 56, 37, 42, 65, 55, 42, 50, 37, 37, 45, 47, 269, 189, 155, 163, 145, 143, 114, 145, 130, 130, 140, 119, 153, 130, 122] 106


         1     -587381.4144             +nan
         2     -506948.0049      +80433.4094
         3     -491520.9637      +15427.0413
         4     -486264.8993       +5256.0644
         5     -483233.8368       +3031.0625
         6     -482153.8136       +1080.0232
         7     -481452.1055        +701.7081
         8     -480985.6358        +466.4697
         9     -480673.9112        +311.7246
        10     -480469.0823        +204.8289
        11     -480243.6767        +225.4056
        12     -480044.7718        +198.9050
        13     -479894.8932        +149.8785
        14     -479844.6647         +50.2285
        15     -479783.3213         +61.3434
        16     -479700.0287         +83.2926
        17     -479636.7914         +63.2373
        18     -479564.1140         +72.6774
        19     -479507.2783         +56.8357
        20     -479438.1449         +69.1334
        21     -479351.1383         +87.0067
        22     -479268.6049         +82.5334
        23

In [51]:
cname = "y_te"
n_coms = 9
startprobPrior,transmatPrior = initByBakis(nStates=n_coms,bakisLevel=3)
hmm = hmmlearn.hmm.GMMHMM(
    n_components = n_coms, n_mix = 2, n_iter = 1000,verbose= True,
    params='mctw',
    init_params='mcw',
    startprob_prior = startprobPrior,
    transmat_prior = transmatPrior
)

X = np.concatenate(dataset[cname])
lengths = list([len(x) for x in dataset[cname]])
print("training class", cname)
print(X.shape, lengths, len(lengths))
hmm.fit(X)
models[cname] = hmm

training class y_te
(6673, 39) [32, 44, 37, 50, 39, 44, 50, 39, 43, 63, 40, 62, 41, 47, 42, 37, 32, 44, 37, 50, 42, 49, 31, 46, 45, 34, 64, 58, 49, 72, 60, 59, 58, 39, 37, 49, 49, 39, 44, 50, 39, 60, 56, 52, 44, 61, 54, 39, 40, 76, 63, 37, 35, 51, 42, 41, 49, 41, 58, 59, 57, 38, 44, 53, 46, 54, 50, 46, 38, 50, 45, 47, 55, 57, 43, 38, 44, 35, 41, 58, 53, 38, 51, 61, 57, 48, 47, 63, 49, 34, 59, 43, 259, 173, 143, 155, 143, 135, 148, 143, 140, 140, 145, 135, 148, 132, 148] 107


         1     -602977.5957             +nan
         2     -523332.3958      +79645.1999
         3     -506495.8708      +16836.5251
         4     -500956.8689       +5539.0018
         5     -498651.5065       +2305.3625
         6     -497760.6293        +890.8772
         7     -497149.1951        +611.4342
         8     -496751.3624        +397.8327
         9     -496621.1710        +130.1914
        10     -496568.8399         +52.3311
        11     -496525.7246         +43.1153
        12     -496486.6590         +39.0656
        13     -496422.9062         +63.7527
        14     -496378.8815         +44.0247
        15     -496368.1524         +10.7292
        16     -496361.1176          +7.0348
        17     -496355.2209          +5.8967
        18     -496348.7621          +6.4588
        19     -496340.2681          +8.4940
        20     -496332.3429          +7.9252
        21     -496326.1879          +6.1550
        22     -496308.9991         +17.1887
        23

In [53]:
cname = "duong"
n_coms = 9
startprobPrior,transmatPrior = initByBakis(nStates=n_coms,bakisLevel=3)
hmm = hmmlearn.hmm.GMMHMM(
    n_components = n_coms, n_mix = 2, n_iter = 1000,verbose= True,
    params='mctw',
    init_params='mcw',
    startprob_prior = startprobPrior,
    transmat_prior = transmatPrior
)

X = np.concatenate(dataset[cname])
lengths = list([len(x) for x in dataset[cname]])
print("training class", cname)
print(X.shape, lengths, len(lengths))
hmm.fit(X)
models[cname] = hmm

training class duong
(3201, 39) [19, 47, 23, 17, 19, 16, 36, 19, 19, 19, 21, 14, 17, 15, 35, 22, 36, 22, 16, 15, 24, 18, 16, 19, 26, 36, 38, 38, 35, 27, 24, 19, 47, 14, 32, 18, 15, 24, 27, 33, 17, 23, 19, 18, 17, 28, 20, 19, 16, 32, 21, 285, 114, 158, 124, 150, 106, 119, 132, 106, 114, 124, 109, 122, 101, 130] 66


         1     -268982.8755             +nan
         2     -224789.3043      +44193.5712
         3     -216829.9337       +7959.3706
         4     -214199.9177       +2630.0160
         5     -213076.5529       +1123.3647
         6     -212467.3173        +609.2356
         7     -212178.9717        +288.3457
         8     -212036.5073        +142.4644
         9     -211950.8811         +85.6262
        10     -211887.9116         +62.9695
        11     -211833.3467         +54.5649
        12     -211783.3689         +49.9778
        13     -211698.0226         +85.3463
        14     -211631.1529         +66.8696
        15     -211577.0204         +54.1325
        16     -211497.6261         +79.3944
        17     -211469.8053         +27.8207
        18     -211446.4288         +23.3766
        19     -211409.8102         +36.6186
        20     -211375.9082         +33.9020
        21     -211327.4997         +48.4085
        22     -211316.1670         +11.3327
        23

In [54]:
models

{'hai': GMMHMM(algorithm='viterbi', covariance_type='diag',
        covars_prior=array([[[-1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5,
          -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5,
          -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5,
          -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5],
         [-1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5,
          -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -...
        [0.        , 0.        , 0.        , 0.33333333, 0.33333333,
         0.33333333, 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.33333333,
         0.33333333, 0.33333333, 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ,
         0.33333333, 0.33333333, 0.33333333, 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.33333333, 0.33333333, 0.33333333],
        [0.     

In [55]:
print("Testing")
accuracy = {}
test_name = {"test_y_te", "test_hai", "test_benh_nhan", "test_duong", "test_tien"}
for true_cname in test_name:
    k = 0
    for O in dataset[true_cname]:
        score = {cname : model.score(O, [len(O)]) for cname, model in models.items() if cname[:4] != 'test' }
        inverse = [(value, key) for key, value in score.items()]
        predict = max(inverse)[1]
        print(true_cname, predict)
        if predict == true_cname[5:]:
            k +=1
    accuracy[true_cname] = k/len(dataset[true_cname])
print(accuracy)

Testing
test_tien tien
test_tien tien
test_tien tien
test_tien tien
test_tien tien
test_tien tien
test_tien tien
test_tien tien
test_tien tien
test_tien tien
test_tien tien
test_tien tien
test_tien tien
test_tien tien
test_tien tien
test_tien tien
test_duong duong
test_duong duong
test_duong duong
test_duong duong
test_duong duong
test_duong duong
test_duong duong
test_duong duong
test_duong duong
test_duong duong
test_y_te y_te
test_y_te y_te
test_y_te y_te
test_y_te y_te
test_y_te y_te
test_y_te y_te
test_y_te y_te
test_y_te y_te
test_y_te y_te
test_y_te y_te
test_y_te y_te
test_y_te y_te
test_y_te y_te
test_y_te y_te
test_y_te y_te
test_y_te y_te
test_hai hai
test_hai hai
test_hai hai
test_hai hai
test_hai hai
test_hai hai
test_hai hai
test_hai hai
test_hai hai
test_hai hai
test_hai hai
test_hai hai
test_hai hai
test_hai hai
test_hai hai
test_hai hai
test_benh_nhan benh_nhan
test_benh_nhan benh_nhan
test_benh_nhan benh_nhan
test_benh_nhan benh_nhan
test_benh_nhan benh_nhan
test_benh

In [56]:
import pickle
with open("gmm_hmm2.pkl", "wb") as file:
    pickle.dump(models, file)
print("Saved!")

Saved!
