In [1]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm

In [2]:
def get_mfcc(file_path):
    y, sr = librosa.load(file_path) # read .wav file
    hop_length = math.floor(sr*0.010) # 10ms hop
    win_length = math.floor(sr*0.025) # 25ms frame
    # mfcc is 12 x T matrix
    mfcc = librosa.feature.mfcc(
        y, sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # substract mean from mfcc --> normalize mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1,1)) 
    # energy feature
    rms = librosa.feature.rms(y, hop_length=hop_length)
    frame_feature = np.concatenate([mfcc, rms], axis=0)
    # delta feature 1st order and 2nd order
    delta1 = librosa.feature.delta(frame_feature, order=1, mode='nearest')
    delta2 = librosa.feature.delta(frame_feature, order=2, mode='nearest')
    # X is 39 x T
    X = np.concatenate([frame_feature, delta1, delta2], axis=0) # O^r
    # return T x 39 (transpose of X)
    return X.T # hmmlearn use T x N matrix

In [36]:
mfcc = get_mfcc("./data/duong/17.wav")
# mfcc

In [3]:
def get_class_data(data_dir):
    files = os.listdir(data_dir)
    mfcc = [get_mfcc(os.path.join(data_dir,f)) for f in files if f.endswith(".wav")]
    return mfcc

In [4]:
def clustering(X, n_clusters=10):
    kmeans = KMeans(n_clusters=n_clusters, n_init=50, random_state=0, verbose=0)
    kmeans.fit(X)
    print("centers", kmeans.cluster_centers_.shape)
    return kmeans  

In [46]:
class_names = ["hai", "tien", "duong", "y_te", "benh_nhan", "test_hai", "test_tien", "test_duong", "test_y_te", "test_benh_nhan"]
dataset = {}
for cname in class_names:
    print(f"Load {cname} dataset")
    dataset[cname] = get_class_data(os.path.join("data", cname))

# Get all vectors in the datasets
all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in dataset.items()], axis=0)
print("vectors", all_vectors.shape)
# Run K-Means algorithm to get clusters
# kmeans = clustering(all_vectors)
# print("centers", kmeans.cluster_centers_.shape)


Load hai dataset
Load tien dataset
Load duong dataset
Load y_te dataset
Load benh_nhan dataset
Load test_hai dataset
Load test_tien dataset
Load test_duong dataset
Load test_y_te dataset
Load test_benh_nhan dataset
vectors (24281, 39)


In [5]:
def initByBakis(nStates, bakisLevel):
        ''' init start_prob and transmat_prob by Bakis model ''' 
        startprobPrior = np.zeros(nStates)
        startprobPrior[0 : bakisLevel - 1] = 1./ (bakisLevel - 1)
         
        transmatPrior = getTransmatPrior(nStates, bakisLevel)
         
        return startprobPrior, transmatPrior
    
def getTransmatPrior(nStates, bakisLevel):
    ''' get transmat prior '''
    transmatPrior = (1. / bakisLevel) * np.eye(nStates)

    for i in range(nStates - (bakisLevel - 1)):
        for j in range(bakisLevel - 1):
            transmatPrior[i, i + j + 1] = 1. /  bakisLevel

    for i in range(nStates - bakisLevel + 1, nStates):
        for j in range(nStates - i -j):
            transmatPrior[i, i + j] = 1. / (nStates - i)

    return transmatPrior

In [6]:
models = {}

In [37]:
cname = "hai"
n_coms = 9
startprobPrior,transmatPrior = initByBakis(nStates=n_coms,bakisLevel=3)
hmm = hmmlearn.hmm.GMMHMM(
    n_components = n_coms, n_mix = 2, n_iter = 1000,verbose= True,
    params='mctw',
    init_params='mst',
    startprob_prior = startprobPrior,
    transmat_prior = transmatPrior
)

X = np.concatenate(dataset[cname])
lengths = list([len(x) for x in dataset[cname]])
print("training class", cname)
print(X.shape, lengths, len(lengths))
hmm.fit(X)
models[cname] = hmm

training class hai
(2107, 39) [19, 26, 15, 26, 23, 20, 16, 24, 33, 21, 15, 19, 20, 19, 23, 18, 15, 25, 23, 20, 19, 24, 16, 16, 22, 27, 27, 14, 14, 18, 20, 34, 13, 13, 13, 21, 14, 19, 16, 18, 30, 15, 11, 16, 27, 17, 16, 28, 14, 26, 14, 33, 40, 24, 22, 14, 28, 17, 12, 21, 18, 32, 26, 17, 21, 17, 13, 17, 18, 13, 8, 16, 12, 15, 14, 10, 28, 18, 12, 127, 106, 114, 93, 119] 84


         1     -183140.0968             +nan
         2     -161176.2523      +21963.8445
         3     -156305.8696       +4870.3827
         4     -155049.7597       +1256.1099
         5     -154466.6621        +583.0976
         6     -154040.8564        +425.8057
         7     -153853.6652        +187.1912
         8     -153750.3868        +103.2784
         9     -153649.7051        +100.6817
        10     -153543.6955        +106.0096
        11     -153327.8316        +215.8639
        12     -152826.9969        +500.8347
        13     -152670.5408        +156.4560
        14     -152531.9012        +138.6396
        15     -152454.4569         +77.4443
        16     -152419.9919         +34.4650
        17     -152401.4955         +18.4965
        18     -152384.5753         +16.9202
        19     -152368.4516         +16.1237
        20     -152355.6616         +12.7899
        21     -152335.0820         +20.5797
        22     -152297.5252         +37.5568
        23

In [47]:
cname = "tien"
n_coms = 12
startprobPrior,transmatPrior = initByBakis(nStates=n_coms,bakisLevel=3)
hmm = hmmlearn.hmm.GMMHMM(
    n_components = n_coms, n_mix = 2, n_iter = 1000,verbose= True,
    params='mctw',
    init_params='mst',
    startprob_prior = startprobPrior,
    transmat_prior = transmatPrior
)

X = np.concatenate(dataset[cname])
lengths = list([len(x) for x in dataset[cname]])
print("training class", cname)
print(X.shape, lengths, len(lengths))
hmm.fit(X)
models[cname] = hmm

training class tien
(3602, 39) [13, 14, 18, 19, 16, 18, 24, 21, 16, 20, 17, 24, 18, 26, 19, 16, 16, 29, 24, 28, 19, 36, 25, 26, 29, 24, 23, 31, 29, 25, 14, 24, 18, 18, 19, 18, 16, 19, 24, 16, 19, 13, 21, 55, 17, 28, 25, 20, 41, 23, 24, 20, 14, 13, 24, 29, 41, 26, 26, 36, 25, 18, 25, 18, 28, 18, 19, 19, 24, 55, 29, 26, 24, 21, 16, 31, 24, 19, 17, 16, 28, 25, 23, 16, 104, 104, 99, 109, 109, 112, 112, 114, 124, 106, 117, 112, 124, 114, 114] 99


         1     -309588.5653             +nan
         2     -255721.7575      +53866.8078
         3     -247643.0089       +8078.7486
         4     -244260.8135       +3382.1954
         5     -242413.5826       +1847.2309
         6     -241128.9033       +1284.6793
         7     -240464.2950        +664.6083
         8     -240065.9266        +398.3685
         9     -239815.5907        +250.3359
        10     -239616.9569        +198.6338
        11     -239521.4459         +95.5110
        12     -239440.3787         +81.0671
        13     -239379.2842         +61.0945
        14     -239340.9585         +38.3257
        15     -239330.4313         +10.5272
        16     -239324.4729          +5.9584
        17     -239316.6683          +7.8047
        18     -239308.9967          +7.6715
        19     -239301.4182          +7.5786
        20     -239286.7829         +14.6353
        21     -239264.3837         +22.3992
        22     -239242.8571         +21.5266
        23

In [48]:
cname = "benh_nhan"
n_coms = 24
startprobPrior,transmatPrior = initByBakis(nStates=n_coms,bakisLevel=3)
hmm = hmmlearn.hmm.GMMHMM(
    n_components = n_coms, n_mix = 2, n_iter = 1000,verbose= True,
    params='mctw',
    init_params='mst',
    startprob_prior = startprobPrior,
    transmat_prior = transmatPrior
)

X = np.concatenate(dataset[cname])
lengths = list([len(x) for x in dataset[cname]])
print("training class", cname)
print(X.shape, lengths, len(lengths))
hmm.fit(X)
models[cname] = hmm

training class benh_nhan
(6393, 39) [51, 39, 41, 47, 41, 65, 39, 52, 43, 48, 59, 68, 38, 50, 32, 40, 66, 50, 44, 41, 102, 44, 38, 49, 56, 44, 48, 33, 35, 54, 37, 38, 58, 32, 42, 61, 51, 47, 49, 40, 52, 31, 62, 56, 47, 35, 36, 58, 49, 39, 35, 40, 36, 42, 32, 34, 49, 53, 42, 40, 39, 45, 39, 40, 54, 50, 40, 58, 37, 46, 41, 33, 27, 27, 50, 45, 50, 52, 35, 45, 56, 37, 42, 65, 55, 42, 50, 37, 37, 45, 47, 269, 189, 155, 163, 145, 143, 114, 145, 130, 130, 140, 119, 153, 130, 122] 106


         1     -585177.1428             +nan
         2     -501276.4420      +83900.7008
         3     -485035.2964      +16241.1456
         4     -478658.8578       +6376.4386
         5     -475136.2451       +3522.6127
         6     -473432.8812       +1703.3639
         7     -472356.9839       +1075.8973
         8     -471836.3168        +520.6670
         9     -471433.9045        +402.4123
        10     -471135.4950        +298.4095
        11     -470811.0596        +324.4355
        12     -470534.6113        +276.4483
        13     -470387.6159        +146.9953
        14     -470224.4878        +163.1281
        15     -470158.2998         +66.1880
        16     -470089.2130         +69.0868
        17     -469993.6646         +95.5484
        18     -469937.4669         +56.1977
        19     -469884.0276         +53.4392
        20     -469802.9333         +81.0943
        21     -469707.5774         +95.3559
        22     -469654.5636         +53.0139
        23

In [49]:
cname = "y_te"
n_coms = 9
startprobPrior,transmatPrior = initByBakis(nStates=n_coms,bakisLevel=3)
hmm = hmmlearn.hmm.GMMHMM(
    n_components = n_coms, n_mix = 2, n_iter = 1000,verbose= True,
    params='mctw',
    init_params='mst',
    startprob_prior = startprobPrior,
    transmat_prior = transmatPrior
)

X = np.concatenate(dataset[cname])
lengths = list([len(x) for x in dataset[cname]])
print("training class", cname)
print(X.shape, lengths, len(lengths))
hmm.fit(X)
models[cname] = hmm

training class y_te
(6673, 39) [32, 44, 37, 50, 39, 44, 50, 39, 43, 63, 40, 62, 41, 47, 42, 37, 32, 44, 37, 50, 42, 49, 31, 46, 45, 34, 64, 58, 49, 72, 60, 59, 58, 39, 37, 49, 49, 39, 44, 50, 39, 60, 56, 52, 44, 61, 54, 39, 40, 76, 63, 37, 35, 51, 42, 41, 49, 41, 58, 59, 57, 38, 44, 53, 46, 54, 50, 46, 38, 50, 45, 47, 55, 57, 43, 38, 44, 35, 41, 58, 53, 38, 51, 61, 57, 48, 47, 63, 49, 34, 59, 43, 259, 173, 143, 155, 143, 135, 148, 143, 140, 140, 145, 135, 148, 132, 148] 107


         1     -602991.7395             +nan
         2     -523344.1965      +79647.5430
         3     -506510.0605      +16834.1360
         4     -500989.8785       +5520.1820
         5     -498679.0765       +2310.8020
         6     -497775.9244        +903.1521
         7     -497162.3950        +613.5294
         8     -496757.8203        +404.5746
         9     -496618.5643        +139.2560
        10     -496563.6253         +54.9389
        11     -496524.3702         +39.2552
        12     -496484.9023         +39.4679
        13     -496424.4600         +60.4423
        14     -496377.4941         +46.9659
        15     -496355.2805         +22.2136
        16     -496347.3318          +7.9487
        17     -496341.4112          +5.9205
        18     -496335.9244          +5.4869
        19     -496330.3123          +5.6121
        20     -496326.1678          +4.1445
        21     -496323.7972          +2.3706
        22     -496322.2092          +1.5880
        23

In [53]:
cname = "duong"
n_coms = 15
startprobPrior,transmatPrior = initByBakis(nStates=n_coms,bakisLevel=3)
hmm = hmmlearn.hmm.GMMHMM(
    n_components = n_coms, n_mix = 2, n_iter = 1000,verbose= True,
    params='mctw',
    init_params='mst',
    startprob_prior = startprobPrior,
    transmat_prior = transmatPrior
)

X = np.concatenate(dataset[cname])
lengths = list([len(x) for x in dataset[cname]])
print("training class", cname)
print(X.shape, lengths, len(lengths))
hmm.fit(X)
models[cname] = hmm

training class duong
(3201, 39) [19, 47, 23, 17, 19, 16, 36, 19, 19, 19, 21, 14, 17, 15, 35, 22, 36, 22, 16, 15, 24, 18, 16, 19, 26, 36, 38, 38, 35, 27, 24, 19, 47, 14, 32, 18, 15, 24, 27, 33, 17, 23, 19, 18, 17, 28, 20, 19, 16, 32, 21, 285, 114, 158, 124, 150, 106, 119, 132, 106, 114, 124, 109, 122, 101, 130] 66


         1     -265986.4212             +nan
         2     -216422.9533      +49563.4680
         3     -208758.6877       +7664.2655
         4     -205861.1136       +2897.5742
         5     -204386.1643       +1474.9493
         6     -204041.7879        +344.3764
         7     -203704.0040        +337.7839
         8     -203443.9916        +260.0124
         9     -203238.9421        +205.0495
        10     -203033.7583        +205.1839
        11     -202824.8886        +208.8697
        12     -202705.5915        +119.2971
        13     -202627.4280         +78.1635
        14     -202529.6019         +97.8260
        15     -202456.5651         +73.0369
        16     -202430.5238         +26.0412
        17     -202397.1550         +33.3689
        18     -202353.2523         +43.9027
        19     -202282.7088         +70.5435
        20     -202219.1291         +63.5797
        21     -202160.6808         +58.4483
        22     -202099.0676         +61.6132
        23

In [54]:
models

{'hai': GMMHMM(algorithm='viterbi', covariance_type='diag',
        covars_prior=array([[[-1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5,
          -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5,
          -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5,
          -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5],
         [-1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5,
          -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -...
        [0.        , 0.        , 0.        , 0.33333333, 0.33333333,
         0.33333333, 0.        , 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.33333333,
         0.33333333, 0.33333333, 0.        , 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ,
         0.33333333, 0.33333333, 0.33333333, 0.        ],
        [0.        , 0.        , 0.        , 0.        , 0.        ,
         0.        , 0.33333333, 0.33333333, 0.33333333],
        [0.     

In [56]:
print("Testing")
accuracy = {}
test_name = {"test_y_te", "test_hai", "test_benh_nhan", "test_duong", "test_tien"}
for true_cname in test_name:
    k = 0
    for O in dataset[true_cname]:
        score = {cname : model.score(O, [len(O)]) for cname, model in models.items() if cname[:4] != 'test' }
        inverse = [(value, key) for key, value in score.items()]
        predict = max(inverse)[1]
        print(true_cname, predict)
        if predict == true_cname[5:]:
            k +=1
    accuracy[true_cname] = k/len(dataset[true_cname])
print(accuracy)

Testing
test_y_te y_te
test_y_te y_te
test_y_te y_te
test_y_te y_te
test_y_te y_te
test_y_te y_te
test_y_te y_te
test_y_te y_te
test_y_te y_te
test_y_te y_te
test_y_te y_te
test_y_te y_te
test_y_te y_te
test_y_te y_te
test_y_te y_te
test_y_te y_te
test_hai hai
test_hai hai
test_hai hai
test_hai hai
test_hai hai
test_hai hai
test_hai hai
test_hai hai
test_hai hai
test_hai hai
test_hai hai
test_hai hai
test_hai hai
test_hai hai
test_hai hai
test_hai hai
test_tien tien
test_tien tien
test_tien tien
test_tien tien
test_tien tien
test_tien tien
test_tien tien
test_tien tien
test_tien tien
test_tien tien
test_tien tien
test_tien tien
test_tien tien
test_tien tien
test_tien tien
test_tien tien
test_duong duong
test_duong duong
test_duong duong
test_duong duong
test_duong duong
test_duong duong
test_duong duong
test_duong duong
test_duong duong
test_duong duong
test_benh_nhan benh_nhan
test_benh_nhan benh_nhan
test_benh_nhan benh_nhan
test_benh_nhan benh_nhan
test_benh_nhan benh_nhan
test_benh

In [55]:
import pickle
with open("gmm_hmm1.pkl", "wb") as file:
    pickle.dump(models, file)
print("Saved!")

Saved!
