In [1]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm

In [2]:
def get_mfcc(file_path):
    y, sr = librosa.load(file_path) # read .wav file
    hop_length = math.floor(sr*0.010) # 10ms hop
    win_length = math.floor(sr*0.025) # 25ms frame
    # mfcc is 12 x T matrix
    mfcc = librosa.feature.mfcc(
        y, sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # substract mean from mfcc --> normalize mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1,1)) 
    # delta feature 1st order and 2nd order
    delta1 = librosa.feature.delta(mfcc, order=1, mode="nearest")
    delta2 = librosa.feature.delta(mfcc, order=2, mode="nearest")
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0) # O^r
    # return T x 36 (transpose of X)
    return X.T # hmmlearn use T x N matrix

In [3]:
def get_class_data(data_dir):
    files = os.listdir(data_dir)
    mfcc = [get_mfcc(os.path.join(data_dir,f)) for f in files if f.endswith(".wav")]
    return mfcc

In [4]:
def clustering(X, n_clusters=10):
    kmeans = KMeans(n_clusters=n_clusters, n_init=50, random_state=0, verbose=0)
    kmeans.fit(X)
    print("centers", kmeans.cluster_centers_.shape)
    return kmeans  

In [27]:
class_names = ["hai", "tien", "duong", "benh_nhan", "y_te", "test_hai"]
dataset = {}
for cname in class_names:
    print(f"Load {cname} dataset")
    dataset[cname] = get_class_data(os.path.join("data", cname))

# Get all vectors in the datasets
all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in dataset.items()], axis=0)
print("vectors", all_vectors.shape)


Load hai dataset
Load tien dataset
Load duong dataset
Load benh_nhan dataset
Load y_te dataset
Load test_hai dataset
vectors (14898, 36)


In [33]:
models = {}

for cname in class_names:
    # convert all vectors to the cluster index
    # dataset['one'] = [O^1, ... O^R]
    # O^r = (c1, c2, ... ct, ... cT)
    # O^r size T x 1
    hmm = hmmlearn.hmm.GMMHMM(
        n_components=6, n_mix = 2, random_state=0, n_iter=1000, verbose=True,
        params='mctw',
        init_params='mc',
    )
    hmm.startprob_ = np.array([1.0,0.0,0.0,0.0,0.0,0.0])
#     hmm.transmat_ = np.array([
#             [0.7,0.3,0.0,0.0,0.0,0.0],
#             [0.0,0.7,0.3,0.0,0.0,0.0],
#             [0.0,0.0,0.7,0.3,0.0,0.0],
#             [0.0,0.0,0.0,0.7,0.3,0.0],
#             [0.0,0.0,0.0,0.0,0.7,0.3],
#             [0.0,0.0,0.0,0.0,0.0,1.0]
#         ])

    if cname[:4] != 'test':
        X = np.concatenate(dataset[cname])
        lengths = list([len(x) for x in dataset[cname]])
        print("training class", cname)
        print(X.shape, lengths, len(lengths))
        hmm.fit(X)
        models[cname] = hmm
print("Training done")

training class hai
(1548, 36) [19, 26, 15, 26, 23, 20, 16, 24, 33, 21, 15, 19, 20, 19, 23, 18, 15, 25, 23, 20, 19, 24, 16, 16, 22, 27, 27, 14, 14, 18, 20, 34, 13, 13, 13, 21, 14, 19, 16, 18, 30, 15, 11, 16, 27, 17, 16, 28, 14, 26, 14, 33, 40, 24, 22, 14, 28, 17, 12, 21, 18, 32, 26, 17, 21, 17, 13, 17, 18, 13, 8, 16, 12, 15, 14, 10, 28, 18, 12] 79


         1     -146965.8190             +nan
         2     -140442.5610       +6523.2580
         3     -139798.7935        +643.7675
         4     -139658.9273        +139.8662
         5     -139581.5501         +77.3773
         6     -139507.3348         +74.2153
         7     -139457.7175         +49.6173
         8     -139429.4269         +28.2906
         9     -139410.1609         +19.2660
        10     -139394.5198         +15.6411
        11     -139383.3567         +11.1631
        12     -139365.7759         +17.5808
        13     -139351.2588         +14.5171
        14     -139340.9947         +10.2641
        15     -139331.5458          +9.4489
        16     -139319.8890         +11.6569
        17     -139307.7588         +12.1302
        18     -139297.9124          +9.8464
        19     -139284.4272         +13.4851
        20     -139252.9237         +31.5036
        21     -139222.7467         +30.1770
        22     -139195.9717         +26.7750
        23

training class tien
(1928, 36) [13, 14, 18, 19, 16, 18, 24, 21, 16, 20, 17, 24, 18, 26, 19, 16, 16, 29, 24, 28, 19, 36, 25, 26, 29, 24, 23, 31, 29, 25, 14, 24, 18, 18, 19, 18, 16, 19, 24, 16, 19, 13, 21, 55, 17, 28, 25, 20, 41, 23, 24, 20, 14, 13, 24, 29, 41, 26, 26, 36, 25, 18, 25, 18, 28, 18, 19, 19, 24, 55, 29, 26, 24, 21, 16, 31, 24, 19, 17, 16, 28, 25, 23, 16] 84


         1     -187743.1460             +nan
         2     -177306.5080      +10436.6380
         3     -176199.0772       +1107.4308
         4     -175838.9336        +360.1436
         5     -175701.9921        +136.9415
         6     -175657.0241         +44.9680
         7     -175631.9918         +25.0323
         8     -175597.2197         +34.7722
         9     -175557.9648         +39.2549
        10     -175502.3084         +55.6564
        11     -175481.3881         +20.9203
        12     -175470.6728         +10.7153
        13     -175450.3447         +20.3281
        14     -175437.4420         +12.9027
        15     -175422.8677         +14.5742
        16     -175405.2938         +17.5739
        17     -175385.6957         +19.5981
        18     -175344.9082         +40.7875
        19     -175324.9600         +19.9482
        20     -175313.7914         +11.1686
        21     -175308.7669          +5.0245
        22     -175303.4258          +5.3411
        23

training class duong
(1207, 36) [19, 47, 23, 17, 19, 16, 36, 19, 19, 19, 21, 14, 17, 15, 35, 22, 36, 22, 16, 15, 24, 18, 16, 19, 26, 36, 38, 38, 35, 27, 24, 19, 47, 14, 32, 18, 15, 24, 27, 33, 17, 23, 19, 18, 17, 28, 20, 19, 16, 32, 21] 51


         1     -116948.2009             +nan
         2     -111047.2339       +5900.9670
         3     -109979.0076       +1068.2263
         4     -109661.3388        +317.6688
         5     -109430.1816        +231.1572
         6     -109287.1605        +143.0211
         7     -109135.6763        +151.4842
         8     -109077.7910         +57.8853
         9     -109054.9099         +22.8811
        10     -109038.6214         +16.2884
        11     -109024.3583         +14.2631
        12     -109014.2229         +10.1354
        13     -109010.7968          +3.4261
        14     -109008.1911          +2.6058
        15     -109005.1639          +3.0272
        16     -109003.9678          +1.1961
        17     -109003.5121          +0.4557
        18     -109003.1793          +0.3328
        19     -109002.8351          +0.3442
        20     -109002.3907          +0.4443
        21     -109001.7940          +0.5967
        22     -109000.9319          +0.8621
        23

training class benh_nhan
(4146, 36) [51, 39, 41, 47, 41, 65, 39, 52, 43, 48, 59, 68, 38, 50, 32, 40, 66, 50, 44, 41, 102, 44, 38, 49, 56, 44, 48, 33, 35, 54, 37, 38, 58, 32, 42, 61, 51, 47, 49, 40, 52, 31, 62, 56, 47, 35, 36, 58, 49, 39, 35, 40, 36, 42, 32, 34, 49, 53, 42, 40, 39, 45, 39, 40, 54, 50, 40, 58, 37, 46, 41, 33, 27, 27, 50, 45, 50, 52, 35, 45, 56, 37, 42, 65, 55, 42, 50, 37, 37, 45, 47] 91


         1     -428743.2398             +nan
         2     -410225.3365      +18517.9033
         3     -405134.9393       +5090.3971
         4     -403805.9730       +1328.9663
         5     -402861.1619        +944.8111
         6     -402198.2149        +662.9470
         7     -401609.8990        +588.3159
         8     -401100.2021        +509.6969
         9     -400804.0324        +296.1697
        10     -400674.5403        +129.4921
        11     -400615.0015         +59.5387
        12     -400554.3514         +60.6502
        13     -400501.0531         +53.2983
        14     -400461.6107         +39.4424
        15     -400419.3928         +42.2179
        16     -400384.8323         +34.5605
        17     -400367.1677         +17.6647
        18     -400350.9886         +16.1791
        19     -400333.0508         +17.9377
        20     -400315.2806         +17.7702
        21     -400307.3259          +7.9547
        22     -400302.4022          +4.9237
        23

training class y_te
(4386, 36) [32, 44, 37, 50, 39, 44, 50, 39, 43, 63, 40, 62, 41, 47, 42, 37, 32, 44, 37, 50, 42, 49, 31, 46, 45, 34, 64, 58, 49, 72, 60, 59, 58, 39, 37, 49, 49, 39, 44, 50, 39, 60, 56, 52, 44, 61, 54, 39, 40, 76, 63, 37, 35, 51, 42, 41, 49, 41, 58, 59, 57, 38, 44, 53, 46, 54, 50, 46, 38, 50, 45, 47, 55, 57, 43, 38, 44, 35, 41, 58, 53, 38, 51, 61, 57, 48, 47, 63, 49, 34, 59, 43] 92


         1     -440906.1670             +nan
         2     -421839.8051      +19066.3618
         3     -417546.4421       +4293.3631
         4     -416155.2647       +1391.1774
         5     -415474.8146        +680.4501
         6     -415125.4747        +349.3400
         7     -414953.1793        +172.2953
         8     -414862.6628         +90.5165
         9     -414808.9609         +53.7019
        10     -414771.2108         +37.7501
        11     -414726.3638         +44.8470
        12     -414675.4026         +50.9611
        13     -414627.6183         +47.7843
        14     -414564.3987         +63.2196
        15     -414476.9409         +87.4578
        16     -414389.8976         +87.0434
        17     -414287.8159        +102.0817
        18     -414207.9765         +79.8394
        19     -414128.6100         +79.3665
        20     -414076.5443         +52.0657
        21     -414053.4372         +23.1071
        22     -414037.2901         +16.1471
        23

Training done


       113     -413364.0720          +0.0083


In [34]:
print("Testing")
accuracy = {}
test_name = {"test_hai"}
for true_cname in test_name:
    k = 0
    for O in dataset[true_cname]:
        score = {cname : model.score(O, [len(O)]) for cname, model in models.items() if cname[:4] != 'test' }
        inverse = [(value, key) for key, value in score.items()]
        predict = max(inverse)[1]
        print(true_cname, predict)
        if predict == true_cname[5:]:
            k +=1
    accuracy[true_cname] = k/len(dataset[true_cname])
print(accuracy)

Testing
test_hai benh_nhan
test_hai benh_nhan
test_hai benh_nhan
test_hai benh_nhan
test_hai benh_nhan
test_hai benh_nhan
test_hai benh_nhan
test_hai tien
test_hai benh_nhan
test_hai benh_nhan
test_hai benh_nhan
{'test_hai': 0.0}


In [30]:
import tkinter as tk

class Application(tk.Frame):
    def __init__(self, master=None):
        super().__init__(master)
        self.master = master
        self.pack()
        self.create_widgets()

    def create_widgets(self):
        self.hi_there = tk.Button(self)
        self.hi_there["text"] = "Hello World\n(click me)"
        self.hi_there["command"] = self.say_hi
        self.hi_there.pack(side="top")

        self.quit = tk.Button(self, text="QUIT", fg="red",
                              command=self.master.destroy)
        self.quit.pack(side="bottom")

    def say_hi(self):
        print("hi there, everyone!")

root = tk.Tk()
app = Application(master=root)
app.mainloop()

hi there, everyone!
hi there, everyone!
