In [1]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm

In [2]:
def get_mfcc(file_path):
    y, sr = librosa.load(file_path) # read .wav file
    hop_length = math.floor(sr*0.010) # 10ms hop
    win_length = math.floor(sr*0.025) # 25ms frame
    # mfcc is 12 x T matrix
    mfcc = librosa.feature.mfcc(
        y, sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # substract mean from mfcc --> normalize mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1,1)) 
    # delta feature 1st order and 2nd order
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0) # O^r
    # return T x 36 (transpose of X)
    return X.T # hmmlearn use T x N matrix

In [3]:
def get_class_data(data_dir):
    files = os.listdir(data_dir)
    mfcc = [get_mfcc(os.path.join(data_dir,f)) for f in files if f.endswith(".wav")]
    return mfcc

In [4]:
def clustering(X, n_clusters=10):
    kmeans = KMeans(n_clusters=n_clusters, n_init=50, random_state=0, verbose=0)
    kmeans.fit(X)
    print("centers", kmeans.cluster_centers_.shape)
    return kmeans  

In [5]:
class_names = ["cothe", "duoc", "khong", "nguoi", "trong" ,"test_cothe", "test_duoc", "test_khong", "test_nguoi", "test_trong"]
dataset = {}
for cname in class_names:
    print(f"Load {cname} dataset")
    dataset[cname] = get_class_data(os.path.join("data/cutted", cname))

# Get all vectors in the datasets
all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in dataset.items()], axis=0)
#print("vectors", all_vectors.shape)

Load cothe dataset
Load duoc dataset
Load khong dataset
Load nguoi dataset
Load trong dataset
Load test_cothe dataset
Load test_duoc dataset
Load test_khong dataset
Load test_nguoi dataset
Load test_trong dataset


In [6]:
models = {}

for cname in class_names:
    # convert all vectors to the cluster index
    # dataset['one'] = [O^1, ... O^R]
    # O^r = (c1, c2, ... ct, ... cT)
    # O^r size T x 1
    hmm = hmmlearn.hmm.GMMHMM(
        n_components=6, n_mix = 2, random_state=42, n_iter=1000, verbose=True,
        params='mctw',
        init_params='m',
        startprob_prior= np.array([1.0,0.0,0.0,0.0,0.0,0.0])
    )

    if cname[:4] != 'test':
        X = np.concatenate(dataset[cname])
        lengths = list([len(x) for x in dataset[cname]])
        print("training class", cname)
        print(X.shape, lengths, len(lengths))
        hmm.fit(X)
        models[cname] = hmm
print("Training done")

training class cothe
(3311, 36) [44, 47, 51, 44, 47, 64, 41, 47, 41, 37, 37, 37, 37, 47, 37, 41, 37, 34, 41, 37, 47, 41, 37, 44, 47, 41, 51, 67, 37, 41, 41, 37, 37, 34, 47, 31, 67, 47, 37, 37, 44, 41, 37, 44, 34, 44, 44, 34, 47, 51, 34, 34, 34, 37, 41, 51, 27, 31, 37, 34, 47, 44, 37, 44, 34, 47, 37, 37, 44, 37, 41, 74, 44, 37, 41, 41, 41, 51, 37] 79


         1     -347528.1838             +nan
         2     -333556.7098      +13971.4740
         3     -331728.1346       +1828.5753
         4     -330813.3180        +914.8166
         5     -330368.9005        +444.4175
         6     -330248.4053        +120.4952
         7     -330208.1978         +40.2075
         8     -330190.6763         +17.5214
         9     -330169.2203         +21.4561
        10     -330136.2210         +32.9993
        11     -330114.9336         +21.2873
        12     -330103.0619         +11.8717
        13     -330095.2202          +7.8417
        14     -330091.3312          +3.8890
        15     -330085.9383          +5.3930
        16     -330076.9596          +8.9787
        17     -330057.6005         +19.3591
        18     -330050.6421          +6.9584
        19     -330041.6269          +9.0152
        20     -330030.6122         +11.0147
        21     -330026.5908          +4.0213
        22     -330024.5499          +2.0409
        23

training class duoc
(1892, 36) [21, 17, 24, 37, 27, 24, 24, 24, 17, 31, 27, 17, 21, 21, 21, 24, 14, 21, 24, 27, 24, 27, 31, 27, 24, 24, 17, 14, 24, 17, 27, 24, 24, 21, 14, 17, 24, 17, 31, 21, 27, 27, 17, 17, 17, 21, 31, 17, 24, 27, 24, 27, 24, 21, 21, 21, 41, 24, 21, 34, 57, 31, 27, 27, 21, 21, 21, 24, 21, 17, 27, 17, 21, 21, 27, 21, 27, 24, 21, 24] 80


         1     -200544.0569             +nan
         2     -192070.2866       +8473.7703
         3     -189183.6166       +2886.6700
         4     -188616.8914        +566.7251
         5     -188401.0233        +215.8681
         6     -188231.3447        +169.6786
         7     -188167.3140         +64.0307
         8     -188109.2067         +58.1073
         9     -188052.5268         +56.6799
        10     -187993.5705         +58.9564
        11     -187964.2764         +29.2941
        12     -187949.4852         +14.7913
        13     -187934.4764         +15.0087
        14     -187921.4047         +13.0717
        15     -187912.7367          +8.6680
        16     -187901.5618         +11.1749
        17     -187892.5096          +9.0522
        18     -187886.5057          +6.0039
        19     -187879.7289          +6.7767
        20     -187871.8861          +7.8429
        21     -187863.8780          +8.0080
        22     -187855.1593          +8.7187
        23

training class khong
(2006, 36) [44, 21, 21, 24, 31, 21, 24, 24, 21, 21, 24, 27, 21, 21, 31, 24, 21, 27, 14, 24, 24, 27, 24, 34, 24, 31, 27, 21, 27, 24, 27, 31, 47, 24, 21, 24, 24, 34, 27, 17, 21, 27, 24, 24, 17, 31, 21, 24, 17, 21, 21, 27, 24, 31, 41, 31, 24, 27, 24, 24, 24, 21, 24, 21, 24, 27, 21, 24, 24, 27, 24, 17, 21, 17, 24, 31, 41, 24, 17, 27] 80


         1     -210446.1883             +nan
         2     -200881.8662       +9564.3222
         3     -199303.5755       +1578.2907
         4     -198965.9663        +337.6092
         5     -198784.4083        +181.5580
         6     -198613.1273        +171.2810
         7     -198440.1382        +172.9890
         8     -198345.5774         +94.5609
         9     -198278.2903         +67.2871
        10     -198217.1477         +61.1426
        11     -198166.2676         +50.8801
        12     -198131.5802         +34.6874
        13     -198109.4324         +22.1478
        14     -198083.6139         +25.8184
        15     -198063.0968         +20.5171
        16     -198045.7882         +17.3086
        17     -198011.5360         +34.2522
        18     -197988.4761         +23.0599
        19     -197981.5866          +6.8895
        20     -197975.7953          +5.7913
        21     -197969.2540          +6.5413
        22     -197960.5443          +8.7097
        23

training class nguoi
(1640, 36) [22, 20, 26, 32, 27, 20, 29, 20, 37, 22, 34, 25, 17, 17, 16, 18, 24, 26, 28, 20, 22, 22, 13, 21, 21, 23, 23, 21, 24, 15, 17, 18, 28, 17, 23, 19, 19, 25, 23, 32, 27, 22, 17, 18, 32, 19, 22, 28, 28, 39, 19, 22, 19, 16, 25, 17, 33, 22, 20, 28, 13, 24, 24, 28, 50, 36, 26, 18, 19, 23] 70


         1     -162582.3147             +nan
         2     -154953.9910       +7628.3237
         3     -153728.5411       +1225.4500
         4     -153333.5480        +394.9930
         5     -153172.1607        +161.3873
         6     -153069.1592        +103.0015
         7     -152950.2505        +118.9086
         8     -152824.5450        +125.7055
         9     -152651.7860        +172.7590
        10     -152560.0634         +91.7227
        11     -152502.6999         +57.3634
        12     -152430.9715         +71.7285
        13     -152384.2341         +46.7374
        14     -152351.4909         +32.7432
        15     -152335.2233         +16.2676
        16     -152322.0424         +13.1809
        17     -152313.4885          +8.5539
        18     -152299.4933         +13.9952
        19     -152279.3926         +20.1007
        20     -152274.6331          +4.7595
        21     -152272.2237          +2.4094
        22     -152271.5266          +0.6971
        23

training class trong
(2140, 36) [27, 34, 31, 24, 27, 27, 31, 47, 24, 24, 21, 24, 31, 34, 27, 21, 37, 24, 31, 27, 31, 21, 27, 21, 24, 24, 44, 37, 34, 34, 24, 31, 24, 31, 21, 24, 24, 21, 17, 21, 24, 21, 24, 27, 27, 51, 21, 14, 31, 21, 27, 21, 24, 24, 34, 27, 24, 31, 27, 24, 31, 27, 27, 24, 24, 24, 27, 21, 31, 27, 17, 24, 31, 24, 24, 44, 24, 57] 78


         1     -227590.6369             +nan
         2     -217991.2830       +9599.3539
         3     -216159.2113       +1832.0717
         4     -215542.1601        +617.0512
         5     -215225.9649        +316.1952
         6     -215053.7168        +172.2481
         7     -214972.4031         +81.3137
         8     -214867.8621        +104.5410
         9     -214772.4919         +95.3702
        10     -214687.2873         +85.2046
        11     -214563.5521        +123.7352
        12     -214459.4012        +104.1509
        13     -214388.1576         +71.2436
        14     -214345.7388         +42.4189
        15     -214312.8328         +32.9059
        16     -214283.2351         +29.5978
        17     -214263.6487         +19.5864
        18     -214252.9419         +10.7068
        19     -214229.7444         +23.1975
        20     -214214.9360         +14.8084
        21     -214211.3182          +3.6178
        22     -214206.9092          +4.4090
        23

Training done


        96     -214034.6118          +0.0184
        97     -214034.6017          +0.0100
        98     -214034.5968          +0.0049


In [7]:
print("Testing")

accuracy = {}
for cname in class_names:
    total_data = len(dataset[cname])
    true_cnt = 0
    true_result = class_names[class_names.index(cname) % 5] # true result là tên chính xác của bộ test
    for O in dataset[cname]:
        score = {cname : model.score(O, [len(O)]) for cname, model in models.items() if cname[:4] != 'test' }
        result = max(score, key=lambda k: score[k])
        isTrue = true_result == result
        print(cname, score, result, isTrue)
        if isTrue:
            true_cnt += 1
    accuracy[cname] = true_cnt/total_data    
print("\n\nAccuracy:", accuracy)

Testing
cothe {'cothe': -4229.859699745383, 'duoc': -4626.775905790555, 'khong': -4615.468311501021, 'nguoi': -4589.615361721264, 'trong': -4697.98650688884} cothe True
cothe {'cothe': -4612.712014216724, 'duoc': -5133.323037595656, 'khong': -5013.673950090564, 'nguoi': -5283.223681987363, 'trong': -5244.082015701066} cothe True
cothe {'cothe': -5159.054337057192, 'duoc': -5874.474886062506, 'khong': -5773.6498610573135, 'nguoi': -6082.708425813, 'trong': -5944.294307664803} cothe True
cothe {'cothe': -4840.213851774417, 'duoc': -5349.316694086865, 'khong': -5278.769699912248, 'nguoi': -5625.582095795544, 'trong': -5366.81291179924} cothe True
cothe {'cothe': -4659.863817375563, 'duoc': -5099.6561001169475, 'khong': -4919.7205130880075, 'nguoi': -5270.864239014555, 'trong': -5034.586380252311} cothe True
cothe {'cothe': -5795.72079244521, 'duoc': -6168.11321609431, 'khong': -6109.106783929486, 'nguoi': -6288.486495496533, 'trong': -6119.679363241134} cothe True
cothe {'cothe': -4542.25

cothe {'cothe': -3404.59988342216, 'duoc': -3630.3117426095455, 'khong': -3681.570626372944, 'nguoi': -3675.3362593150373, 'trong': -3644.293332918087} cothe True
cothe {'cothe': -3521.246550150918, 'duoc': -3887.0285781212683, 'khong': -3829.2570767406783, 'nguoi': -4113.956277734039, 'trong': -3986.8891938922307} cothe True
cothe {'cothe': -4067.487479801507, 'duoc': -4574.2064009510505, 'khong': -4470.068550458843, 'nguoi': -4734.7039775896465, 'trong': -4563.12206305808} cothe True
cothe {'cothe': -4685.670438328815, 'duoc': -5037.150751375128, 'khong': -4972.171781401938, 'nguoi': -5075.793234011183, 'trong': -5086.1382805383455} cothe True
cothe {'cothe': -2584.0020363529393, 'duoc': -2805.485608679824, 'khong': -2848.0386913829047, 'nguoi': -2936.045178885845, 'trong': -2859.7871729406766} cothe True
cothe {'cothe': -3524.60553358381, 'duoc': -3950.177117641083, 'khong': -4107.594442762962, 'nguoi': -3982.2531540196896, 'trong': -4058.1544484294723} cothe True
cothe {'cothe': -3

duoc {'cothe': -2816.173600081121, 'duoc': -2500.04048122619, 'khong': -2817.6042724777094, 'nguoi': -2889.9506423260796, 'trong': -2770.73652674846} duoc True
duoc {'cothe': -1668.1114378002603, 'duoc': -1499.7965418956433, 'khong': -1722.877472966681, 'nguoi': -1665.263258448031, 'trong': -1667.697736105684} duoc True
duoc {'cothe': -2997.2709413153752, 'duoc': -2802.295328345236, 'khong': -2983.3985845193906, 'nguoi': -3104.5045564312536, 'trong': -2934.2487924590155} duoc True
duoc {'cothe': -2257.1786777688785, 'duoc': -2117.8246241586544, 'khong': -2259.3771803417594, 'nguoi': -2357.31723901861, 'trong': -2224.973224260978} duoc True
duoc {'cothe': -2559.6138755907705, 'duoc': -2335.240266919409, 'khong': -2596.999533232548, 'nguoi': -2650.950769931078, 'trong': -2581.565614324244} duoc True
duoc {'cothe': -2781.1499873329726, 'duoc': -2437.0638939055807, 'khong': -2799.120726808751, 'nguoi': -2855.244601444078, 'trong': -2729.3965234642415} duoc True
duoc {'cothe': -1974.0120433

khong {'cothe': -1441.2220827874546, 'duoc': -1415.6076789558508, 'khong': -1370.4268793204926, 'nguoi': -1522.3485929641909, 'trong': -1443.5424921787758} khong True
khong {'cothe': -2400.7384433201487, 'duoc': -2436.8859319452686, 'khong': -2281.2879029935384, 'nguoi': -2464.2933515322206, 'trong': -2363.271996035923} khong True
khong {'cothe': -2419.169064931546, 'duoc': -2479.521032383223, 'khong': -2173.7100356813366, 'nguoi': -2521.3474941911295, 'trong': -2391.0513812646195} khong True
khong {'cothe': -2828.094762566283, 'duoc': -2940.1310384967574, 'khong': -2611.3566828662656, 'nguoi': -2958.392006281545, 'trong': -2855.437383655922} khong True
khong {'cothe': -2350.655800468159, 'duoc': -2350.2193579320724, 'khong': -2200.027025086723, 'nguoi': -2381.371692862855, 'trong': -2325.316639788074} khong True
khong {'cothe': -3513.3574692369725, 'duoc': -3540.1568977933716, 'khong': -3335.5954736069075, 'nguoi': -3630.9555562912137, 'trong': -3422.6239484784746} khong True
khong {'

khong {'cothe': -1883.130383357621, 'duoc': -1822.1478151625038, 'khong': -1708.220606574198, 'nguoi': -1879.3166856601003, 'trong': -1936.561194152628} khong True
khong {'cothe': -2116.006281318466, 'duoc': -2159.051322351026, 'khong': -2035.8136043985498, 'nguoi': -2223.4541606970993, 'trong': -2119.7136221126516} khong True
khong {'cothe': -1816.6614015554178, 'duoc': -1869.2047923238285, 'khong': -1663.0563066186714, 'nguoi': -1929.5930912186866, 'trong': -1854.6819454384568} khong True
khong {'cothe': -3176.2421119303203, 'duoc': -3172.441146866015, 'khong': -2725.441312054129, 'nguoi': -3268.224108135235, 'trong': -3154.899750871235} khong True
khong {'cothe': -3596.833476708404, 'duoc': -3634.0233352411688, 'khong': -3199.2028896805336, 'nguoi': -3717.416788244269, 'trong': -3628.9094766534404} khong True
khong {'cothe': -3845.948383667468, 'duoc': -3819.5499032749217, 'khong': -3678.963078628399, 'nguoi': -4002.580759894315, 'trong': -3755.74058369087} khong True
khong {'cothe'

nguoi {'cothe': -3124.05372487265, 'duoc': -3244.395245129341, 'khong': -3199.416481400047, 'nguoi': -2986.816214987887, 'trong': -3204.7266953100057} nguoi True
nguoi {'cothe': -1906.3891662765698, 'duoc': -1915.1925364174208, 'khong': -1897.4757441545798, 'nguoi': -1663.717693392623, 'trong': -1891.7863146721159} nguoi True
nguoi {'cothe': -2554.3034188028314, 'duoc': -2569.8046219424537, 'khong': -2660.7135540275476, 'nguoi': -2242.69370686043, 'trong': -2660.8548497756947} nguoi True
nguoi {'cothe': -2680.706455599725, 'duoc': -2698.7162664546504, 'khong': -2685.2428510689642, 'nguoi': -2370.408449389799, 'trong': -2647.02346202094} nguoi True
nguoi {'cothe': -2773.7499827599813, 'duoc': -2695.2836102065844, 'khong': -2717.140481676108, 'nguoi': -2416.6702063722737, 'trong': -2692.5935386931196} nguoi True
nguoi {'cothe': -3593.5626118360806, 'duoc': -3725.800844720325, 'khong': -3593.000187336799, 'nguoi': -3250.286088378265, 'trong': -3645.522998373675} nguoi True
nguoi {'cothe':

trong {'cothe': -4502.9467725530085, 'duoc': -4510.793751245575, 'khong': -4448.520872108587, 'nguoi': -4862.868208869367, 'trong': -4242.315838499089} trong True
trong {'cothe': -3765.3330569711, 'duoc': -3695.0801017237395, 'khong': -3656.9695128955127, 'nguoi': -3857.9837838097656, 'trong': -3378.099941150234} trong True
trong {'cothe': -3577.0405729659715, 'duoc': -3387.476248879032, 'khong': -3492.181063457525, 'nguoi': -3596.565356233002, 'trong': -3181.2089094179532} trong True
trong {'cothe': -3718.6143459471955, 'duoc': -3743.455358756876, 'khong': -3661.7195602558604, 'nguoi': -4023.9397662396377, 'trong': -3373.6099077485997} trong True
trong {'cothe': -2482.9221782303043, 'duoc': -2367.9818136244176, 'khong': -2474.671800663479, 'nguoi': -2578.9480441482274, 'trong': -2253.164599141145} trong True
trong {'cothe': -4093.341972600885, 'duoc': -3952.3006496432554, 'khong': -3847.529012495674, 'nguoi': -4086.9400643517047, 'trong': -3420.342487461113} trong True
trong {'cothe':

trong {'cothe': -5749.403408978557, 'duoc': -5642.3846290207775, 'khong': -5641.571760807211, 'nguoi': -6019.748539082036, 'trong': -5362.842928445183} trong True
test_cothe {'cothe': -4861.879006502512, 'duoc': -5121.288435915029, 'khong': -5092.181208375848, 'nguoi': -5210.726826771511, 'trong': -5188.314768434598} cothe True
test_cothe {'cothe': -3823.228677012259, 'duoc': -4211.393431112006, 'khong': -4098.141211236496, 'nguoi': -4260.030585856952, 'trong': -4110.786669666351} cothe True
test_cothe {'cothe': -6232.352446431924, 'duoc': -6676.986128360386, 'khong': -6584.073201998717, 'nguoi': -6711.994075877744, 'trong': -6645.268135014355} cothe True
test_cothe {'cothe': -3774.161512561472, 'duoc': -4175.683641324286, 'khong': -4074.738151455343, 'nguoi': -4170.554400441565, 'trong': -4164.7868153680865} cothe True
test_cothe {'cothe': -4272.843160881027, 'duoc': -4457.782250110943, 'khong': -4439.178283838058, 'nguoi': -4472.290373042251, 'trong': -4488.656720367959} cothe True
t

test_khong {'cothe': -2448.4735301071023, 'duoc': -2460.7388757484528, 'khong': -2185.537577621033, 'nguoi': -2589.6955689051947, 'trong': -2331.6161930505637} khong True
test_khong {'cothe': -1632.3146730733988, 'duoc': -1608.6655650830219, 'khong': -1535.0193390134864, 'nguoi': -1694.3277775243714, 'trong': -1594.3662176751236} khong True
test_khong {'cothe': -2457.7480521859743, 'duoc': -2618.684629652801, 'khong': -2272.9230055118437, 'nguoi': -2765.1424201725963, 'trong': -2546.721186426412} khong True
test_khong {'cothe': -1691.1606852351947, 'duoc': -1734.9804408599423, 'khong': -1535.621366737547, 'nguoi': -1744.957841479584, 'trong': -1746.3592129535805} khong True
test_khong {'cothe': -2529.085492244075, 'duoc': -2591.7847178826023, 'khong': -2351.8750530337893, 'nguoi': -2674.4926093073955, 'trong': -2616.568252396335} khong True
test_nguoi {'cothe': -1470.3632508677824, 'duoc': -1438.4501444750674, 'khong': -1465.9560237991095, 'nguoi': -1340.9813053553903, 'trong': -1489.2

In [8]:
import pickle 
with open("output.pkl", "wb") as file:
    pickle.dump(models, file)