In [0]:
# Switch the kernel's working directory to the project folder: the relative
# "./BT2/" data paths and the *.pkl model files used in later cells resolve against it.
# NOTE(review): the path looks like a mounted Google Drive (Colab) - confirm the mount step runs first.
%cd drive/My\ Drive/XLTN

In [0]:
import pickle
import random

In [0]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm

In [0]:
import tensorflow as tf
from tensorflow import keras

In [0]:
!pip install hmmlearn

In [0]:
def get_mfcc(file_path):
    """Compute a mean-normalised MFCC + delta feature matrix for one audio file.

    Parameters
    ----------
    file_path : str
        Path of the audio file handed to ``librosa.load``.

    Returns
    -------
    numpy.ndarray
        Array of shape ``(T, 36)``: for each of the ``T`` frames, 12 MFCCs
        (per-coefficient mean over the utterance removed) followed by their
        first-order and second-order deltas.
    """
    y, sr = librosa.load(file_path)  # read .wav file
    hop_length = math.floor(sr * 0.010)  # 10 ms hop
    # No win_length is passed, so the analysis window is whatever librosa
    # derives from n_fft=1024 (not a 25 ms frame).
    # y/sr are given by keyword: recent librosa releases reject them as
    # positional arguments, while keywords work on old and new releases alike.
    # mfcc is a 12 x T matrix
    mfcc = librosa.feature.mfcc(
        y=y, sr=sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length)
    # subtract the per-coefficient mean --> normalised mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1, 1))
    # 1st- and 2nd-order delta features
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0)  # O^r
    # return T x 36 (transpose of X); hmmlearn expects one row per frame
    return X.T

In [0]:
def get_class_data(data_dir):
    """Extract MFCC features for every ``.wav`` file directly inside a directory.

    Files are visited in sorted name order so the returned list is
    reproducible (``os.listdir`` gives no ordering guarantee).

    Parameters
    ----------
    data_dir : str
        Directory to scan (sub-directories are not traversed).

    Returns
    -------
    list
        One ``get_mfcc`` result per ``.wav`` file, in sorted file-name order;
        empty when the directory holds no ``.wav`` file.
    """
    wav_names = sorted(f for f in os.listdir(data_dir) if f.endswith(".wav"))
    return [get_mfcc(os.path.join(data_dir, name)) for name in wav_names]

In [0]:
def clustering(X, n_clusters=5):
    """Fit a K-Means codebook on ``X`` and hand back the fitted estimator.

    ``X`` is passed unchanged to ``KMeans.fit``. The shape of the learned
    cluster centres is printed before the estimator is returned.
    """
    model = KMeans(
        n_clusters=n_clusters,
        n_init=50,
        random_state=0,
        verbose=0,
    )
    model.fit(X)
    centers_shape = model.cluster_centers_.shape
    print("centers", centers_shape)
    return model

In [0]:
# Start from a fresh dataset holding only the MFCC sequences of the "co_the" class.
cname = "co_the"
print(f"Load {cname} dataset")
dataset = {cname: get_class_data(os.path.join("./BT2/", cname))}

Load co_the dataset


In [0]:

# Stack the frames of every utterance currently held in `dataset` into one matrix
# (one row per frame).
all_vectors = np.concatenate(
    [np.concatenate(utterances, axis=0) for utterances in dataset.values()],
    axis=0,
)
print("vectors", all_vectors.shape)
# Learn the K-Means codebook on those frames
kmeans = clustering(all_vectors)
print("centers", kmeans.cluster_centers_.shape)

vectors (3273, 36)
centers (5, 36)
centers (5, 36)


In [0]:
# Train and save the discrete-observation HMM for the class named by `cname`.
# `dataset[cname]` is left untouched (raw MFCC matrices) so this cell can be re-run.
class_vectors = dataset[cname]

# Vector-quantise every utterance: one (T, 1) column of codebook indices per file.
# NOTE(review): the codebook is whatever `kmeans` is currently bound to; utterances
# scored against this model later must be quantised with the same codebook.
sequences = [kmeans.predict(v).reshape(-1, 1) for v in class_vectors]
# Seeded shuffle keeps the sequence order reproducible across runs.
random.Random(0).shuffle(sequences)

# Newer hmmlearn releases changed what MultinomialHMM models; the class for
# "one symbol index per frame" is CategoricalHMM. Use it when available and fall
# back to MultinomialHMM on older releases, where it had that meaning.
hmm_cls = getattr(hmmlearn.hmm, "CategoricalHMM", hmmlearn.hmm.MultinomialHMM)
# NOTE(review): hmmlearn documents startprob_prior / transmat_prior as Dirichlet
# prior parameters, not initial probabilities. These row-stochastic-looking values
# may have been meant as initial startprob_/transmat_ - confirm the intent.
hmm = hmm_cls(
    n_components=13, random_state=0, n_iter=1000, verbose=True,
    startprob_prior=np.array([0.02, 0.58 ,0.23, 0.  , 0.  , 0.17 ,0. ,  0.,   0.,   0.  , 0. ,  0. ,  0.  ]),
    transmat_prior=np.array([[0.66 ,0. ,  0.,   0. ,  0. ,  0.,   0. ,  0.34, 0. ,  0. ,  0. ,  0.   ,0.  ],
 [0.  , 0.57 ,0. ,  0.  , 0. ,  0.23, 0.,   0. ,  0.12 ,0. ,  0.07, 0.  , 0.02],
 [0.  , 0.31, 0.69, 0. ,  0.,   0. ,  0. ,  0.,   0. ,  0. ,  0. ,  0. ,  0.  ],
 [0. ,  0. ,  0.  , 0.82, 0.18, 0. ,  0.  , 0.  , 0. ,  0.,   0. ,  0. ,  0.  ],
 [0.  , 0. ,  0.  , 0.,   0.75, 0. ,  0.24, 0. ,  0.01, 0. , 0.  , 0. ,  0.  ],
 [0.  , 0.13 ,0. ,  0.  , 0.  , 0.67, 0. ,  0.04 ,0.04 ,0.  , 0.  , 0. ,  0.11],
 [0.  , 0. ,  0.  , 0. ,  0.07, 0.  , 0.78 ,0. ,  0. ,  0.15, 0. ,  0. ,  0.  ],
 [0.  , 0. ,  0. ,  0.,   0.  , 0.02, 0.02, 0.07, 0.05, 0.  , 0.84, 0. ,  0.  ],
 [0.  , 0.,   0.,   0.13 ,0. ,  0.  , 0.14, 0. ,  0.7,  0. ,  0. ,  0.03, 0.  ],
 [0.  , 0.  , 0.  , 0.,   0. ,  0.  , 0.  , 0.22 ,0.   ,0.78, 0. ,  0. ,  0.  ],
 [0.  , 0.02, 0.08, 0. ,  0.  , 0.  , 0. ,  0.,   0.17 ,0.  , 0.73, 0. ,  0.  ],
 [0.  , 0.  , 0. ,  0.  , 0. ,  0.  , 0.43, 0.  , 0. ,  0. ,  0.  , 0.57 ,0.  ],
 [0.  , 0.,   0.  , 0. ,  0. ,  0.22, 0. ,  0. ,  0.  , 0.   ,0.  , 0.  , 0.78]])
)

# hmmlearn takes all sequences stacked in one array plus the length of each one.
X = np.concatenate(sequences)
lengths = [len(x) for x in sequences]
print("training class", cname)
print(X.shape, lengths, len(lengths))
hmm.fit(X, lengths=lengths)

with open("co_the_model.pkl", "wb") as file:
    pickle.dump(hmm, file)

training class co_the
(3273, 1) [52, 19, 25, 32, 30, 22, 25, 53, 30, 52, 32, 36, 36, 24, 23, 36, 32, 33, 27, 27, 27, 28, 44, 27, 35, 22, 27, 27, 30, 27, 24, 50, 25, 30, 24, 30, 29, 37, 46, 24, 21, 50, 34, 32, 41, 34, 34, 85, 26, 26, 52, 41, 37, 32, 25, 26, 57, 21, 23, 34, 19, 24, 25, 24, 35, 33, 20, 36, 19, 38, 31, 44, 28, 30, 29, 16, 24, 36, 34, 35, 22, 37, 30, 25, 41, 32, 22, 23, 20, 21, 34, 24, 27, 36, 19, 36, 32, 47, 20, 53, 41, 29, 32] 103


         1       -5181.7019             +nan
         2       -5017.5278        +164.1741
         3       -4928.6408         +88.8870
         4       -4721.2866        +207.3542
         5       -4246.5101        +474.7765
         6       -3449.3520        +797.1581
         7       -2892.5886        +556.7634
         8       -2768.0510        +124.5376
         9       -2712.1538         +55.8972
        10       -2652.2685         +59.8853
        11       -2572.0744         +80.1941
        12       -2480.5307         +91.5437
        13       -2404.4506         +76.0801
        14       -2358.9274         +45.5232
        15       -2324.1538         +34.7737
        16       -2283.8335         +40.3203
        17       -2246.4553         +37.3782
        18       -2215.1197         +31.3357
        19       -2192.4895         +22.6301
        20       -2177.4694         +15.0201
        21       -2166.8299         +10.6395
        22       -2157.7791          +9.0507
        23

In [0]:
# Load, quantise, train and save the discrete-observation HMM for the "khong" class.
dataset = {}
cname ="khong"
print(f"Load {cname} dataset")
dataset[cname] = get_class_data(os.path.join("./BT2/", cname))

# Get all vectors in the datasets
all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in dataset.items()], axis=0)
print("vectors", all_vectors.shape)
# Run K-Means algorithm to get clusters
# NOTE(review): this codebook is fitted on this class only; utterances scored
# against the resulting model later must be quantised with the same codebook.
kmeans = clustering(all_vectors)
print("centers", kmeans.cluster_centers_.shape)

# `dataset[cname]` keeps the raw MFCC matrices; the quantised copies live in `sequences`.
class_vectors = dataset[cname]

# One (T, 1) column of codebook indices per utterance.
sequences = [kmeans.predict(v).reshape(-1, 1) for v in class_vectors]
# Seeded shuffle keeps the sequence order reproducible across runs.
random.Random(0).shuffle(sequences)

# Newer hmmlearn releases changed what MultinomialHMM models; the class for
# "one symbol index per frame" is CategoricalHMM. Use it when available and fall
# back to MultinomialHMM on older releases, where it had that meaning.
hmm_cls = getattr(hmmlearn.hmm, "CategoricalHMM", hmmlearn.hmm.MultinomialHMM)
# NOTE(review): hmmlearn documents startprob_prior / transmat_prior as Dirichlet
# prior parameters, not initial probabilities. These values may have been meant
# as initial startprob_/transmat_ - confirm the intent.
hmm = hmm_cls(
    n_components=9, random_state=0, n_iter=1000, verbose=True,
    startprob_prior=np.array([0.02, 0.45 ,0. ,  0.07, 0.26 ,0.07 ,0.13, 0.  , 0.  ]),
    transmat_prior=np.array([[0.4 ,0.  ,0.,  0. , 0. , 0.5, 0. , 0.  ,0. ],
 [0. , 0.6 ,0. , 0. , 0.,  0. , 0.,  0.3, 0. ],
 [0. , 0.4, 0.3, 0.1 ,0.1, 0.,  0. , 0. , 0. ],
 [0. , 0. , 0. , 0.8, 0. , 0. , 0.,  0.,  0.1],
 [0. , 0. , 0.1, 0.,  0.8, 0.,  0.,  0. , 0. ],
 [0. , 0.,  0. , 0.2, 0. , 0.3, 0.,  0. , 0.4],
 [0. , 0. , 0.,  0.,  0.1, 0.,  0.9, 0.,  0. ],
 [0.1 ,0. , 0. , 0.1 ,0. , 0.2, 0. , 0.6, 0. ],
 [0.  ,0. , 0. , 0. , 0.  ,0. , 0.2, 0. , 0.8]])
)

# hmmlearn takes all sequences stacked in one array plus the length of each one.
X = np.concatenate(sequences)
lengths = [len(x) for x in sequences]
print("training class", cname)
print(X.shape, lengths, len(lengths))
hmm.fit(X, lengths=lengths)
with open("khong_model.pkl", "wb") as file:
    pickle.dump(hmm, file)

Load khong dataset
vectors (3146, 36)
centers (5, 36)
centers (5, 36)
training class khong
(3146, 1) [30, 31, 18, 24, 27, 26, 33, 23, 19, 29, 31, 12, 26, 23, 25, 21, 23, 22, 33, 29, 23, 23, 20, 26, 19, 27, 18, 27, 25, 29, 20, 654, 22, 38, 28, 22, 22, 40, 26, 19, 45, 30, 29, 21, 27, 14, 28, 27, 25, 25, 23, 19, 28, 32, 27, 16, 29, 22, 22, 16, 27, 14, 27, 33, 23, 19, 28, 25, 21, 20, 25, 21, 18, 30, 26, 42, 23, 28, 37, 28, 18, 17, 23, 34, 19, 24, 24, 28, 19, 14, 24, 24, 32, 30, 32, 23, 24, 27, 29, 28] 100


         1       -5078.2901             +nan
         2       -4859.9989        +218.2911
         3       -4743.1976        +116.8013
         4       -4493.2868        +249.9108
         5       -4124.7024        +368.5844
         6       -3759.9478        +364.7546
         7       -3260.3314        +499.6165
         8       -2658.8954        +601.4359
         9       -2353.7909        +305.1046
        10       -2246.7569        +107.0340
        11       -2199.8660         +46.8908
        12       -2172.4785         +27.3875
        13       -2157.6052         +14.8734
        14       -2149.7157          +7.8895
        15       -2144.2447          +5.4710
        16       -2140.2506          +3.9941
        17       -2137.1414          +3.1092
        18       -2134.5411          +2.6003
        19       -2132.4561          +2.0850
        20       -2131.0932          +1.3629
        21       -2128.4970          +2.5962
        22       -2126.5758          +1.9212
        23

In [0]:
# Load, quantise, train and save the discrete-observation HMM for the "nay" class.
dataset = {}
cname ="nay"
print(f"Load {cname} dataset")
dataset[cname] = get_class_data(os.path.join("./BT2/", cname))

# Get all vectors in the datasets
all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in dataset.items()], axis=0)
print("vectors", all_vectors.shape)
# Run K-Means algorithm to get clusters
# NOTE(review): this codebook is fitted on this class only; utterances scored
# against the resulting model later must be quantised with the same codebook.
kmeans = clustering(all_vectors)
print("centers", kmeans.cluster_centers_.shape)

# `dataset[cname]` keeps the raw MFCC matrices; the quantised copies live in `sequences`.
class_vectors = dataset[cname]

# One (T, 1) column of codebook indices per utterance.
sequences = [kmeans.predict(v).reshape(-1, 1) for v in class_vectors]
# Seeded shuffle keeps the sequence order reproducible across runs.
random.Random(0).shuffle(sequences)

# Newer hmmlearn releases changed what MultinomialHMM models; the class for
# "one symbol index per frame" is CategoricalHMM. Use it when available and fall
# back to MultinomialHMM on older releases, where it had that meaning.
hmm_cls = getattr(hmmlearn.hmm, "CategoricalHMM", hmmlearn.hmm.MultinomialHMM)
# NOTE(review): hmmlearn documents startprob_prior / transmat_prior as Dirichlet
# prior parameters, not initial probabilities. These values may have been meant
# as initial startprob_/transmat_ - confirm the intent.
hmm = hmm_cls(
    n_components=9, random_state=0, n_iter=1000, verbose=True,
    startprob_prior=np.array([0.2, 0.,  0. , 0.8 ,0. , 0. , 0.,  0. , 0. ]),
    transmat_prior=np.array([[0.8 , 0.  , 0. ,  0. ,  0. ,  0. ,  0.2 , 0. ,  0.  ],
 [0.  , 0. ,  0.  , 1.  , 0.  , 0.  , 0.  , 0.  , 0.  ],
 [0.  , 0.,   0.74, 0. ,  0. ,  0. ,  0. ,  0.26 ,0.  ],
 [0.22, 0. ,  0. ,  0.77 ,0. ,  0. ,  0. ,  0. ,  0.02],
 [0.  , 0.01 ,0.27, 0. ,  0.72, 0.,   0.  , 0.  , 0.  ],
 [0.  , 0. ,  0.,   0.  , 0.31, 0.69, 0. ,  0.  , 0.  ],
 [0.  , 0. ,  0.  , 0.   ,0.,   0.,   0.8 , 0. ,  0.2 ],
 [0.  , 0.  , 0.  , 0.,   0. ,  0. ,  0.,   1. ,  0.  ],
 [0.  , 0. ,  0.   ,0.,   0.  , 0.32, 0.  , 0.  , 0.68]])
)

# hmmlearn takes all sequences stacked in one array plus the length of each one.
X = np.concatenate(sequences)
lengths = [len(x) for x in sequences]
print("training class", cname)
print(X.shape, lengths, len(lengths))
hmm.fit(X, lengths=lengths)
with open("nay_model.pkl", "wb") as file:
    pickle.dump(hmm, file)

Load nay dataset
vectors (2884, 36)
centers (5, 36)
centers (5, 36)
training class nay
(2884, 1) [15, 25, 24, 37, 32, 19, 32, 33, 28, 31, 14, 36, 34, 24, 36, 33, 26, 32, 34, 36, 36, 27, 35, 35, 14, 33, 32, 31, 33, 27, 19, 34, 35, 32, 25, 27, 25, 25, 22, 28, 37, 25, 32, 28, 28, 30, 22, 32, 36, 35, 31, 27, 19, 34, 30, 33, 28, 24, 36, 25, 28, 15, 37, 14, 24, 29, 32, 28, 34, 33, 30, 33, 35, 32, 29, 30, 27, 35, 37, 15, 36, 27, 26, 33, 30, 22, 36, 32, 24, 37, 36, 34, 29, 37, 34, 28, 26, 32] 98


         1       -4659.1084             +nan
         2       -4379.3834        +279.7250
         3       -4255.7172        +123.6662
         4       -4022.9410        +232.7762
         5       -3720.3273        +302.6137
         6       -3235.0778        +485.2495
         7       -2421.6140        +813.4637
         8       -1829.8193        +591.7947
         9       -1684.1874        +145.6319
        10       -1650.4357         +33.7517
        11       -1610.9259         +39.5098
        12       -1476.0490        +134.8770
        13       -1281.6545        +194.3944
        14       -1198.4038         +83.2508
        15       -1174.7351         +23.6686
        16       -1163.9887         +10.7465
        17       -1155.6797          +8.3090
        18       -1147.8752          +7.8044
        19       -1140.4913          +7.3839
        20       -1133.1109          +7.3803
        21       -1125.5908          +7.5201
        22       -1118.9107          +6.6802
        23

In [0]:
# Load, quantise, train and save the discrete-observation HMM for the "nguoi" class.
dataset = {}
cname ="nguoi"
print(f"Load {cname} dataset")
dataset[cname] = get_class_data(os.path.join("./BT2/", cname))

# Get all vectors in the datasets
all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in dataset.items()], axis=0)
print("vectors", all_vectors.shape)
# Run K-Means algorithm to get clusters
# NOTE(review): this codebook is fitted on this class only; utterances scored
# against the resulting model later must be quantised with the same codebook.
kmeans = clustering(all_vectors)
print("centers", kmeans.cluster_centers_.shape)

# `dataset[cname]` keeps the raw MFCC matrices; the quantised copies live in `sequences`.
class_vectors = dataset[cname]

# One (T, 1) column of codebook indices per utterance.
sequences = [kmeans.predict(v).reshape(-1, 1) for v in class_vectors]
# Seeded shuffle keeps the sequence order reproducible across runs.
random.Random(0).shuffle(sequences)

# Newer hmmlearn releases changed what MultinomialHMM models; the class for
# "one symbol index per frame" is CategoricalHMM. Use it when available and fall
# back to MultinomialHMM on older releases, where it had that meaning.
hmm_cls = getattr(hmmlearn.hmm, "CategoricalHMM", hmmlearn.hmm.MultinomialHMM)
# NOTE(review): hmmlearn documents startprob_prior / transmat_prior as Dirichlet
# prior parameters, not initial probabilities. These values may have been meant
# as initial startprob_/transmat_ - confirm the intent.
hmm = hmm_cls(
    n_components=9, random_state=0, n_iter=1000, verbose=True,
    startprob_prior=np.array([0.  , 0.  , 0.  , 0.21 ,0. ,  0. ,  0. ,  0.37, 0.42]),
    transmat_prior=np.array([[0.  , 0. ,  0. ,  0. ,  0.,   0. ,  1. ,  0. ,  0.  ],
 [0.08, 0.64, 0.27, 0.,   0.,   0. ,  0.  , 0. ,  0.  ],
 [0.  , 0.  , 0.43 ,0. ,  0.57, 0.  , 0. ,  0. ,  0.  ],
 [0.01, 0.03, 0.  , 0.81, 0. ,  0.16, 0. ,  0. ,  0.  ],
 [0.  , 0.  , 0.   ,0. ,  0.81 ,0. ,  0.19 ,0. ,  0.  ],
 [0.  , 0.46, 0.  , 0.  , 0.  , 0.42 ,0.  , 0.12, 0.  ],
 [0.  , 0.  , 0. ,  0. ,  0. ,  0. ,  0.84 ,0.01, 0.14],
 [0.  , 0.18 ,0.  , 0.12 ,0.  , 0.13 ,0.  , 0.57, 0.  ],
 [0.   ,0. ,  0.  , 0.3 , 0. ,  0. ,  0. , 0.   ,0.7 ]])
)

# hmmlearn takes all sequences stacked in one array plus the length of each one.
X = np.concatenate(sequences)
lengths = [len(x) for x in sequences]
print("training class", cname)
print(X.shape, lengths, len(lengths))
hmm.fit(X, lengths=lengths)
with open("nguoi_model.pkl", "wb") as file:
    pickle.dump(hmm, file)

Load nguoi dataset
vectors (2597, 36)
centers (5, 36)
centers (5, 36)
training class nguoi
(2597, 1) [25, 33, 36, 20, 31, 18, 47, 18, 55, 19, 28, 15, 22, 20, 19, 30, 40, 23, 23, 20, 22, 22, 17, 18, 15, 21, 18, 44, 38, 29, 23, 22, 32, 35, 24, 20, 24, 22, 18, 53, 23, 34, 19, 23, 18, 22, 20, 44, 15, 20, 24, 35, 17, 37, 22, 26, 30, 25, 27, 16, 21, 26, 37, 22, 29, 23, 30, 17, 16, 33, 30, 36, 39, 13, 20, 25, 30, 22, 16, 18, 30, 17, 21, 20, 16, 25, 19, 33, 22, 48, 16, 16, 23, 22, 36, 19, 28, 14, 18, 36, 39, 20] 102


         1       -4338.5511             +nan
         2       -3939.9885        +398.5625
         3       -3797.5630        +142.4255
         4       -3522.1818        +275.3812
         5       -3149.0823        +373.0995
         6       -2681.9602        +467.1222
         7       -2163.8522        +518.1079
         8       -1961.8329        +202.0193
         9       -1912.6899         +49.1430
        10       -1883.6086         +29.0813
        11       -1864.0325         +19.5761
        12       -1850.3622         +13.6703
        13       -1840.1207         +10.2415
        14       -1831.1636          +8.9571
        15       -1822.7105          +8.4531
        16       -1815.0923          +7.6182
        17       -1808.7754          +6.3169
        18       -1804.2488          +4.5265
        19       -1800.6500          +3.5988
        20       -1797.9388          +2.7112
        21       -1796.0253          +1.9135
        22       -1794.9223          +1.1030
        23

In [0]:
# Load, quantise, train and save the discrete-observation HMM for the "toi" class.
dataset = {}
cname ="toi"
print(f"Load {cname} dataset")
dataset[cname] = get_class_data(os.path.join("./BT2/", cname))

# Get all vectors in the datasets
all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in dataset.items()], axis=0)
print("vectors", all_vectors.shape)
# Run K-Means algorithm to get clusters
# NOTE(review): this codebook is fitted on this class only; utterances scored
# against the resulting model later must be quantised with the same codebook.
kmeans = clustering(all_vectors)
print("centers", kmeans.cluster_centers_.shape)

# `dataset[cname]` keeps the raw MFCC matrices; the quantised copies live in `sequences`.
class_vectors = dataset[cname]

# One (T, 1) column of codebook indices per utterance.
sequences = [kmeans.predict(v).reshape(-1, 1) for v in class_vectors]
# Seeded shuffle keeps the sequence order reproducible across runs.
random.Random(0).shuffle(sequences)

# Newer hmmlearn releases changed what MultinomialHMM models; the class for
# "one symbol index per frame" is CategoricalHMM. Use it when available and fall
# back to MultinomialHMM on older releases, where it had that meaning.
hmm_cls = getattr(hmmlearn.hmm, "CategoricalHMM", hmmlearn.hmm.MultinomialHMM)
# NOTE(review): hmmlearn documents startprob_prior / transmat_prior as Dirichlet
# prior parameters, not initial probabilities. These values may have been meant
# as initial startprob_/transmat_ - confirm the intent.
hmm = hmm_cls(
    n_components=9, random_state=0, n_iter=1000, verbose=True,
    startprob_prior=np.array([0. ,  0.  , 0. ,  0.01 ,0. ,  0. ,  0. ,  0. ,  0.99]),
    transmat_prior=np.array([[1. ,  0.,   0.  , 0.  , 0.  , 0. ,  0. ,  0. ,  0.  ],
 [0.,   0.62, 0.38, 0. ,  0.  , 0. ,  0. ,  0.  , 0.  ],
 [0.  , 0.  , 0.55 ,0.  , 0.04, 0. ,  0.41, 0. ,  0.  ],
 [0.  , 0.34 ,0.  , 0.61 ,0. ,  0.  , 0.,   0.05 ,0.  ],
 [0.  , 0. ,  0.  , 0.  , 0.69 ,0.  , 0.31 ,0. ,  0.  ],
 [0.  , 0.  , 0.02, 0.25 ,0. ,  0.73, 0.  , 0.   ,0.  ],
 [0.15, 0. ,  0.  , 0.  , 0.,   0.,   0.85 ,0. ,  0.  ],
 [0.  , 0. ,  0.,   0.,   0.,   0. ,  0.32, 0.68 ,0.  ],
 [0.  , 0. ,  0. ,  0.,   0.,   0.5,  0.,   0. ,  0.5 ]])
)

# hmmlearn takes all sequences stacked in one array plus the length of each one.
X = np.concatenate(sequences)
lengths = [len(x) for x in sequences]
print("training class", cname)
print(X.shape, lengths, len(lengths))
hmm.fit(X, lengths=lengths)
with open("toi_model.pkl", "wb") as file:
    pickle.dump(hmm, file)

Load toi dataset
vectors (1701, 36)
centers (5, 36)
centers (5, 36)
training class toi
(1701, 1) [13, 13, 12, 16, 13, 22, 16, 17, 21, 13, 13, 19, 17, 16, 16, 16, 18, 17, 18, 10, 29, 18, 23, 12, 11, 17, 26, 21, 14, 12, 21, 18, 17, 17, 12, 24, 23, 10, 15, 26, 28, 26, 19, 16, 20, 31, 18, 18, 24, 14, 17, 12, 16, 10, 23, 16, 19, 19, 17, 14, 20, 16, 12, 14, 9, 14, 13, 14, 15, 13, 24, 14, 18, 17, 11, 15, 13, 16, 18, 17, 12, 21, 21, 10, 25, 16, 16, 19, 18, 18, 14, 22, 23, 22, 26, 16, 13, 16, 11] 99


         1       -2710.3285             +nan
         2       -2354.4803        +355.8483
         3       -2309.9312         +44.5491
         4       -2226.4407         +83.4905
         5       -2061.5256        +164.9151
         6       -1799.5045        +262.0211
         7       -1475.7581        +323.7464
         8       -1262.9272        +212.8308
         9       -1181.2228         +81.7044
        10       -1137.8456         +43.3772
        11       -1111.2389         +26.6068
        12       -1092.7198         +18.5190
        13       -1083.4307          +9.2892
        14       -1075.0782          +8.3524
        15       -1068.6312          +6.4470
        16       -1063.8604          +4.7709
        17       -1059.7570          +4.1034
        18       -1056.6402          +3.1168
        19       -1054.2382          +2.4020
        20       -1052.3374          +1.9009
        21       -1050.7914          +1.5459
        22       -1049.4454          +1.3460
        23

In [0]:
# Load the MFCC sequences found in the "test" folder into `test_dataset`
# (class name -> list of per-utterance feature matrices).
class_names_test = ["test"]
test_dataset = {}
for cname in class_names_test:
    print(f"Load {cname} dataset")
    test_dataset[cname] = get_class_data(os.path.join("./BT2/", cname))

# Get all vectors in the datasets
all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in test_dataset.items()], axis=0)
print("vectors", all_vectors.shape)
# Run K-Means algorithm to get clusters
# NOTE(review): this refits the codebook on the test vectors and rebinds the
# global `kmeans`. Cluster indices from a new fit are arbitrary labels, so they
# presumably do not line up with the codebook the saved HMMs were trained on - confirm.
kmeans = clustering(all_vectors)
print("centers", kmeans.cluster_centers_.shape)


In [0]:
# Load the trained per-class HMMs from disk into `models` (class name -> model).
# NOTE(review): the training cells of this notebook write "<class>_model.pkl",
# whereas this cell reads "<class>_model6.pkl" - confirm these are the intended files.
# SECURITY: pickle.load can execute arbitrary code; only load pickles you produced yourself.
models = {}

for cname in ["co_the", "khong", "nguoi", "toi", "nay"]:
    pkl_filename = f"{cname}_model6.pkl"
    # `with` closes the file handle as soon as the model has been read.
    with open(pkl_filename, 'rb') as model_pkl:
        model = pickle.load(model_pkl)
    models[cname] = model




In [0]:
# Quantise every utterance of the "test" folder with the codebook currently bound
# to `kmeans`, then print, per utterance, the class whose HMM scores it highest.
# NOTE(review): relies on `kmeans` and `models` left in the kernel by earlier cells.
class_names = ["test"]
dataset = {}
for cname in class_names:
    class_vectors = get_class_data(os.path.join("./BT2/", cname))
    # One (T, 1) column of codebook indices per utterance.
    dataset[cname] = [kmeans.predict(v).reshape(-1, 1) for v in class_vectors]

for true_cname in class_names:
    for O in dataset[true_cname]:
        # Log-likelihood of the utterance under every trained class model.
        score = {cname: model.score(O, [len(O)]) for cname, model in models.items() if cname[:4] != 'test'}
        # First class with the highest score (ties resolve in `models` order).
        max_key = max(score, key=score.get)
        print(max_key)   # print which class it is


In [0]:
# Load every class folder (plus "test"), fit one codebook on all of it, and
# replace each utterance in `dataset` by its (T, 1) column of cluster indices.
#load all train data to count acc
class_names = ["co_the", "khong", "nay", "nguoi", "toi", "test"]
dataset = {}
for cname in class_names:
    print(f"Load {cname} dataset")
    dataset[cname] = get_class_data(os.path.join("./BT2/", cname))

# Get all vectors in the datasets
all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in dataset.items()], axis=0)
print("vectors", all_vectors.shape)
# Run K-Means algorithm to get clusters
# NOTE(review): the codebook is refitted here on training and "test" vectors
# together and rebinds the global `kmeans`; the models in `models` were
# presumably trained with a different codebook - confirm the indices are comparable.
kmeans = clustering(all_vectors)
print("centers", kmeans.cluster_centers_.shape)


for cname in class_names:
    # Reference to the raw feature matrices; not used again in this cell.
    class_vectors = dataset[cname]
    # convert all vectors to the cluster index
    # dataset['one'] = [O^1, ... O^R]
    # O^r = (c1, c2, ... ct, ... cT)
    # O^r size T x 1
    dataset[cname] = list([kmeans.predict(v).reshape(-1,1) for v in dataset[cname]])
    
    # For every class except "test", only report the stacked shape and the
    # sequence lengths; nothing is fitted in this cell.
    if cname[:4] != 'test':
        X = np.concatenate(dataset[cname])
        lengths = list([len(x) for x in dataset[cname]])
        print("training class", cname)
        print(X.shape, lengths, len(lengths))
        




In [0]:
# Per-class accuracy: score every quantised utterance of each class against all
# models in `models` and count how often the true class scores highest.
class_names = ["co_the", "khong", "nay", "nguoi", "toi"]
print("Testing")

for true_cname in class_names:
    correct = 0
    # Denominator is exactly the number of utterances scored for this class.
    total = len(dataset[true_cname])
    for O in dataset[true_cname]:
        score = {cname: model.score(O, [len(O)]) for cname, model in models.items() if cname[:4] != 'test'}
        # First class with the highest log-likelihood (ties resolve in `models` order).
        max_key = max(score, key=score.get)
        if max_key == true_cname:
            correct = correct + 1
        print(true_cname, score)
    # Reported as a real percentage; an empty class reports 0.0 instead of dividing by zero.
    accuracy = 100 * correct / total if total else 0.0
    print("Accuracy " + str(accuracy) + "%")


In [0]:
# Overall accuracy across all classes: fraction of utterances whose true class
# has the highest-scoring model, reported as a percentage.
class_names = ["co_the", "khong", "nay", "nguoi", "toi"]
print("Testing")
correct = 0
total = 0
for true_cname in class_names:
    for O in dataset[true_cname]:
        # Count utterances, not classes, so the denominator matches what was scored.
        total = total + 1
        score = {cname: model.score(O, [len(O)]) for cname, model in models.items() if cname[:4] != 'test'}
        # First class with the highest log-likelihood (ties resolve in `models` order).
        max_key = max(score, key=score.get)
        if max_key == true_cname:
            correct = correct + 1
        print(true_cname, score)
# An empty dataset reports 0.0 instead of dividing by zero.
accuracy = 100 * correct / total if total else 0.0
print("Accuracy = " + str(accuracy) + "%")