In [1]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm

In [10]:
def get_mfcc(file_path):
    y, sr = librosa.load(file_path) # read .wav file
    hop_length = math.floor(sr*0.010) # 10ms hop
    win_length = math.floor(sr*0.025) # 25ms frame
    # mfcc is 12 x T matrix
    mfcc = librosa.feature.mfcc(
        y, sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # substract mean from mfcc --> normalize mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1,1)) 
    # energy feature
    rms = librosa.feature.rms(y, hop_length=hop_length)
    frame_feature = np.concatenate([mfcc, rms], axis=0)
    # delta feature 1st order and 2nd order
    delta1 = librosa.feature.delta(frame_feature, order=1, mode='nearest')
    delta2 = librosa.feature.delta(frame_feature, order=2, mode='nearest')
    # X is 39 x T
    X = np.concatenate([frame_feature, delta1, delta2], axis=0) # O^r
    # return T x 39 (transpose of X)
    return X.T # hmmlearn use T x N matrix

In [11]:
mfcc = get_mfcc("./data/duong/17.wav")
mfcc.shape

(17, 39)

In [3]:
def get_class_data(data_dir):
    files = os.listdir(data_dir)
    mfcc = [get_mfcc(os.path.join(data_dir,f)) for f in files if f.endswith(".wav")]
    return mfcc

In [4]:
def clustering(X, n_clusters=10):
    kmeans = KMeans(n_clusters=n_clusters, n_init=50, random_state=0, verbose=0)
    kmeans.fit(X)
    print("centers", kmeans.cluster_centers_.shape)
    return kmeans  

In [5]:
class_names = ["hai", "tien", "duong", "y_te", "benh_nhan", "test_hai", "test_tien", "test_duong", "test_y_te", "test_benh_nhan"]
dataset = {}
for cname in class_names:
    print(f"Load {cname} dataset")
    dataset[cname] = get_class_data(os.path.join("data", cname))

# Get all vectors in the datasets
all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in dataset.items()], axis=0)
print("vectors", all_vectors.shape)
# Run K-Means algorithm to get clusters
kmeans = clustering(all_vectors)
print("centers", kmeans.cluster_centers_.shape)


Load hai dataset
Load tien dataset
Load duong dataset
Load y_te dataset
Load benh_nhan dataset
Load test_hai dataset
Load test_tien dataset
Load test_duong dataset
Load test_y_te dataset
Load test_benh_nhan dataset
vectors (15520, 39)
centers (10, 39)
centers (10, 39)


In [8]:
# dataset

In [None]:

nComp = 5
startprobPrior,transmatPrior = initByBakis(nComp=nComp,bakisLevel=3)
hmm = hmmlearn.hmm.GMMHMM(
    n_components = nComp, n_mix = 2, n_iter = 1000,verbose= True,
    params='mctw',
    init_params='mst',
    startprob_prior = startprobPrior,
    transmat_prior = transmatPrior
)

if cname[:4] != 'test':
    X = np.concatenate(dataset[cname])
    lengths = list([len(x) for x in dataset[cname]])
    print("training class", cname)
    print(X.shape, lengths, len(lengths))
    hmm.fit(X)
    models[cname] = hmm

In [9]:
models = {}
cname = "y_te"
class_vectors = dataset[cname]
# convert all vectors to the cluster index
# dataset['one'] = [O^1, ... O^R]
# O^r = (c1, c2, ... ct, ... cT)
# O^r size T x 1
# dataset[cname] = list([kmeans.predict(v).reshape(-1,1) for v in dataset[cname]])
hmm = hmmlearn.hmm.GMMHMM(
    n_components=9, random_state=0, n_iter=1000, verbose=True, 
    params='mctw', init_params='mc',
#     startprob_prior=np.array([0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0]),
    transmat_prior=np.array([
        [0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0],
        [0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0],
        [0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0],
        [0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0],
        [0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.3],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1]
    ]),
)
if cname[:4] != 'test':
    X = np.concatenate(dataset[cname])
    lengths = list([len(x) for x in dataset[cname]])
    print("training class", cname)
    print(X.shape, lengths, len(lengths))
    hmm.fit(X, lengths=lengths)
    models[cname] = hmm
print("Training done")

training class y_te
(4386, 39) [32, 44, 37, 50, 39, 44, 50, 39, 43, 63, 40, 62, 41, 47, 42, 37, 32, 44, 37, 50, 42, 49, 31, 46, 45, 34, 64, 58, 49, 72, 60, 59, 58, 39, 37, 49, 49, 39, 44, 50, 39, 60, 56, 52, 44, 61, 54, 39, 40, 76, 63, 37, 35, 51, 42, 41, 49, 41, 58, 59, 57, 38, 44, 53, 46, 54, 50, 46, 38, 50, 45, 47, 55, 57, 43, 38, 44, 35, 41, 58, 53, 38, 51, 61, 57, 48, 47, 63, 49, 34, 59, 43] 92


         1     -412554.4298             +nan


Training done


         2     -464640.0248      -52085.5950


In [17]:

cname = "benh_nhan"
class_vectors = dataset[cname]
# convert all vectors to the cluster index
# dataset['one'] = [O^1, ... O^R]
# O^r = (c1, c2, ... ct, ... cT)
# O^r size T x 1
# dataset[cname] = list([kmeans.predict(v).reshape(-1,1) for v in dataset[cname]])
hmm = hmmlearn.hmm.GMMHMM(
    n_components=24, random_state=0, n_iter=1000, verbose=True, 
    params='mctw', init_params='mc',
    startprob_prior=np.array([0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0]),
    transmat_prior=np.array([
        [0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
        [0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
        [0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
        [0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
        [0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1,0.0],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.2,0.1],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.7,0.3],
        [0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.0],
    ]),
)
if cname[:4] != 'test':
    X = np.concatenate(dataset[cname])
    lengths = list([len(x) for x in dataset[cname]])
    print("training class", cname)
    print(X.shape, lengths, len(lengths))
    hmm.fit(X, lengths=lengths)
    models[cname] = hmm
print("Training done")

training class benh_nhan
(4146, 39) [51, 39, 41, 47, 41, 65, 39, 52, 43, 48, 59, 68, 38, 50, 32, 40, 66, 50, 44, 41, 102, 44, 38, 49, 56, 44, 48, 33, 35, 54, 37, 38, 58, 32, 42, 61, 51, 47, 49, 40, 52, 31, 62, 56, 47, 35, 36, 58, 49, 39, 35, 40, 36, 42, 32, 34, 49, 53, 42, 40, 39, 45, 39, 40, 54, 50, 40, 58, 37, 46, 41, 33, 27, 27, 50, 45, 50, 52, 35, 45, 56, 37, 42, 65, 55, 42, 50, 37, 37, 45, 47] 91


         1     -396905.1104             +nan


Training done


         2     -457582.2685      -60677.1581


In [18]:
print("Testing")
for O in dataset["benh_nhan"]:
    score = {cname : model.score(O, [len(O)]) for cname, model in models.items() if cname[:4] != 'test' }
    inverse = [(value, key) for key, value in score.items()]
    pre = max(inverse)[1]
    print("test", score, pre)

Testing
test {'y_te': -150014.68885810464, 'benh_nhan': -10841.076310580722} benh_nhan
test {'y_te': -92737.2563845418, 'benh_nhan': -9247.665576321784} benh_nhan
test {'y_te': -7273.872838340847, 'benh_nhan': -6970.376951160433} benh_nhan
test {'y_te': -10383.555034759445, 'benh_nhan': -11772.003397326069} y_te
test {'y_te': -9647.008489812926, 'benh_nhan': -9167.781293035458} benh_nhan
test {'y_te': -126843.21669608151, 'benh_nhan': -10603.959848476808} benh_nhan
test {'y_te': -76261.23182289005, 'benh_nhan': -6812.777440559161} benh_nhan
test {'y_te': -67581.74704744958, 'benh_nhan': -12275.530625002462} benh_nhan
test {'y_te': -52372.34217179058, 'benh_nhan': -10372.79182015253} benh_nhan
test {'y_te': -546296.0783689052, 'benh_nhan': -30500.010250575524} benh_nhan
test {'y_te': -640748.3758828251, 'benh_nhan': -34157.086993629826} benh_nhan
test {'y_te': -25874.099294522664, 'benh_nhan': -9918.211408731357} benh_nhan
test {'y_te': -52498.25998030749, 'benh_nhan': -9473.87833919561