In [1]:
import librosa
import numpy as np
import os
import math
from sklearn.cluster import KMeans
import hmmlearn.hmm
import pickle

In [2]:
def get_mfcc(file_path):
    y, sr = librosa.load(file_path)  # read .wav file
    hop_length = math.floor(sr*0.010)  # 10ms hop
    win_length = math.floor(sr*0.025)  # 25ms frame
    # mfcc is 12 x T matrix
    mfcc = librosa.feature.mfcc(
        y, sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # substract mean from mfcc --> normalize mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1, 1))
    # delta feature 1st order and 2nd order
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0)  # O^r
    # return T x 36 (transpose of X)
    return X.T  # hmmlearn use T x N matrix

def get_class_data(data_dir):
    files = os.listdir(data_dir)
    mfcc = [get_mfcc(os.path.join(data_dir, f))
            for f in files if f.endswith(".wav")]
    return mfcc


def clustering(X, n_clusters=10):
    kmeans = KMeans(n_clusters=n_clusters, n_init=50,
                    random_state=0, verbose=0)
    kmeans.fit(X)
    print("centers", kmeans.cluster_centers_.shape)
    return kmeans

def initByBakis(nComp, bakisLevel):
        ''' init start_prob and transmat_prob by Bakis model ''' 
        startprobPrior = np.zeros(nComp)
        startprobPrior[0 : bakisLevel - 1] = 1./ (bakisLevel - 1)
         
        transmatPrior = getTransmatPrior(nComp, bakisLevel)
         
        return startprobPrior, transmatPrior

def getTransmatPrior(nComp, bakisLevel):
        ''' get transmat prior '''
        transmatPrior = (1. / bakisLevel) * np.eye(nComp)
         
        for i in range(nComp - (bakisLevel - 1)):
            for j in range(bakisLevel - 1):
                transmatPrior[i, i + j + 1] = 1. /  bakisLevel
                 
        for i in range(nComp - bakisLevel + 1, nComp):
            for j in range(nComp - i -j):
                transmatPrior[i, i + j] = 1. / (nComp - i)
        return transmatPrior

In [3]:
class_names = ["cothe", "duoc", "khong", "nguoi", "trong", "test_cothe", "test_duoc", "test_khong", "test_nguoi", "test_trong"]
dataset = {}
for cname in class_names:
    print(f"Load {cname} dataset")
    dataset[cname] = get_class_data(os.path.join("data/cutted", cname))

# Get all vectors in the datasets
all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in dataset.items()], axis=0)
print("vectors", all_vectors.shape)

Load cothe dataset
Load duoc dataset
Load khong dataset
Load nguoi dataset
Load trong dataset
Load test_cothe dataset
Load test_duoc dataset
Load test_khong dataset
Load test_nguoi dataset
Load test_trong dataset
vectors (13684, 36)


In [6]:
models = {}
for cname in class_names:
    class_vectors = dataset[cname]
    nComp = 5
    startprobPrior,transmatPrior = initByBakis(nComp=nComp,bakisLevel=3)
    hmm = hmmlearn.hmm.GMMHMM(
        n_components = nComp, n_mix = 2, n_iter = 1000,verbose= True,
        params='mctw',
        init_params='mst',
        startprob_prior = startprobPrior,
        transmat_prior = transmatPrior
    )

    if cname[:4] != 'test':
        X = np.concatenate(dataset[cname])
        lengths = list([len(x) for x in dataset[cname]])
        print("training class", cname)
        print(X.shape, lengths, len(lengths))
        hmm.fit(X)
        models[cname] = hmm
print("Training done")
print("Testing")

training class cothe
(3535, 36) [44, 47, 51, 44, 47, 64, 41, 47, 41, 37, 37, 37, 37, 47, 37, 41, 37, 34, 41, 37, 47, 41, 37, 44, 47, 41, 51, 67, 37, 41, 41, 37, 37, 34, 47, 31, 67, 47, 37, 37, 44, 41, 37, 44, 34, 44, 44, 34, 47, 51, 34, 34, 34, 37, 41, 51, 27, 31, 37, 34, 47, 44, 37, 44, 34, 47, 37, 37, 44, 37, 41, 74, 44, 37, 41, 41, 41, 51, 44, 61, 37, 41, 41, 37] 84
         1     -372215.9127             +nan
         2     -358823.4794      +13392.4334
         3     -355965.7090       +2857.7704
         4     -355308.5948        +657.1143
         5     -355098.9046        +209.6901
         6     -355000.1154         +98.7892
         7     -354925.7898         +74.3256
         8     -354883.2588         +42.5311
         9     -354868.0000         +15.2588
        10     -354846.5584         +21.4416
        11     -354825.9967         +20.5617
        12     -354798.2382         +27.7585
        13     -354785.5048         +12.7333
        14     -354778.2273          +7.277

In [7]:
accuracy = {}
for cname in class_names:
    if cname[:4] != 'test':
        continue
    total_data = len(dataset[cname])
    true_cnt = 0
    # true result là tên chính xác của bộ test
    true_result = class_names[class_names.index(cname) % 5]
    for O in dataset[cname]:
        score = {cname: model.score(O, [len(O)]) for cname, model in models.items() if cname[:4] != 'test'}
        result = max(score, key=lambda k: score[k])
        isTrue = true_result == result
        print(cname, score, result, isTrue)
        if isTrue:
            true_cnt += 1
    accuracy[cname] = true_cnt/total_data
print("\n\nAccuracy:", accuracy)

test_cothe {'cothe': -4874.150975301159, 'duoc': -5119.845247856504, 'khong': -5090.583217308381, 'nguoi': -5219.766508002532, 'trong': -5177.750660279599} cothe True
test_cothe {'cothe': -3831.0171199508945, 'duoc': -4174.8089696383195, 'khong': -4076.519856679497, 'nguoi': -4262.706303501729, 'trong': -4105.946393079537} cothe True
test_cothe {'cothe': -6235.511049076363, 'duoc': -6685.512539274409, 'khong': -6597.440553578652, 'nguoi': -6694.72755023392, 'trong': -6629.792142858172} cothe True
test_cothe {'cothe': -3778.1317992628533, 'duoc': -4141.2437437448225, 'khong': -4068.194485719782, 'nguoi': -4173.037975735423, 'trong': -4126.216416527114} cothe True
test_cothe {'cothe': -4482.82302815465, 'duoc': -4908.406033078506, 'khong': -4791.869817945348, 'nguoi': -5049.923843899186, 'trong': -4914.050535568599} cothe True
test_cothe {'cothe': -4116.0346762967065, 'duoc': -4440.4364965210825, 'khong': -4401.291448089454, 'nguoi': -4496.080945303724, 'trong': -4430.194275358646} cothe

In [8]:
with open("gmm_hmm.pkl", "wb") as file:
    pickle.dump(models, file)
print("Saved!")

Saved!
