In [1]:
import librosa
import numpy as np
import os
import math
import hmmlearn.hmm
import pickle

In [2]:
def get_mfcc(file_path):
    y, sr = librosa.load(file_path)  # read .wav file
    hop_length = math.floor(sr*0.010)  # 10ms hop
    win_length = math.floor(sr*0.025)  # 25ms frame
    # mfcc is 12 x T matrix
    mfcc = librosa.feature.mfcc(
        y, sr, n_mfcc=12, n_fft=1024,
        hop_length=hop_length, win_length=win_length)
    # substract mean from mfcc --> normalize mfcc
    mfcc = mfcc - np.mean(mfcc, axis=1).reshape((-1, 1))
    # delta feature 1st order and 2nd order
    delta1 = librosa.feature.delta(mfcc, order=1)
    delta2 = librosa.feature.delta(mfcc, order=2)
    # X is 36 x T
    X = np.concatenate([mfcc, delta1, delta2], axis=0)  # O^r
    # return T x 36 (transpose of X)
    return X.T  # hmmlearn use T x N matrix

def get_class_data(data_dir):
    files = os.listdir(data_dir)
    mfcc = [get_mfcc(os.path.join(data_dir, f))
            for f in files if f.endswith(".wav")]
    return mfcc

def initByBakis(nComp, bakisLevel):
        ''' init start_prob and transmat_prob by Bakis model ''' 
        startprobPrior = np.zeros(nComp)
        startprobPrior[0 : bakisLevel - 1] = 1./ (bakisLevel - 1)
         
        transmatPrior = getTransmatPrior(nComp, bakisLevel)
         
        return startprobPrior, transmatPrior

def getTransmatPrior(nComp, bakisLevel):
        ''' get transmat prior '''
        transmatPrior = (1. / bakisLevel) * np.eye(nComp)
         
        for i in range(nComp - (bakisLevel - 1)):
            for j in range(bakisLevel - 1):
                transmatPrior[i, i + j + 1] = 1. /  bakisLevel
                 
        for i in range(nComp - bakisLevel + 1, nComp):
            for j in range(nComp - i -j):
                transmatPrior[i, i + j] = 1. / (nComp - i)
        return transmatPrior

In [3]:
class_names = ["cothe", "duoc", "khong", "nguoi", "trong", "test_cothe", "test_duoc", "test_khong", "test_nguoi", "test_trong"]
dataset = {}
for cname in class_names:
    print(f"Load {cname} dataset")
    dataset[cname] = get_class_data(os.path.join("data/cutted", cname))

# Get all vectors in the datasets
all_vectors = np.concatenate([np.concatenate(v, axis=0) for k, v in dataset.items()], axis=0)
print("vectors", all_vectors.shape)

Load cothe dataset
Load duoc dataset
Load khong dataset
Load nguoi dataset
Load trong dataset
Load test_cothe dataset
Load test_duoc dataset
Load test_khong dataset
Load test_nguoi dataset
Load test_trong dataset
vectors (13684, 36)


In [10]:
models = {}
for cname in class_names:
    class_vectors = dataset[cname]
    nComp = 7
    startprobPrior,transmatPrior = initByBakis(nComp=nComp,bakisLevel=3)
    hmm = hmmlearn.hmm.GMMHMM(
        n_components = nComp, n_mix = 2, n_iter = 1000,verbose= True,
        params='mctw',
        init_params='mst',
        startprob_prior = startprobPrior,
        transmat_prior = transmatPrior
    )

    if cname[:4] != 'test':
        X = np.concatenate(dataset[cname])
        lengths = list([len(x) for x in dataset[cname]])
        print("training class", cname)
        print(X.shape, lengths, len(lengths))
        hmm.fit(X)
        models[cname] = hmm
print("Training done")
print("Testing")

training class cothe
(3535, 36) [44, 47, 51, 44, 47, 64, 41, 47, 41, 37, 37, 37, 37, 47, 37, 41, 37, 34, 41, 37, 47, 41, 37, 44, 47, 41, 51, 67, 37, 41, 41, 37, 37, 34, 47, 31, 67, 47, 37, 37, 44, 41, 37, 44, 34, 44, 44, 34, 47, 51, 34, 34, 34, 37, 41, 51, 27, 31, 37, 34, 47, 44, 37, 44, 34, 47, 37, 37, 44, 37, 41, 74, 44, 37, 41, 41, 41, 51, 44, 61, 37, 41, 41, 37] 84
         1     -371221.4923             +nan
         2     -355693.0711      +15528.4212
         3     -353378.6267       +2314.4445
         4     -352235.2065       +1143.4202
         5     -351425.8686        +809.3379
         6     -350824.2009        +601.6678
         7     -350504.3725        +319.8284
         8     -350320.4405        +183.9320
         9     -350188.3878        +132.0527
        10     -350082.9931        +105.3946
        11     -350009.0472         +73.9460
        12     -349955.4878         +53.5594
        13     -349919.0464         +36.4415
        14     -349881.0506         +37.995

In [12]:
accuracy = {}
for cname in class_names:
    if cname[:4] != 'test':
        continue
    total_data = len(dataset[cname])
    true_cnt = 0
    # true result là tên chính xác của bộ test
    true_result = class_names[class_names.index(cname) % 5]
    for O in dataset[cname]:
        score = {cname: model.score(O, [len(O)]) for cname, model in models.items() if cname[:4] != 'test'}
        result = max(score, key=lambda k: score[k])
        isTrue = true_result == result
        print(cname, score, result, isTrue)
        if isTrue:
            true_cnt += 1
    accuracy[cname] = true_cnt/total_data
print("\n\nAccuracy:", accuracy)

Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
Degenerate mixture covariance
test_cothe {'cothe': -4800.9480960082155, 'duoc': -5151.709443309209, 'khong': -5129.908728585808, 'nguoi': -5208.054970982674, 'trong': -5247.331359433581} cothe True
test_cothe {'cothe': -3797.8655680747743, 'duoc': -4193.50849015669, 'khong': -4078.850640364571, 'nguoi': -4278.792518828359, 'trong': -4145.474284523301} cothe True
test_cothe {'cothe': -6187.859111405024, 'duoc': -6663.154543815673, 'khong': -6609.351423479, 'nguoi': -6676.419075413217, 'trong': -6677.838643820601} cothe True
test_cothe {'cothe': -3753.380462349721, 'duoc': -4167.484352009792, 'khong': -4121.689947817978, 'nguoi': -4209.549379411256, 'trong': -4175

In [13]:
with open("gmm_hmm.pkl", "wb") as file:
    pickle.dump(models, file)
print("Saved!")

Saved!
