In [67]:
import librosa
from hmmlearn import hmm
import os 
import numpy as np
from sklearn.model_selection import train_test_split
from tqdm import tqdm
import random
import IPython.display as dsp

## Chuẩn bị dữ liệu

In [8]:
path_file = "./Dataset/{}/{}/"
commands = ["a", 'b', 'ban','nhay','xuong','len','phai','trai']
name = ["nghia","khiem","huy","hoang","long"]

In [13]:
# os.listdir(path_file.format('nghia','a'))

In [22]:
def prepare_data():
    all_file_path = []
    for n in name:
        for c in commands:
            all_files_name = os.listdir(path_file.format(n,c))
            all_file_path += [(path_file.format(n,c) + num, c)  for num in all_files_name if '.wav' in num]
    train, test = train_test_split(all_file_path, test_size=0.2, random_state=6)
    
    return train, test

In [23]:
train_data, test_data = prepare_data()

In [24]:
train_data[:10]

[('./Dataset/huy/xuong/file57.wav', 'xuong'),
 ('./Dataset/huy/a/file12.wav', 'a'),
 ('./Dataset/nghia/trai/file80.wav', 'trai'),
 ('./Dataset/nghia/xuong/file40.wav', 'xuong'),
 ('./Dataset/huy/b/file62.wav', 'b'),
 ('./Dataset/long/xuong/file62.wav', 'xuong'),
 ('./Dataset/hoang/ban/file1.wav', 'ban'),
 ('./Dataset/long/phai/file11.wav', 'phai'),
 ('./Dataset/long/len/file121.wav', 'len'),
 ('./Dataset/huy/phai/file50.wav', 'phai')]

In [25]:
print(len(test_data))
print(len(train_data))

907
3624


In [26]:
train_data_loader =  {
    'a': [],
    'b': [],
    'len': [],
    'xuong': [],
    'trai': [],
    'phai': [],
    'ban': [],
    'nhay': []
}

for tup in train_data:
  train_data_loader[tup[1]].append(tup[0])

In [29]:
train_data_loader['a'][:10]

['./Dataset/huy/a/file12.wav',
 './Dataset/long/a/file84.wav',
 './Dataset/nghia/a/file48.wav',
 './Dataset/huy/a/file88.wav',
 './Dataset/huy/a/file118.wav',
 './Dataset/khiem/a/file115.wav',
 './Dataset/long/a/file70.wav',
 './Dataset/long/a/file104.wav',
 './Dataset/nghia/a/file4.wav',
 './Dataset/khiem/a/file20.wav']

## Trích xuất đặc trưng MFCC

In [30]:
def mfcc(wav_path):
    mfcc_feat = librosa.feature.mfcc(y = y, sr = sr, n_mfcc = 13)
    ans = [mfcc_feat]
    mfcc_delta1 = librosa.feature.delta(mfcc_feat, order = 1, mode ='nearest')
    ans.append(mfcc_delta1)
    mfcc_delta2 = librosa.feature.delta(mfcc_feat, order = 2, mode ='nearest')
    ans.append(mfcc_delta2)

    return np.transpose(np.concatenate(ans, axis = 0),[1,0])

## Training

In [31]:
def train(train_files):
    X = np.array([])
    for file_name in train_files:
        try:
            features_mfcc = mfcc(file_name)
            if len(X) == 0:
                X = features_mfcc
            else:
                try:
                    X = np.append(X, features_mfcc, axis=0)
                except:
                    pass
        except:
            print(file_path)
            pass
    model = hmm.GaussianHMM(n_components=4, covariance_type='diag', n_iter=1800)
    # fit hmm model
    np.seterr(all='ignore')
    model.fit(X)
    return model

In [49]:
hmm_models = []
for lab, wav_path_list in train_loader.items():
    print(wav_path_list[:10])
    label = lab
    print(lab)
    hmm_models.append((train(wav_path_list), label))

['./Dataset/huy/a/file12.wav', './Dataset/long/a/file84.wav', './Dataset/nghia/a/file48.wav', './Dataset/huy/a/file88.wav', './Dataset/huy/a/file118.wav', './Dataset/khiem/a/file115.wav', './Dataset/long/a/file70.wav', './Dataset/long/a/file104.wav', './Dataset/nghia/a/file4.wav', './Dataset/khiem/a/file20.wav']
a
['./Dataset/huy/b/file62.wav', './Dataset/nghia/b/file74.wav', './Dataset/hoang/b/file59.wav', './Dataset/khiem/b/file22.wav', './Dataset/khiem/b/file48.wav', './Dataset/nghia/b/file76.wav', './Dataset/long/b/file76.wav', './Dataset/long/b/file22.wav', './Dataset/long/b/file106.wav', './Dataset/khiem/b/file130.wav']
b
['./Dataset/long/len/file121.wav', './Dataset/long/len/file5.wav', './Dataset/huy/len/file60.wav', './Dataset/hoang/len/file43.wav', './Dataset/khiem/len/file13.wav', './Dataset/khiem/len/file0.wav', './Dataset/long/len/file66.wav', './Dataset/khiem/len/file124.wav', './Dataset/long/len/file109.wav', './Dataset/huy/len/file16.wav']
len
['./Dataset/huy/xuong/file

In [50]:
len(hmm_models)

8

## Testing

In [59]:
def predict(hmm_models, test_file):
    features_mfcc_test = mfcc(test_file)
    max_score = -float('inf')
    predicted_label = ""
    for item in hmm_models:
        model, label = item
        score = model.score(features_mfcc_test)
        if score > max_score:
            max_score = score
            predicted_label = label
    return predicted_label

In [53]:
test_data[:10]

[('./Dataset/nghia/len/file11.wav', 'len'),
 ('./Dataset/khiem/trai/file100.wav', 'trai'),
 ('./Dataset/long/phai/file82.wav', 'phai'),
 ('./Dataset/khiem/ban/file42.wav', 'ban'),
 ('./Dataset/long/b/file110.wav', 'b'),
 ('./Dataset/long/ban/file19.wav', 'ban'),
 ('./Dataset/hoang/nhay/file104.wav', 'nhay'),
 ('./Dataset/khiem/phai/file66.wav', 'phai'),
 ('./Dataset/huy/b/file123.wav', 'b'),
 ('./Dataset/long/trai/file89.wav', 'trai')]

In [60]:
predict_true = 0
for test in tqdm(test_data):
    predict_label = predict(hmm_models,test[0])
    if predict_label == test[1]:
        predict_true +=1

100%|██████████| 907/907 [00:24<00:00, 36.28it/s]


In [69]:
print('Acc',predict_true/len(test_data))

Acc 0.9570011025358324


In [70]:
len(test_data)

907

## Demo

In [89]:
index_nums = random.randint(0,906)
t_path = test_data[index_nums][0]
print('Test path file', t_path)

Test path file ./Dataset/nghia/trai/file26.wav


In [90]:
dsp.Audio(t_path)

In [91]:
predict(hmm_models, t_path)

'trai'