In [1]:
pip install python_speech_features hmmlearn

Looking in indexes: http://repo.myhuaweicloud.com/repository/pypi/simple
You should consider upgrading via the '/home/ma-user/anaconda3/envs/MindSpore/bin/python -m pip install --upgrade pip' command.
Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
import pickle
import numpy as np
import scipy.io.wavfile as wvf
from python_speech_features import mfcc
from hmmlearn.hmm import GMMHMM
import heapq
import scipy.signal as signal
from python_speech_features import delta
from scipy.fftpack import dct

In [3]:
# Corpus locations, each resolved against the current working directory,
# plus the file name used for the pickled models.
train_data_path, label_path, test_data_path = (
    os.path.join(os.getcwd(), relative)
    for relative in (
        'datas/train/speech',
        'datas/labels/trainprompts_m',
        'datas/test/speech',
    )
)
model_path = 'hmm_gmm_model.pkl'

In [4]:
def wav2mfcc(labels, data_paths):
    trng_data = {}
    for label, data_path in zip(labels, data_paths):
        mfccs = []
        rate, sig = wvf.read(data_path)
        mfcc_feat = mfcc(sig, rate)
        mfccs.append(mfcc_feat)
        trng_data[label] = mfccs
    return trng_data

In [5]:
def obtain_config(labels):
    """Build the default GMM-HMM hyper-parameters for every label.

    Each label gets its own fresh dict with ``n_components`` and ``n_mix``
    both set to 2; repeated labels collapse into a single entry.
    """
    return {
        name: {'n_components': 2, 'n_mix': 2}
        for name in labels
    }

In [6]:
def get_hmm_gmm(trng_datas=None, GMM_configs=None, model_path='hmm_gmm_model.pkl', from_file=False):
    """Train one GMM-HMM per label, or load a previously pickled set.

    Args:
        trng_datas: dict mapping label -> list of per-utterance feature
            arrays. Only read when ``from_file`` is False.
        GMM_configs: dict mapping label -> dict with the keys
            ``'n_components'`` and ``'n_mix'``. Only read when
            ``from_file`` is False.
        model_path: Pickle file that is written after training, or read
            when ``from_file`` is True.
        from_file: When True, skip training and unpickle ``model_path``.

    Returns:
        dict mapping label -> model. A label whose training list is empty
        still gets a model object, but ``fit`` is never called on it.
    """
    if from_file:
        # NOTE(security): pickle.load can execute arbitrary code. Only load
        # model files that were produced by a trusted run of this notebook.
        with open(model_path, 'rb') as model_file:
            return pickle.load(model_file)

    hmm_gmm = {}
    for label, trng_data in trng_datas.items():
        GMM_config = GMM_configs[label]
        hmm_gmm[label] = GMMHMM(
            n_components=GMM_config['n_components'],
            n_mix=GMM_config['n_mix'])
        if trng_data:
            # Utterances are stacked into a single array for hmmlearn;
            # `lengths` marks where each one ends so that separate
            # recordings are not trained as one continuous sequence.
            sequences = [np.atleast_2d(seq) for seq in trng_data]
            hmm_gmm[label].fit(
                np.vstack(sequences),
                lengths=[len(seq) for seq in sequences])
    # Context manager guarantees the pickle is flushed and the handle closed.
    with open(model_path, 'wb') as model_file:
        pickle.dump(hmm_gmm, model_file)
    return hmm_gmm

In [9]:
def train(train_data_path, label_path, model_path):
    """Train and persist one GMM-HMM per label listed in the prompts file.

    Every non-blank line of ``label_path`` is split on whitespace: the first
    field is the utterance id (the WAV file name without ``.wav``) and the
    remaining fields, joined by single spaces, form the label.

    Args:
        train_data_path: Directory that holds the ``<id>.wav`` files.
        label_path: Path of the prompts file described above. It is opened
            with the platform default text encoding.
        model_path: Where the trained models are pickled.

    Returns:
        dict mapping label -> trained model, as returned by ``get_hmm_gmm``.
    """
    with open(label_path) as f:
        # Blank lines (e.g. a trailing newline at the end of the file) have
        # no utterance id and would otherwise raise IndexError below.
        entries = [line.split() for line in f if line.strip()]
    data_paths = [os.path.join(train_data_path, fields[0] + '.wav') for fields in entries]
    labels = [' '.join(fields[1:]) for fields in entries]
    train_datas = wav2mfcc(labels, data_paths)
    GMM_configs = obtain_config(labels)
    hmm_gmm = get_hmm_gmm(train_datas, GMM_configs, model_path)
    return hmm_gmm


# Fit the per-label models on the training corpus and show what was built.
hmm_gmm = train(
    train_data_path=train_data_path,
    label_path=label_path,
    model_path=model_path,
)
print(hmm_gmm)

{'开 灯 七 小时': GMMHMM(algorithm='viterbi', covariance_type='diag',
       covars_prior=array([[[-1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5,
         -1.5, -1.5, -1.5],
        [-1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5,
         -1.5, -1.5, -1.5]],

       [[-1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5,
         -1.5, -1.5, -1.5],
        [-1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5, -1.5,
         -1.5, -1.5, -1.5]]]),
       covars...
       means_prior=array([[[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]],

       [[0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.],
        [0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.]]]),
       means_weight=array([[0., 0.],
       [0., 0.]]), min_covar=0.001,
       n_components=2, n_iter=10, n_mix=2, params='stmcw', random_state=None,
       startprob_prior=1.0, tol=0.01, transmat_prior=1.0, verbose=False,
       we

In [16]:
def test_features(feature_file, hmm_gmm):
    """Score a saved feature array against every model.

    Args:
        feature_file: Path of a ``.npy`` file loaded with ``np.load``.
        hmm_gmm: dict mapping label -> model exposing ``score(features)``.

    Returns:
        A pair ``(best, scores)``: ``best`` lists the two labels with the
        highest score (best first) and ``scores`` maps every label to the
        value its model's ``score`` returned.
    """
    features = np.load(feature_file)
    scores = {label: hmm_gmm[label].score(features) for label in hmm_gmm}
    return get_nbest(scores, 2), scores


def get_nbest(d, n):
    """Return the ``n`` keys of ``d`` with the largest values, best first."""
    value_of = d.__getitem__
    return heapq.nlargest(n, d, key=value_of)


def predict_label(feature_file, hmm_gmm):
    """Delegate to ``test_features`` and hand back its result unchanged."""
    return test_features(feature_file, hmm_gmm)


# Classify the pre-computed features stored next to the notebook and
# report the top-ranked label.
feature_file = 'Features.npy'
predicted, probs = predict_label(feature_file=feature_file, hmm_gmm=hmm_gmm)
print('预测结果: %s' % predicted[0])

预测结果: 开 阀门 五 秒
