In [1]:
pip install python_speech_features hmmlearn

Looking in indexes: http://repo.myhuaweicloud.com/repository/pypi/simple
You should consider upgrading via the '/home/ma-user/anaconda3/envs/MindSpore/bin/python -m pip install --upgrade pip' command.
Note: you may need to restart the kernel to use updated packages.


In [2]:
import os
import pickle
import numpy as np
import scipy.io.wavfile as wvf
from python_speech_features import mfcc
from hmmlearn.hmm import GMMHMM
import heapq
import scipy.signal as signal
from python_speech_features import delta
from scipy.fftpack import dct

In [3]:
# Dataset locations are resolved against the directory the notebook runs in;
# the pickled model is written to / read from that same working directory.
_cwd = os.getcwd()
train_data_path = os.path.join(_cwd, 'datas/train/speech')
label_path = os.path.join(_cwd, 'datas/labels/trainprompts_m')
test_data_path = os.path.join(_cwd, 'datas/test/speech')
model_path = 'hmm_gmm_model.pkl'

In [4]:
def wav2mfcc(labels, data_paths):
    """Extract MFCC features for each wav file and group them by label.

    Parameters
    ----------
    labels : iterable of str
        Label of each utterance, aligned position-by-position with
        ``data_paths``.
    data_paths : iterable of str
        Path of the wav file for each utterance.

    Returns
    -------
    dict
        Maps every distinct label to a list containing one MFCC feature
        array per utterance carrying that label, in input order.
    """
    trng_data = {}
    for label, data_path in zip(labels, data_paths):
        print(f'Processing file: {data_path}')
        rate, sig = wvf.read(data_path)
        print(f'Read wav file {data_path}, sample rate: {rate}, signal length: {len(sig)}')

        mfcc_feat = mfcc(sig, rate)
        print(f'MFCC Features shape: {mfcc_feat.shape}')

        # Append instead of assigning a fresh list: several utterances may
        # share one label, and assignment would keep only the last of them.
        trng_data.setdefault(label, []).append(mfcc_feat)

    print(f'MFCC extraction completed for {len(trng_data)} labels.')
    return trng_data

In [9]:
def obtain_config(labels, n_components=2, n_mix=2):
    """Build the GMM-HMM hyper-parameter dictionary for every label.

    Parameters
    ----------
    labels : iterable of str
        Labels to create a configuration for. A label that occurs more than
        once simply gets its entry rewritten with the same values.
    n_components : int, optional
        Value stored under ``'n_components'`` for each label (default 2).
    n_mix : int, optional
        Value stored under ``'n_mix'`` for each label (default 2).

    Returns
    -------
    dict
        ``{label: {'n_components': ..., 'n_mix': ...}}``.
    """
    conf = {}
    print('Generating GMM-HMM configurations for each label...')
    for label in labels:
        conf[label] = {'n_components': n_components, 'n_mix': n_mix}
        print(f"Label: {label}, Components: {conf[label]['n_components']}, Mixtures: {conf[label]['n_mix']}")
    return conf

In [13]:
def get_hmm_gmm(trng_datas=None, GMM_configs=None, model_path='hmm_gmm_model.pkl', from_file=False):
    """Train one GMM-HMM per label, or load previously pickled models.

    Parameters
    ----------
    trng_datas : dict, optional
        Maps each label to a list of feature arrays (one per utterance).
        Required when ``from_file`` is False.
    GMM_configs : dict, optional
        Maps each label to a dict with ``'n_components'`` and ``'n_mix'``.
        Required when ``from_file`` is False.
    model_path : str, optional
        Pickle file the models are written to after training, or read from
        when ``from_file`` is True.
    from_file : bool, optional
        When True, skip training and load the models from ``model_path``.

    Returns
    -------
    dict
        Maps each label to its ``GMMHMM`` instance. A label whose data list
        is empty keeps an unfitted model.

    Raises
    ------
    ValueError
        If training is requested but ``trng_datas`` or ``GMM_configs`` is
        missing.
    """
    if from_file:
        print(f'Loading model from {model_path}')
        # NOTE: unpickling can execute arbitrary code; only load model files
        # that were produced by this notebook or another trusted source.
        with open(model_path, 'rb') as model_file:
            hmm_gmm = pickle.load(model_file)
    else:
        if trng_datas is None or GMM_configs is None:
            raise ValueError('trng_datas and GMM_configs are required when from_file is False')

        hmm_gmm = {}
        print('Training GMM-HMM models...')
        for label, trng_data in trng_datas.items():
            GMM_config = GMM_configs[label]
            print(f"Training model for label: {label} with {GMM_config['n_components']} components and {GMM_config['n_mix']} mixtures")
            hmm_gmm[label] = GMMHMM(
                n_components=GMM_config['n_components'],
                n_mix=GMM_config['n_mix'])
            if trng_data:
                # Every utterance is its own observation sequence. Passing
                # the per-utterance frame counts keeps the stacked array from
                # being treated as one long sequence.
                lengths = [len(seq) for seq in trng_data]
                hmm_gmm[label].fit(np.vstack(trng_data), lengths=lengths)
                print(f'Fitted model for label: {label}')

        # Save the trained models; the context manager closes the file handle.
        with open(model_path, 'wb') as model_file:
            pickle.dump(hmm_gmm, model_file)
        print(f'Model saved to {model_path}')

    print('Returning GMM-HMM models.')
    return hmm_gmm

In [14]:
def train(train_data_path, label_path, model_path):
    """Train and save one GMM-HMM per label listed in the label file.

    Each non-blank line of the label file is split on whitespace: the first
    token is the wav file stem inside ``train_data_path`` and the remaining
    tokens, joined by single spaces, form the label.

    Parameters
    ----------
    train_data_path : str
        Directory containing the training wav files.
    label_path : str
        Path of the label file.
    model_path : str
        Destination of the pickled models.

    Returns
    -------
    dict
        The label -> model mapping returned by ``get_hmm_gmm``.
    """
    print(f'Reading labels from {label_path}...')
    # The labels are Chinese text, so the encoding is fixed rather than left
    # to the platform default.
    with open(label_path, encoding='utf-8') as f:
        entries = [line.split() for line in f]
    # Blank lines (e.g. a trailing newline) carry no file stem; skip them.
    entries = [tokens for tokens in entries if tokens]

    data_paths = [os.path.join(train_data_path, tokens[0] + '.wav') for tokens in entries]
    print(f'Data paths: {data_paths[:5]}...')  # Show a sample of the first 5 paths

    labels = [' '.join(tokens[1:]) for tokens in entries]
    print(f'Labels: {labels[:5]}...')  # Show a sample of the first 5 labels

    print('Extracting MFCC features for training data...')
    train_datas = wav2mfcc(labels, data_paths)

    print('Obtaining GMM-HMM configuration for labels...')
    GMM_configs = obtain_config(labels)

    print('Training GMM-HMM models...')
    hmm_gmm = get_hmm_gmm(train_datas, GMM_configs, model_path)

    print(f'Training complete. Models saved to {model_path}')
    return hmm_gmm


# Run the full training pipeline with the paths configured earlier in the
# notebook, then show the resulting label -> model mapping.
hmm_gmm = train(
    train_data_path=train_data_path,
    label_path=label_path,
    model_path=model_path,
)
print(f'Trained HMM-GMM models: {hmm_gmm}')

Reading labels from /home/ma-user/work/datas/labels/trainprompts_m...
Data paths: ['/home/ma-user/work/datas/train/speech/S0001.wav', '/home/ma-user/work/datas/train/speech/S0002.wav', '/home/ma-user/work/datas/train/speech/S0003.wav', '/home/ma-user/work/datas/train/speech/S0004.wav', '/home/ma-user/work/datas/train/speech/S0005.wav']...
Labels: ['开 灯 七 小时', '关闭 风扇', '关 灯 九 分钟', '打开 阀门', '开启 风扇 五 秒']...
Extracting MFCC features for training data...
Processing file: /home/ma-user/work/datas/train/speech/S0001.wav
Read wav file /home/ma-user/work/datas/train/speech/S0001.wav, sample rate: 16000, signal length: 55188
MFCC Features shape: (689, 13)
Processing file: /home/ma-user/work/datas/train/speech/S0002.wav
Read wav file /home/ma-user/work/datas/train/speech/S0002.wav, sample rate: 16000, signal length: 41391
MFCC Features shape: (516, 13)
Processing file: /home/ma-user/work/datas/train/speech/S0003.wav
Read wav file /home/ma-user/work/datas/train/speech/S0003.wav, sample rate: 16000

In [16]:
def test_features(feature_file, hmm_gmm):
    print(f'Loading MFCC features from {feature_file}...')
    mfcc_feat = np.load(feature_file)
    print(f'MFCC features shape: {mfcc_feat.shape}')

    print('Scoring features against each model...')
    pred = {}
    for model in hmm_gmm:
        score = hmm_gmm[model].score(mfcc_feat)
        pred[model] = score
        print(f'Score for model {model}: {score}')

    nbest = get_nbest(pred, 2)

    return nbest, pred


def get_nbest(d, n):
    """Return the ``n`` keys of ``d`` with the largest values, best first."""
    value_of = d.__getitem__
    return heapq.nlargest(n, d, key=value_of)


def predict_label(feature_file, hmm_gmm):
    """Announce the file being classified and delegate to ``test_features``.

    Returns whatever ``test_features(feature_file, hmm_gmm)`` returns.
    """
    print(f'Predicting label for feature file: {feature_file}')
    return test_features(feature_file, hmm_gmm)


# Classify the saved feature array with the trained models; `predicted` is the
# n-best label list (best first) and `probs` holds every model's score.
feature_file = os.path.join('Features.npy')
predicted, probs = predict_label(feature_file=feature_file, hmm_gmm=hmm_gmm)
print('Predicted result: %s' % predicted[0])

Predicting label for feature file: Features.npy
Loading MFCC features from Features.npy...
MFCC features shape: (298, 13)
Scoring features against each model...
Score for model 开 灯 七 小时: -3539266.686152603
Score for model 关闭 风扇: -1984001.232717004
Score for model 关 灯 九 分钟: -3171574.770857236
Score for model 打开 阀门: -3795361.91860582
Score for model 开启 风扇 五 秒: -3093132.8503297823
Score for model 开启 阀门: -1937843.2030556912
Score for model 开启 风扇 九 分钟: -2661464.99469183
Score for model 关掉 风扇 三 秒: -1960564.9976841458
Score for model 关 风扇 四 秒: -3011337.919092221
Score for model 关掉 阀门 四 秒: -3303418.960012782
Score for model 关闭 灯 二 小时: -2467106.100198934
Score for model 打开 灯 二 小时: -2126018.882682267
Score for model 关 灯 五 分钟: -3056415.8783982904
Score for model 打开 风扇: -1848372.8363280054
Score for model 关掉 灯 七 秒: -1909920.8745258683
Score for model 关掉 风扇 五 小时: -3040371.446998456
Score for model 打开 风扇 四 分钟: -2825712.3569186134
Score for model 关掉 阀门 六 分钟: -3483477.5340211564
Score for model 开启 灯 一