In [2]:
from torch.utils.data import Dataset, DataLoader
import wave
import numpy as np
import librosa
from utils import endpoint_detection, compute_mfcc
from tqdm import tqdm
import torch
import os
os.environ["OMP_NUM_THREADS"] = "2"

def extract_mfcc(file_path):
    """
    提取MFCC特征
    """
    with wave.open(file_path, 'rb') as wf:
        nframes = wf.getnframes()
        rate = wf.getframerate()
        strData = wf.readframes(nframes)
        waveData = np.frombuffer(strData, dtype=np.int16)

    # 端点检测
    s, e = endpoint_detection(waveData)
    signal = waveData[s: e]*1.0
    # mfccs = librosa.feature.mfcc(y=signal, sr=rate, n_mfcc=40, n_mels=128, n_fft=256, hop_length=80, win_length=256, lifter=12)
    mfccs = compute_mfcc(signal).T
    mean = np.mean(mfccs, axis=1, keepdims=True)
    std = np.std(mfccs, axis=1, keepdims=True)
    fea = (mfccs-mean)/std
    # 添加一阶差分和二阶差分
    fea_d = librosa.feature.delta(fea, order=1)
    fea_dd = librosa.feature.delta(fea, order=2)
    fea = np.concatenate((fea.T, fea_d.T, fea_dd.T), axis=1)
    return fea

In [3]:
from sklearn.cluster import KMeans

def run_kmeans(dataset, K):
    labs = KMeans(n_clusters=K, random_state=9).fit_predict(dataset)
    return labs

def gen_para_GMM(fea_collect, N_mix):
    # 首先对特征进行kmeans聚类
    feas = np.concatenate(fea_collect, axis=0)
    N, D = np.shape(feas)
    # print("sub_fea_shape", feas.shape)
    # 初始化聚类中心
    labs = run_kmeans(feas, N_mix)
    mus = np.zeros([N_mix, D])
    sigmas = np.zeros([N_mix, D])
    ws = np.zeros(N_mix)
    for m in range(N_mix):
        index = np.where(labs == m)[0]
        # print("-----index-------", index)
        sub_feas = feas[index]
        mu = np.mean(sub_feas, axis=0)
        sigma = np.var(sub_feas, axis=0)
        sigma = sigma + 0.0001
        mus[m] = mu
        sigmas[m] = sigma

        # print("-----N D-------", N, np.shape(index)[0])
        ws[m] = np.shape(index)[0] / N
    ws = (ws + 0.01) / np.sum(ws + 0.01)
    return ws, mus, sigmas

In [4]:
import numpy as np

def init_para_hmm(feas, len_feas, N_state, N_mix):
    # 初始一定从 state 0 开始
    pi = np.zeros(N_state)
    pi[0] = 1

    # 当前状态 转移概率0.5 下一状态 转移概率0.5
    # 进入最后一个状态后不再跳出
    A = np.zeros([N_state, N_state])
    for i in range(N_state-1):
        A[i, i] = 0.5
        A[i, i+1] = 0.5
    A[-1, -1] = 1

    _, D = np.shape(feas[0])
    hmm_means = np.zeros([N_state, N_mix, D])
    hmm_sigmas = np.zeros([N_state, N_mix, D])
    hmm_ws = np.zeros([N_state, N_mix])

    for s in range(N_state):
        sub_fea_collect = []
        # 初始化时 先为每个状态平均分配特征
        for fea, T in zip(feas, len_feas):
            T_s = int(T/N_state)*s
            T_e = (int(T/N_state))*(s+1)
            sub_fea_collect.append(fea[T_s:T_e])
        ws, mus, sigmas = gen_para_GMM(sub_fea_collect, N_mix)
        hmm_means[s] = mus
        hmm_sigmas[s] = sigmas
        hmm_ws[s] = ws

    return pi, A, hmm_means, hmm_sigmas, hmm_ws

In [5]:
# val_sids = ["21300240018", "22307110035"]
val_sids = ["22307130313", "22307130505", "22307110035"]
train_sids = [sid for sid in os.listdir("dataset") if sid not in val_sids]

def load_data(word_idx, sids):
    data_paths = []
    for sid in sids:
        base_path = os.path.join("dataset", sid)
        files = os.listdir(base_path)
        full_paths = [os.path.join(base_path, file) for file in files]
        data_paths.extend(full_paths)

    collect_fea = []
    len_feas = []

    # 构建数据和标签
    for file_path in tqdm(data_paths):
        id, word, cnt = os.path.basename(file_path).split("_")
        # print(id, word, cnt)
        if word == str(word_idx).zfill(2):
            fea = extract_mfcc(file_path)
            collect_fea.append(fea)
            len_feas.append(np.shape(fea)[0])

    return collect_fea, len_feas

In [7]:
from hmmlearn.hmm import GMMHMM
# 获取模型参数初始化
N_state = 6
N_mix = 5

models = []

for i in range(1, 21):
    collect_fea, len_feas = load_data(i, train_sids)
    collect_fea = collect_fea
    len_feas = len_feas
    print(len(collect_fea), " files for word ", i)
    pi, A, hmm_means, hmm_sigmas, hmm_ws = init_para_hmm(collect_fea, len_feas, N_state, N_mix)

    train_GMMHMM = GMMHMM(n_components=N_state,
                        n_mix=N_mix,
                        covariance_type='diag',
                        n_iter=1,
                        tol=1e-4,
                        verbose=False,
                        init_params="",
                        params="tmcw",
                        min_covar=0.0001,
                        )

    train_GMMHMM.startprob_ = pi
    train_GMMHMM.transmat_ = A

    train_GMMHMM.weights = hmm_ws
    train_GMMHMM.means_ = hmm_means
    train_GMMHMM.covars = hmm_sigmas

    best_model = None
    max_val_score = -np.inf
    max_train_score = -np.inf
    patience = 3
    no_improve_count = 0

    val_fea, _ = load_data(i, val_sids)
    for epoch in range(30):  # 最多训练30轮
        train_GMMHMM.fit(np.concatenate(collect_fea, axis=0), np.array(len_feas))

        val_score = np.mean([train_GMMHMM.score(fea)/len(fea) for fea in val_fea])
        print(f"Epoch {epoch+1}, Val Score: {val_score:.2f}")
        train_score = np.mean([train_GMMHMM.score(fea)/len(fea) for fea in collect_fea])
        print(f"Epoch {epoch+1}, Train Score: {train_score:.2f}")

        if val_score > max_val_score - 0.1 and train_score > max_train_score + 1:
            if train_score > max_train_score:
                max_train_score = train_score
            if val_score > max_val_score:
                max_val_score = val_score
            # best_model = GMMHMM(n_components=N_state,
            #                     n_mix=N_mix,
            #                     covariance_type='diag')
            # best_model.__setstate__(train_GMMHMM.__getstate__())
            no_improve_count = 0
        elif val_score < max_val_score and train_score < max_train_score:
            print(f"Epoch {epoch+1}, No improvement in validation or training score.")
            break
        else:
            no_improve_count += 1
            if no_improve_count >= patience:
                print("Early stopping triggered.")
                break
    models.append(train_GMMHMM)

np.save("models_hmmlearn_0610.npy", models)

100%|██████████| 8400/8400 [01:18<00:00, 107.57it/s]


420  files for word  1


100%|██████████| 1200/1200 [00:12<00:00, 95.15it/s]


Epoch 1, Val Score: -1.45
Epoch 1, Train Score: 5.86
Epoch 2, Val Score: -0.90
Epoch 2, Train Score: 7.24
Epoch 3, Val Score: -0.64
Epoch 3, Train Score: 7.88
Epoch 4, Val Score: -0.47
Epoch 4, Train Score: 8.30
Epoch 5, Val Score: -0.37
Epoch 5, Train Score: 8.60
Epoch 6, Val Score: -0.29
Epoch 6, Train Score: 8.80
Epoch 7, Val Score: -0.24
Epoch 7, Train Score: 8.90
Early stopping triggered.


100%|██████████| 8400/8400 [01:06<00:00, 126.01it/s]


420  files for word  2


100%|██████████| 1200/1200 [00:14<00:00, 83.22it/s]


Epoch 1, Val Score: 5.92
Epoch 1, Train Score: 10.76
Epoch 2, Val Score: 6.16
Epoch 2, Train Score: 11.85
Epoch 3, Val Score: 6.42
Epoch 3, Train Score: 12.41
Epoch 4, Val Score: 6.56
Epoch 4, Train Score: 12.78
Epoch 5, Val Score: 6.70
Epoch 5, Train Score: 13.06
Epoch 6, Val Score: 6.82
Epoch 6, Train Score: 13.31
Epoch 7, Val Score: 6.88
Epoch 7, Train Score: 13.42
Epoch 8, Val Score: 6.89
Epoch 8, Train Score: 13.49
Early stopping triggered.


100%|██████████| 8400/8400 [01:13<00:00, 113.78it/s]


420  files for word  3


100%|██████████| 1200/1200 [00:12<00:00, 93.53it/s]


Epoch 1, Val Score: 8.71
Epoch 1, Train Score: 11.49
Epoch 2, Val Score: 9.34
Epoch 2, Train Score: 12.68
Epoch 3, Val Score: 9.57
Epoch 3, Train Score: 13.16
Epoch 4, Val Score: 9.70
Epoch 4, Train Score: 13.41
Epoch 5, Val Score: 9.82
Epoch 5, Train Score: 13.60
Early stopping triggered.


100%|██████████| 8400/8400 [01:15<00:00, 110.90it/s]


420  files for word  4


100%|██████████| 1200/1200 [00:13<00:00, 89.88it/s]


Epoch 1, Val Score: -1.01
Epoch 1, Train Score: 5.35
Epoch 2, Val Score: -0.50
Epoch 2, Train Score: 6.93
Epoch 3, Val Score: -0.29
Epoch 3, Train Score: 7.68
Epoch 4, Val Score: -0.15
Epoch 4, Train Score: 8.04
Epoch 5, Val Score: -0.04
Epoch 5, Train Score: 8.20
Epoch 6, Val Score: 0.04
Epoch 6, Train Score: 8.35
Epoch 7, Val Score: 0.10
Epoch 7, Train Score: 8.54
Early stopping triggered.


100%|██████████| 8400/8400 [01:22<00:00, 101.88it/s]


420  files for word  5


100%|██████████| 1200/1200 [00:15<00:00, 77.25it/s]


Epoch 1, Val Score: 1.41
Epoch 1, Train Score: 8.68
Epoch 2, Val Score: 1.94
Epoch 2, Train Score: 10.10
Epoch 3, Val Score: 2.16
Epoch 3, Train Score: 10.62
Epoch 4, Val Score: 2.30
Epoch 4, Train Score: 10.90
Epoch 5, Val Score: 2.37
Epoch 5, Train Score: 11.13
Epoch 6, Val Score: 2.40
Epoch 6, Train Score: 11.35
Epoch 7, Val Score: 2.39
Epoch 7, Train Score: 11.61
Epoch 8, Val Score: 2.37
Epoch 8, Train Score: 11.76
Early stopping triggered.


100%|██████████| 8400/8400 [01:24<00:00, 99.03it/s] 


420  files for word  6


100%|██████████| 1200/1200 [00:12<00:00, 92.51it/s]


Epoch 1, Val Score: 2.62
Epoch 1, Train Score: 9.04
Epoch 2, Val Score: 3.40
Epoch 2, Train Score: 10.82
Epoch 3, Val Score: 3.61
Epoch 3, Train Score: 11.59
Epoch 4, Val Score: 3.77
Epoch 4, Train Score: 12.06
Epoch 5, Val Score: 3.87
Epoch 5, Train Score: 12.31
Epoch 6, Val Score: 3.92
Epoch 6, Train Score: 12.52
Epoch 7, Val Score: 3.92
Epoch 7, Train Score: 12.62
Early stopping triggered.


100%|██████████| 8400/8400 [01:19<00:00, 106.13it/s]


420  files for word  7


100%|██████████| 1200/1200 [00:13<00:00, 91.17it/s]


Epoch 1, Val Score: 0.87
Epoch 1, Train Score: 9.15
Epoch 2, Val Score: 1.09
Epoch 2, Train Score: 10.82
Epoch 3, Val Score: 1.13
Epoch 3, Train Score: 11.52
Epoch 4, Val Score: 1.17
Epoch 4, Train Score: 11.78
Epoch 5, Val Score: 1.20
Epoch 5, Train Score: 11.96
Epoch 6, Val Score: 1.22
Epoch 6, Train Score: 12.08
Epoch 7, Val Score: 1.26
Epoch 7, Train Score: 12.15
Epoch 8, Val Score: 1.29
Epoch 8, Train Score: 12.22
Early stopping triggered.


100%|██████████| 8400/8400 [01:10<00:00, 119.46it/s]


420  files for word  8


100%|██████████| 1200/1200 [00:11<00:00, 101.59it/s]


Epoch 1, Val Score: -1.03
Epoch 1, Train Score: 6.49
Epoch 2, Val Score: -0.55
Epoch 2, Train Score: 8.62
Epoch 3, Val Score: -0.37
Epoch 3, Train Score: 9.37
Epoch 4, Val Score: -0.23
Epoch 4, Train Score: 9.71
Epoch 5, Val Score: -0.05
Epoch 5, Train Score: 10.00
Epoch 6, Val Score: 0.15
Epoch 6, Train Score: 10.24
Epoch 7, Val Score: 0.24
Epoch 7, Train Score: 10.34
Early stopping triggered.


100%|██████████| 8400/8400 [01:28<00:00, 94.52it/s] 


420  files for word  9


100%|██████████| 1200/1200 [00:13<00:00, 85.82it/s]


Epoch 1, Val Score: 2.47
Epoch 1, Train Score: 11.08
Epoch 2, Val Score: 3.20
Epoch 2, Train Score: 12.94
Epoch 3, Val Score: 3.54
Epoch 3, Train Score: 13.94
Epoch 4, Val Score: 3.67
Epoch 4, Train Score: 14.77
Epoch 5, Val Score: 3.77
Epoch 5, Train Score: 15.40
Epoch 6, Val Score: 3.91
Epoch 6, Train Score: 15.58
Epoch 7, Val Score: 4.00
Epoch 7, Train Score: 15.71
Epoch 8, Val Score: 4.05
Epoch 8, Train Score: 15.81
Early stopping triggered.


100%|██████████| 8400/8400 [01:23<00:00, 100.09it/s]


420  files for word  10


100%|██████████| 1200/1200 [00:13<00:00, 87.17it/s]


Epoch 1, Val Score: 5.60
Epoch 1, Train Score: 12.52
Epoch 2, Val Score: 5.98
Epoch 2, Train Score: 14.43
Epoch 3, Val Score: 6.12
Epoch 3, Train Score: 15.33
Epoch 4, Val Score: 6.21
Epoch 4, Train Score: 15.76
Epoch 5, Val Score: 6.27
Epoch 5, Train Score: 15.89
Epoch 6, Val Score: 6.35
Epoch 6, Train Score: 15.99
Epoch 7, Val Score: 6.46
Epoch 7, Train Score: 16.08
Early stopping triggered.


100%|██████████| 8400/8400 [01:24<00:00, 99.15it/s] 


420  files for word  11


100%|██████████| 1200/1200 [00:12<00:00, 93.91it/s]


Epoch 1, Val Score: -1.83
Epoch 1, Train Score: 1.77
Epoch 2, Val Score: -1.04
Epoch 2, Train Score: 3.58
Epoch 3, Val Score: -0.81
Epoch 3, Train Score: 4.46
Epoch 4, Val Score: -0.76
Epoch 4, Train Score: 4.90
Epoch 5, Val Score: -0.71
Epoch 5, Train Score: 5.15
Epoch 6, Val Score: -0.66
Epoch 6, Train Score: 5.34
Epoch 7, Val Score: -0.64
Epoch 7, Train Score: 5.47
Early stopping triggered.


100%|██████████| 8400/8400 [01:16<00:00, 109.20it/s]


420  files for word  12


100%|██████████| 1200/1200 [00:11<00:00, 104.67it/s]


Epoch 1, Val Score: -0.31
Epoch 1, Train Score: 3.86
Epoch 2, Val Score: 0.19
Epoch 2, Train Score: 4.99
Epoch 3, Val Score: 0.42
Epoch 3, Train Score: 5.50
Epoch 4, Val Score: 0.60
Epoch 4, Train Score: 5.79
Epoch 5, Val Score: 0.72
Epoch 5, Train Score: 5.98
Early stopping triggered.


100%|██████████| 8400/8400 [01:18<00:00, 106.55it/s]


420  files for word  13


100%|██████████| 1200/1200 [00:11<00:00, 101.32it/s]


Epoch 1, Val Score: -1.63
Epoch 1, Train Score: 6.19
Epoch 2, Val Score: -1.11
Epoch 2, Train Score: 7.85
Epoch 3, Val Score: -0.95
Epoch 3, Train Score: 8.66
Epoch 4, Val Score: -0.85
Epoch 4, Train Score: 9.02
Epoch 5, Val Score: -0.75
Epoch 5, Train Score: 9.24
Epoch 6, Val Score: -0.69
Epoch 6, Train Score: 9.44
Epoch 7, Val Score: -0.65
Epoch 7, Train Score: 9.63
Early stopping triggered.


100%|██████████| 8400/8400 [01:16<00:00, 109.48it/s]


420  files for word  14


100%|██████████| 1200/1200 [00:11<00:00, 104.51it/s]


Epoch 1, Val Score: -1.74
Epoch 1, Train Score: 4.20
Epoch 2, Val Score: -1.43
Epoch 2, Train Score: 5.72
Epoch 3, Val Score: -1.31
Epoch 3, Train Score: 6.56
Epoch 4, Val Score: -1.20
Epoch 4, Train Score: 6.93
Epoch 5, Val Score: -1.15
Epoch 5, Train Score: 7.08
Epoch 6, Val Score: -1.13
Epoch 6, Train Score: 7.19
Epoch 7, Val Score: -1.12
Epoch 7, Train Score: 7.26
Early stopping triggered.


100%|██████████| 8400/8400 [01:18<00:00, 107.06it/s]


420  files for word  15


100%|██████████| 1200/1200 [00:12<00:00, 99.44it/s] 


Epoch 1, Val Score: -2.22
Epoch 1, Train Score: 4.16
Epoch 2, Val Score: -1.84
Epoch 2, Train Score: 5.25
Epoch 3, Val Score: -1.69
Epoch 3, Train Score: 5.87
Epoch 4, Val Score: -1.56
Epoch 4, Train Score: 6.31
Epoch 5, Val Score: -1.46
Epoch 5, Train Score: 6.54
Epoch 6, Val Score: -1.39
Epoch 6, Train Score: 6.65
Epoch 7, Val Score: -1.33
Epoch 7, Train Score: 6.81
Early stopping triggered.


100%|██████████| 8400/8400 [01:05<00:00, 128.82it/s]


420  files for word  16


100%|██████████| 1200/1200 [00:13<00:00, 90.79it/s] 


Epoch 1, Val Score: -3.16
Epoch 1, Train Score: 3.17
Epoch 2, Val Score: -2.84
Epoch 2, Train Score: 4.67
Epoch 3, Val Score: -2.73
Epoch 3, Train Score: 5.56
Epoch 4, Val Score: -2.60
Epoch 4, Train Score: 6.00
Epoch 5, Val Score: -2.46
Epoch 5, Train Score: 6.36
Epoch 6, Val Score: -2.35
Epoch 6, Train Score: 6.88
Epoch 7, Val Score: -2.27
Epoch 7, Train Score: 7.21
Epoch 8, Val Score: -2.21
Epoch 8, Train Score: 7.84
Epoch 9, Val Score: -2.14
Epoch 9, Train Score: 9.67
Epoch 10, Val Score: -2.11
Epoch 10, Train Score: 13.87
Epoch 11, Val Score: -2.08
Epoch 11, Train Score: 47.40
Epoch 12, Val Score: -2.08
Epoch 12, Train Score: 47.43
Epoch 13, Val Score: -2.06
Epoch 13, Train Score: 47.46
Epoch 14, Val Score: -2.05
Epoch 14, Train Score: 47.48
Early stopping triggered.


100%|██████████| 8400/8400 [01:24<00:00, 99.98it/s] 


420  files for word  17


100%|██████████| 1200/1200 [00:12<00:00, 95.65it/s] 


Epoch 1, Val Score: -3.31
Epoch 1, Train Score: 3.66
Epoch 2, Val Score: -2.80
Epoch 2, Train Score: 5.06
Epoch 3, Val Score: -2.60
Epoch 3, Train Score: 5.77
Epoch 4, Val Score: -2.38
Epoch 4, Train Score: 6.31
Epoch 5, Val Score: -2.22
Epoch 5, Train Score: 6.70
Epoch 6, Val Score: -2.14
Epoch 6, Train Score: 6.91
Epoch 7, Val Score: -2.09
Epoch 7, Train Score: 7.03
Early stopping triggered.


100%|██████████| 8400/8400 [01:13<00:00, 114.07it/s]


420  files for word  18


100%|██████████| 1200/1200 [00:08<00:00, 137.46it/s]


Epoch 1, Val Score: 1.50
Epoch 1, Train Score: 10.58
Epoch 2, Val Score: 1.65
Epoch 2, Train Score: 12.22
Epoch 3, Val Score: 1.79
Epoch 3, Train Score: 13.09
Epoch 4, Val Score: 1.87
Epoch 4, Train Score: 13.59
Epoch 5, Val Score: 1.95
Epoch 5, Train Score: 13.85
Epoch 6, Val Score: 1.99
Epoch 6, Train Score: 14.04
Epoch 7, Val Score: 2.08
Epoch 7, Train Score: 14.15
Early stopping triggered.


100%|██████████| 8400/8400 [01:09<00:00, 120.76it/s]


420  files for word  19


100%|██████████| 1200/1200 [00:11<00:00, 101.04it/s]


Epoch 1, Val Score: -0.69
Epoch 1, Train Score: 8.18
Epoch 2, Val Score: -0.30
Epoch 2, Train Score: 9.79
Epoch 3, Val Score: -0.19
Epoch 3, Train Score: 10.68
Epoch 4, Val Score: -0.15
Epoch 4, Train Score: 11.06
Epoch 5, Val Score: -0.11
Epoch 5, Train Score: 11.53
Epoch 6, Val Score: -0.13
Epoch 6, Train Score: 11.79
Epoch 7, Val Score: -0.15
Epoch 7, Train Score: 11.92
Early stopping triggered.


100%|██████████| 8400/8400 [01:14<00:00, 113.14it/s]


420  files for word  20


100%|██████████| 1200/1200 [00:12<00:00, 93.75it/s] 


Epoch 1, Val Score: 1.14
Epoch 1, Train Score: 8.14
Epoch 2, Val Score: 1.91
Epoch 2, Train Score: 9.82
Epoch 3, Val Score: 2.14
Epoch 3, Train Score: 10.66
Epoch 4, Val Score: 2.22
Epoch 4, Train Score: 11.37
Epoch 5, Val Score: 2.18
Epoch 5, Train Score: 11.76
Epoch 6, Val Score: 2.15
Epoch 6, Train Score: 12.03
Epoch 7, Val Score: 2.13
Epoch 7, Train Score: 12.20
Early stopping triggered.
