In [None]:
import os
import wave
import numpy as np
import librosa
from utils import endpoint_detection, compute_mfcc
from tqdm import tqdm
import random

def extract_mfcc(file_path):
    """
    Extract standardized MFCC features plus first/second-order deltas from a WAV file.

    Pipeline: read the raw samples, trim them with ``endpoint_detection``,
    compute MFCCs with ``compute_mfcc``, standardize every row of the
    transposed MFCC matrix (zero mean, unit std along axis 1), then append
    delta and delta-delta features.

    Args:
        file_path: Path to a WAV file that the ``wave`` module can open.
            NOTE(review): the byte stream is interpreted as int16 with no
            channel handling, so mono 16-bit PCM is presumably expected —
            confirm against the dataset.

    Returns:
        2-D float array built by concatenating the transposed static, delta
        and delta-delta matrices along axis 1 (presumably one row per frame —
        depends on the layout returned by ``compute_mfcc``; TODO confirm).
    """
    with wave.open(file_path, 'rb') as wf:
        raw_bytes = wf.readframes(wf.getnframes())
    samples = np.frombuffer(raw_bytes, dtype=np.int16)

    # Endpoint detection: keep only the slice the detector reports.
    start, end = endpoint_detection(samples)
    signal = samples[start:end] * 1.0  # promote int16 -> float

    mfccs = compute_mfcc(signal).T
    mean = np.mean(mfccs, axis=1, keepdims=True)
    std = np.std(mfccs, axis=1, keepdims=True)
    # A row that is constant over axis 1 has std == 0; dividing by it would
    # fill the features with NaN/inf. Use 1 for such rows so they become
    # all-zero after centering, leaving every other row unchanged.
    std = np.where(std == 0, 1.0, std)
    fea = (mfccs - mean) / std

    # Append first- and second-order differences (librosa's default axis/width).
    fea_d = librosa.feature.delta(fea, order=1)
    fea_dd = librosa.feature.delta(fea, order=2)
    return np.concatenate((fea.T, fea_d.T, fea_dd.T), axis=1)

In [2]:
# Evaluation setup.
# NOTE(security): allow_pickle=True unpickles arbitrary Python objects, which can
# execute code while loading — only load model files from a trusted source.
models = np.load("models_hmmlearn_0610.npy", allow_pickle=True)

# Each sub-directory of "dataset" is treated as one speaker id. Stray files
# (e.g. OS metadata) are skipped because calculate_accuracy lists the entry as
# a directory, and the ids are sorted so the evaluation order does not depend
# on the filesystem's listing order.
sids = sorted(
    sid for sid in os.listdir("dataset")
    if os.path.isdir(os.path.join("dataset", sid))
)

def calculate_accuracy(sid):
    """
    Score every recording of one speaker against all word models and print a report.

    Files under ``dataset/<sid>`` must be named ``<id>_<word>_<cnt>``; the
    middle field is the true word label. Each file is decoded by the first 20
    entries of the global ``models`` and the best-scoring model index (plus
    one) is taken as the predicted label.

    Args:
        sid: Name of the speaker sub-directory inside ``dataset``.

    Returns:
        None. Prints the accuracy percentage and a per-word count of
        misclassified files.

    Raises:
        ValueError: If a file name does not split into exactly three
            ``_``-separated fields or the word field is not an integer.
    """
    num_words = 20  # number of word classes / models evaluated

    base_path = os.path.join("dataset", sid)
    data_paths = [os.path.join(base_path, name) for name in os.listdir(base_path)]
    if not data_paths:
        # Avoid a ZeroDivisionError in the accuracy computation below.
        print("%s : no files found" % sid)
        return

    # Keys "01".."20", matching the zero-padded word field in the file names.
    wrong_cnt = {"%02d" % k: 0 for k in range(1, num_words + 1)}
    count = 0

    for file_path in tqdm(data_paths):
        # Only the word label is needed; speaker id and take counter are ignored.
        _, word, _ = os.path.basename(file_path).split("_")
        fea = extract_mfcc(file_path)
        label_true = int(word)

        # decode() returns (score, state_sequence); only the score is used.
        scores = [models[m].decode(fea)[0] for m in range(num_words)]

        # Pick the best-scoring model; models[i] is taken to represent word i + 1.
        det_lab = np.argmax(scores) + 1
        if det_lab == label_true:
            count += 1
        else:
            # .get() tolerates a label outside the pre-populated keys.
            wrong_cnt[word] = wrong_cnt.get(word, 0) + 1

    print("%s : accuracy %.2f %%" % (sid, count*100/len(data_paths)))
    print("wrong cnt: ", wrong_cnt)

# Run the evaluation for each speaker id in turn; every call prints its own report.
for sid in sids:
    calculate_accuracy(sid)

100%|██████████| 400/400 [02:13<00:00,  2.99it/s]


21307110148 : accuracy 90.50 %
wrong cnt:  {'01': 0, '02': 0, '03': 1, '04': 0, '05': 4, '06': 0, '07': 18, '08': 1, '09': 0, '10': 0, '11': 0, '12': 0, '13': 0, '14': 4, '15': 0, '16': 3, '17': 1, '18': 1, '19': 5, '20': 0}


100%|██████████| 400/400 [01:34<00:00,  4.22it/s]


21307110234 : accuracy 96.00 %
wrong cnt:  {'01': 0, '02': 4, '03': 5, '04': 4, '05': 0, '06': 0, '07': 1, '08': 0, '09': 0, '10': 1, '11': 1, '12': 0, '13': 0, '14': 0, '15': 0, '16': 0, '17': 0, '18': 0, '19': 0, '20': 0}


100%|██████████| 400/400 [01:09<00:00,  5.73it/s]


21307110316 : accuracy 55.00 %
wrong cnt:  {'01': 10, '02': 18, '03': 13, '04': 8, '05': 13, '06': 20, '07': 0, '08': 1, '09': 5, '10': 1, '11': 13, '12': 7, '13': 17, '14': 2, '15': 19, '16': 1, '17': 19, '18': 3, '19': 0, '20': 10}


100%|██████████| 400/400 [01:32<00:00,  4.32it/s]


21307130043 : accuracy 99.50 %
wrong cnt:  {'01': 0, '02': 0, '03': 0, '04': 1, '05': 0, '06': 0, '07': 0, '08': 0, '09': 0, '10': 1, '11': 0, '12': 0, '13': 0, '14': 0, '15': 0, '16': 0, '17': 0, '18': 0, '19': 0, '20': 0}


100%|██████████| 400/400 [01:10<00:00,  5.68it/s]


21307130050 : accuracy 89.50 %
wrong cnt:  {'01': 0, '02': 0, '03': 0, '04': 0, '05': 4, '06': 0, '07': 0, '08': 15, '09': 0, '10': 0, '11': 2, '12': 7, '13': 0, '14': 0, '15': 3, '16': 1, '17': 10, '18': 0, '19': 0, '20': 0}


100%|██████████| 400/400 [02:05<00:00,  3.18it/s]


21307130052 : accuracy 96.50 %
wrong cnt:  {'01': 0, '02': 1, '03': 0, '04': 0, '05': 0, '06': 0, '07': 7, '08': 6, '09': 0, '10': 0, '11': 0, '12': 0, '13': 0, '14': 0, '15': 0, '16': 0, '17': 0, '18': 0, '19': 0, '20': 0}


100%|██████████| 400/400 [01:17<00:00,  5.17it/s]


21307130121 : accuracy 99.50 %
wrong cnt:  {'01': 0, '02': 0, '03': 0, '04': 0, '05': 0, '06': 0, '07': 0, '08': 0, '09': 0, '10': 0, '11': 0, '12': 0, '13': 1, '14': 0, '15': 0, '16': 0, '17': 0, '18': 1, '19': 0, '20': 0}


100%|██████████| 400/400 [01:04<00:00,  6.16it/s]


21307130150 : accuracy 97.75 %
wrong cnt:  {'01': 0, '02': 2, '03': 0, '04': 0, '05': 0, '06': 0, '07': 0, '08': 4, '09': 0, '10': 0, '11': 0, '12': 0, '13': 0, '14': 0, '15': 0, '16': 1, '17': 0, '18': 1, '19': 1, '20': 0}


100%|██████████| 400/400 [01:28<00:00,  4.50it/s]


21307130179 : accuracy 60.50 %
wrong cnt:  {'01': 8, '02': 16, '03': 17, '04': 8, '05': 10, '06': 12, '07': 0, '08': 1, '09': 8, '10': 12, '11': 12, '12': 10, '13': 6, '14': 0, '15': 10, '16': 3, '17': 14, '18': 4, '19': 0, '20': 7}


100%|██████████| 400/400 [01:03<00:00,  6.35it/s]


22300180008 : accuracy 99.75 %
wrong cnt:  {'01': 0, '02': 0, '03': 1, '04': 0, '05': 0, '06': 0, '07': 0, '08': 0, '09': 0, '10': 0, '11': 0, '12': 0, '13': 0, '14': 0, '15': 0, '16': 0, '17': 0, '18': 0, '19': 0, '20': 0}


100%|██████████| 400/400 [00:43<00:00,  9.11it/s]


22300240004 : accuracy 98.75 %
wrong cnt:  {'01': 0, '02': 2, '03': 0, '04': 0, '05': 0, '06': 0, '07': 0, '08': 0, '09': 0, '10': 0, '11': 0, '12': 1, '13': 1, '14': 0, '15': 0, '16': 0, '17': 0, '18': 0, '19': 1, '20': 0}


100%|██████████| 400/400 [01:45<00:00,  3.77it/s]


22300240007 : accuracy 99.25 %
wrong cnt:  {'01': 0, '02': 0, '03': 0, '04': 1, '05': 0, '06': 0, '07': 0, '08': 1, '09': 0, '10': 0, '11': 0, '12': 0, '13': 0, '14': 0, '15': 0, '16': 0, '17': 1, '18': 0, '19': 0, '20': 0}


100%|██████████| 400/400 [01:13<00:00,  5.47it/s]


22300240023 : accuracy 5.00 %
wrong cnt:  {'01': 20, '02': 20, '03': 20, '04': 20, '05': 20, '06': 20, '07': 20, '08': 20, '09': 20, '10': 20, '11': 20, '12': 20, '13': 20, '14': 20, '15': 20, '16': 0, '17': 20, '18': 20, '19': 20, '20': 20}


100%|██████████| 400/400 [00:58<00:00,  6.83it/s]


22307110035 : accuracy 99.75 %
wrong cnt:  {'01': 0, '02': 0, '03': 0, '04': 1, '05': 0, '06': 0, '07': 0, '08': 0, '09': 0, '10': 0, '11': 0, '12': 0, '13': 0, '14': 0, '15': 0, '16': 0, '17': 0, '18': 0, '19': 0, '20': 0}


100%|██████████| 400/400 [01:05<00:00,  6.08it/s]


22307130013 : accuracy 99.75 %
wrong cnt:  {'01': 0, '02': 0, '03': 0, '04': 0, '05': 0, '06': 0, '07': 0, '08': 0, '09': 0, '10': 1, '11': 0, '12': 0, '13': 0, '14': 0, '15': 0, '16': 0, '17': 0, '18': 0, '19': 0, '20': 0}


100%|██████████| 400/400 [01:53<00:00,  3.52it/s]


22307130038 : accuracy 75.25 %
wrong cnt:  {'01': 2, '02': 3, '03': 6, '04': 2, '05': 7, '06': 11, '07': 10, '08': 4, '09': 5, '10': 9, '11': 2, '12': 4, '13': 2, '14': 5, '15': 3, '16': 2, '17': 0, '18': 7, '19': 8, '20': 7}


100%|██████████| 400/400 [01:23<00:00,  4.81it/s]


22307130082 : accuracy 85.75 %
wrong cnt:  {'01': 1, '02': 3, '03': 10, '04': 3, '05': 4, '06': 8, '07': 0, '08': 1, '09': 1, '10': 2, '11': 1, '12': 7, '13': 1, '14': 1, '15': 2, '16': 5, '17': 1, '18': 0, '19': 0, '20': 6}


100%|██████████| 400/400 [01:30<00:00,  4.41it/s]


22307130143 : accuracy 95.50 %
wrong cnt:  {'01': 0, '02': 0, '03': 0, '04': 0, '05': 0, '06': 4, '07': 4, '08': 2, '09': 0, '10': 0, '11': 0, '12': 1, '13': 2, '14': 2, '15': 0, '16': 3, '17': 0, '18': 0, '19': 0, '20': 0}


100%|██████████| 400/400 [01:14<00:00,  5.34it/s]


22307130220 : accuracy 85.00 %
wrong cnt:  {'01': 0, '02': 14, '03': 6, '04': 0, '05': 12, '06': 0, '07': 10, '08': 2, '09': 0, '10': 4, '11': 0, '12': 2, '13': 0, '14': 0, '15': 2, '16': 0, '17': 8, '18': 0, '19': 0, '20': 0}


100%|██████████| 400/400 [00:59<00:00,  6.77it/s]


22307130224 : accuracy 98.00 %
wrong cnt:  {'01': 0, '02': 2, '03': 2, '04': 0, '05': 1, '06': 0, '07': 1, '08': 0, '09': 0, '10': 0, '11': 0, '12': 0, '13': 0, '14': 0, '15': 1, '16': 0, '17': 1, '18': 0, '19': 0, '20': 0}


100%|██████████| 400/400 [00:58<00:00,  6.86it/s]


22307130229 : accuracy 100.00 %
wrong cnt:  {'01': 0, '02': 0, '03': 0, '04': 0, '05': 0, '06': 0, '07': 0, '08': 0, '09': 0, '10': 0, '11': 0, '12': 0, '13': 0, '14': 0, '15': 0, '16': 0, '17': 0, '18': 0, '19': 0, '20': 0}


100%|██████████| 400/400 [01:11<00:00,  5.63it/s]


22307130313 : accuracy 99.25 %
wrong cnt:  {'01': 0, '02': 0, '03': 0, '04': 0, '05': 0, '06': 0, '07': 1, '08': 0, '09': 0, '10': 0, '11': 0, '12': 0, '13': 0, '14': 1, '15': 0, '16': 1, '17': 0, '18': 0, '19': 0, '20': 0}


100%|██████████| 400/400 [02:46<00:00,  2.41it/s]


22307130505 : accuracy 81.25 %
wrong cnt:  {'01': 0, '02': 10, '03': 0, '04': 5, '05': 1, '06': 2, '07': 15, '08': 11, '09': 6, '10': 7, '11': 1, '12': 5, '13': 0, '14': 0, '15': 5, '16': 0, '17': 2, '18': 1, '19': 3, '20': 1}


100%|██████████| 400/400 [01:29<00:00,  4.45it/s]

23300240026 : accuracy 86.00 %
wrong cnt:  {'01': 0, '02': 1, '03': 3, '04': 2, '05': 3, '06': 2, '07': 0, '08': 0, '09': 0, '10': 0, '11': 3, '12': 18, '13': 4, '14': 0, '15': 2, '16': 0, '17': 10, '18': 2, '19': 0, '20': 6}



