In [8]:
import librosa
import pandas as pd
import numpy as np
import os
from config import *
from scipy.stats import kurtosis, skew


def amplitude(y, target_db=-40):
    """
    Apply one scalar gain to a signal, derived from its mean dB level.

    The per-sample dB values of ``y`` are computed with
    ``librosa.amplitude_to_db`` and averaged; the gain is the amplitude
    equivalent of ``target_db`` minus that average.

    Args:
        y (numpy.ndarray): Audio samples.
        target_db (float): Target level in dB. Defaults to -40, the value
            that used to be hard-coded in this function.

    Returns:
        numpy.ndarray: ``y`` multiplied by the computed gain.
    """
    db = librosa.amplitude_to_db(y)
    # Gain factor: the dB distance between the target and the current mean
    # level, converted back to a linear amplitude ratio.
    gain = librosa.db_to_amplitude(target_db - db.mean())
    return y * gain


def extract_mfcc(wav_path, sr=44100, n_mfcc=13):
    """
    Extract MFCC features from the audio file at the given path.

    The signal is loaded, gain-adjusted with ``amplitude`` and then passed
    to ``librosa.feature.mfcc``.

    Args:
        wav_path (str): Path of the audio file.
        sr (int): Sample rate passed to ``librosa.load``. Defaults to 44100.
        n_mfcc (int): Number of MFCC coefficients to compute. Defaults to 13,
            the value that used to be hard-coded in this function.

    Returns:
        numpy.ndarray: The transposed MFCC matrix, i.e. shape (t, n_mfcc):
        one row per frame and one column per coefficient.
    """
    y, sr = librosa.load(wav_path, sr=sr)
    # Apply the same gain normalisation to every recording before
    # extracting features.
    y = amplitude(y)
    mfcc = librosa.feature.mfcc(y=y, sr=sr, n_mfcc=n_mfcc)
    # librosa returns (n_mfcc, t); callers take statistics over axis 0, so
    # hand back the frames-first layout.
    return mfcc.T


def first_order_diff_mean_var(x):
    """
    Return the mean and the variance of the first-order difference of ``x``.

    Args:
        x (array-like): Input sequence.

    Returns:
        tuple: ``(mean, var)`` of ``np.diff(x)``.
    """
    deltas = np.diff(x)  # consecutive differences x[k + 1] - x[k]
    return deltas.mean(), deltas.var()


def second_order_diff_mean(x):
    """
    Return the mean of the second-order difference of ``x``.

    Args:
        x (array-like): Input sequence.

    Returns:
        The mean of ``np.diff(x, n=2)``.
    """
    return np.diff(x, n=2).mean()


In [9]:
# Build the input locations with os.path.join instead of hard-coded
# backslashes so the notebook also runs on non-Windows systems.
audio_dir = os.path.join('.', 'input', 'audio')
score = pd.read_excel(os.path.join('.', 'input', 'score.xlsx'))

# `columns` is not defined in this notebook; presumably it is provided by
# `from config import *` -- TODO confirm.
data = pd.DataFrame(columns=columns)

# Copy the identifying fields and the score; the remaining columns stay
# empty until the feature-extraction loop fills them.
data['category'] = score['folder_name']
data['file_name'] = score['file_name']
data['score'] = score['score']
data

Unnamed: 0,category,file_name,score,1_mean,2_mean,3_mean,4_mean,5_mean,6_mean,7_mean,...,4_diff_2_mean,5_diff_2_mean,6_diff_2_mean,7_diff_2_mean,8_diff_2_mean,9_diff_2_mean,10_diff_2_mean,11_diff_2_mean,12_diff_2_mean,13_diff_2_mean
0,200,001-143-L-11.wav,6.000000,,,,,,,,...,,,,,,,,,,
1,200,002-127-L-11.wav,6.633333,,,,,,,,...,,,,,,,,,,
2,200,003-154-L-11.wav,6.100000,,,,,,,,...,,,,,,,,,,
3,200,004-131-L-3.wav,6.700000,,,,,,,,...,,,,,,,,,,
4,200,005-2023-04-09_05h17m42s.wav,5.533333,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
358,80,ns11-N601-1050.wav,5.925000,,,,,,,,...,,,,,,,,,,
359,80,ns12-N602-0859.wav,5.200000,,,,,,,,...,,,,,,,,,,
360,80,ns13-N603-1527.wav,4.250000,,,,,,,,...,,,,,,,,,,
361,80,ns14-N604-1029.wav,5.750000,,,,,,,,...,,,,,,,,,,


In [13]:
# Iterate over the index labels themselves: rows are addressed with
# label-based `.loc`, so this stays correct even without a 0..n-1 index.
for i in data.index:
    # Resolve the path before the try block so the failure message below
    # always names the file of the current row (never a stale or undefined
    # value).
    wav_path = os.path.join(audio_dir, str(data.loc[i, 'category']), str(data.loc[i, 'file_name']))
    try:
        # extract_mfcc returns one row per frame and one column per
        # coefficient, so every statistic below is taken along axis 0.
        mfcc = extract_mfcc(wav_path)
        means = np.mean(mfcc, axis=0)
        kurtosis_list = np.apply_along_axis(func1d=kurtosis, axis=0, arr=mfcc)
        skewness_list = np.apply_along_axis(func1d=skew, axis=0, arr=mfcc)
        first_mean, first_var = np.apply_along_axis(func1d=first_order_diff_mean_var, axis=0, arr=mfcc)
        second_mean = np.apply_along_axis(func1d=second_order_diff_mean, axis=0, arr=mfcc)

        data.loc[i, "1_mean":"13_mean"] = means
        data.loc[i, "1_kurtosis":"13_kurtosis"] = kurtosis_list
        data.loc[i, "1_skew":"13_skew"] = skewness_list
        data.loc[i, "1_diff_1_mean":"13_diff_1_mean"] = first_mean
        # The helper returns the variance but these columns are labelled
        # "std": convert so the stored values match the column names.
        data.loc[i, "1_diff_1_std":"13_diff_1_std"] = np.sqrt(first_var)
        data.loc[i, "1_diff_2_mean":"13_diff_2_mean"] = second_mean

    except Exception as e:
        # Best effort: report the file and keep going; the feature columns
        # of this row are left as they were.
        print(f"Failed to load {wav_path}: {str(e)}")

data

Unnamed: 0,category,file_name,score,1_mean,2_mean,3_mean,4_mean,5_mean,6_mean,7_mean,...,4_diff_2_mean,5_diff_2_mean,6_diff_2_mean,7_diff_2_mean,8_diff_2_mean,9_diff_2_mean,10_diff_2_mean,11_diff_2_mean,12_diff_2_mean,13_diff_2_mean
0,200,001-143-L-11.wav,6.000000,-382.383118,129.699524,-24.393963,27.436533,-4.836144,21.969589,5.772991,...,-0.004324,0.029115,-0.011319,0.004951,0.006631,-0.019848,-0.013718,-0.013251,-0.013526,-0.001809
1,200,002-127-L-11.wav,6.633333,-264.106262,122.780869,-48.852364,30.297501,-18.720873,7.887349,-12.326442,...,-0.015862,-0.00564,-0.014478,-0.007142,-0.003026,0.011058,0.003039,-0.007403,-0.015382,-0.003262
2,200,003-154-L-11.wav,6.100000,-355.073517,94.369255,-25.467772,38.089897,3.871238,25.670464,0.307353,...,-0.009421,0.015123,-0.011728,0.002192,-0.007242,0.004647,-0.015387,-0.008264,-0.007631,-0.008146
3,200,004-131-L-3.wav,6.700000,-316.15387,127.803688,-32.442688,10.504256,-13.509813,16.713568,-4.463919,...,0.002864,0.040273,0.002317,0.014188,-0.010892,0.005344,-0.010376,-0.000062,-0.002669,-0.004549
4,200,005-2023-04-09_05h17m42s.wav,5.533333,-378.653351,167.027374,14.113789,14.079938,14.309244,6.560044,7.098984,...,0.005915,-0.005486,-0.006482,-0.004301,-0.010414,-0.00861,-0.001221,0.003829,0.001279,0.008912
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
358,80,ns11-N601-1050.wav,5.925000,-424.095917,199.589035,-27.47884,55.143353,28.570026,-16.808243,28.513512,...,-0.010207,0.000527,0.01236,-0.005509,-0.002979,-0.003146,-0.00247,0.009454,0.003625,0.000688
359,80,ns12-N602-0859.wav,5.200000,-463.246063,186.532913,-7.294523,59.409134,27.469156,-8.858232,29.274824,...,-0.010352,-0.01393,-0.002696,-0.015298,-0.007763,0.000753,-0.010489,0.0056,0.001995,-0.01326
360,80,ns13-N603-1527.wav,4.250000,-417.487518,184.109451,-65.363724,46.153511,36.601055,-10.332705,27.301556,...,-0.010015,-0.020555,0.007385,-0.015139,-0.007042,0.004971,-0.013193,0.000711,0.000445,-0.007092
361,80,ns14-N604-1029.wav,5.750000,-459.146149,200.875412,9.043395,44.290855,26.432962,-1.009963,21.638632,...,-0.00581,-0.015356,-0.000227,-0.004554,0.006147,0.020121,-0.000323,-0.008783,-0.001035,0.000338


In [17]:
# Make sure the target folder exists so the export does not fail on a
# fresh checkout.
os.makedirs('./output', exist_ok=True)
# `encoding` is no longer passed: pandas deprecated the keyword in 1.5 and
# removed it in 2.0 (where it raises TypeError), and it never affected
# .xlsx output.
data.to_excel('./output/78_columns_mfcc.xlsx', index=False)