In [14]:
import numpy as np
from scipy.io import wavfile

# 加载两个wav音频文件
rate1, data1 = wavfile.read('./input/test/audio1.wav')
rate2, data2 = wavfile.read('./input/test/audio2.wav')
rate3, data3 = wavfile.read('./input/test/audio3.wav')
rate1, rate2, rate3
data3.shape

(10187776, 4)

In [5]:
# 确保两个音频文件的采样率相同
assert rate1 == rate2

# 将音频数据转换为单通道（如果不是单通道）
if len(data1.shape) > 1:
    data1 = data1[:, 0]
if len(data2.shape) > 1:
    data2 = data2[:, 0]

# 定义每个时间段的长度，这里设为1秒
time_interval = 1 * rate1
# 计算时间段的数量
num_intervals = int(np.ceil(len(data1) / time_interval))


# 定义A加权滤波器
def a_weighting(f):
    """计算A加权滤波器在给定频率f下的增益"""
    # 跳过频率为0的点
    f = f[1:]
    return 20 * np.log10(
        2.71828 ** (-5.5 * (np.log10(f / 1000)) ** 2) * (1.5 * ((f / 1000) ** 2 + 20 ** 2) ** 0.5) / (f / 1000) ** 2 / ((f / 1000) ** 2 + 107.7 ** 2) ** 0.5 + 0.000001)


# 对每个音频文件进行LEQ计算
leq1 = 0
leq2 = 0
for i in range(num_intervals):
    # 取出当前时间段的音频数据
    interval_data1 = data1[i * time_interval: (i + 1) * time_interval] / 32767.0
    interval_data2 = data2[i * time_interval: (i + 1) * time_interval] / 32767.0

    # 裁剪采样数据，将数据限制在 -1 到 1 的范围内
    interval_data1 = np.clip(interval_data1, -1, 1)
    interval_data2 = np.clip(interval_data2, -1, 1)

    # 计算当前时间段内的声压级
    spl1 = 20 * np.log10(np.sqrt(np.mean(interval_data1 ** 2)))
    spl2 = 20 * np.log10(np.sqrt(np.mean(interval_data2 ** 2)))

    # 对声压级进行A加权
    spl1_a = spl1 + a_weighting(np.linspace(0, rate1 / 2, len(interval_data1) // 2 + 1))
    spl2_a = spl2 + a_weighting(np.linspace(0, rate1 / 2, len(interval_data2) // 2 + 1))

    # 对A加权后的声压级进行LEQ计算
    leq1 += np.mean(spl1_a) ** 2
    leq2 += np.mean(spl2_a) ** 2

leq1 = 10 * np.log10(leq1 / num_intervals)
leq2 = 10 * np.log10(leq2 / num_intervals)

print('音频1的LEQ为：{:.2f} dB'.format(leq1))
print('音频2的LEQ为：{:.2f} dB'.format(leq2))


音频1的LEQ为：40.16 dB
音频2的LEQ为：40.39 dB


In [3]:
rate1

44100

In [11]:
import wave

with wave.open('./input/test/audio3.wav', 'rb') as audio:
    sample_width = audio.getsampwidth()
    print(f"采样深度为：{sample_width} bytes")

Error: unknown format: 3

In [13]:
data3

array([[-5.9314687e-02, -2.0501588e-01,  2.5348153e-03,  1.4804414e-04],
       [-6.2018484e-02, -1.9504559e-01, -1.5884844e-03, -1.7395186e-03],
       [-6.3100010e-02, -1.8842128e-01,  2.6700057e-03, -7.0320966e-04],
       ...,
       [-4.7708607e-01, -2.8177008e-01, -6.6243177e-03, -5.5886656e-03],
       [-4.9523535e-01, -2.7872831e-01, -5.0358335e-03, -1.1473420e-03],
       [-5.0381994e-01, -2.8021538e-01, -3.5149441e-03, -6.4029088e-03]],
      dtype=float32)

In [16]:
import numpy as np
from scipy.io import wavfile
from python_speech_features import audio

# 读取音频文件
rate1, data1 = wavfile.read('audio1.wav')

# 计算当前Leq
data1 = audio.convert_audio_samples(data1, orig_rate=rate1, target_rate=rate1)
current_leq = 20 * np.log10(np.sqrt(np.mean(np.square(data1))) / 20e-6)
print(current_leq)


ImportError: cannot import name 'audio' from 'python_speech_features' (E:\Python\Anaconda\lib\site-packages\python_speech_features\__init__.py)

In [21]:
import numpy as np


def calculate_leq(signal, fs):
    """计算给定信号的LEQ

    Args:
        signal: ndarray，输入信号数组
        fs: float，采样率

    Returns:
        float，LEQ值
    """
    # 计算能量
    energy = np.sum(signal ** 2)

    # 计算RMS
    rms = np.sqrt(energy / len(signal))

    # 计算A权ing
    A_weighting = np.array([31.5, 63, 125, 250, 500, 1000, 2000, 4000, 8000, 16000])
    A_weighting_db = np.array([-39.4, -26.2, -16.1, -8.6, -3.2, 0, 1.2, 1, -1.1, -6.6])
    f = np.array([16.35, 32.7, 65.4, 130.8, 261.6, 523.3, 1047, 2093, 4186, 8372])
    C = np.array([0.0002, 0.0005, 0.0012, 0.0025, 0.0050, 0.0100, 0.0200, 0.0400, 0.0800, 0.1600])
    k = np.array([1.6, 1.2, 0.9, 0.7, 0.6, 0.6, 0.6, 0.7, 0.8, 1.0])
    A_filter = np.sum((10 ** (A_weighting_db / 10)) ** 2 * C * k)
    filter_gain = np.interp(f, A_weighting, 10 ** (A_weighting_db / 20))
    A_rms = np.sqrt(np.sum((signal * filter_gain) ** 2) / len(signal) / A_filter)

    # 计算LEQ
    leq = 20 * np.log10(A_rms / 20e-6)

    return leq


In [22]:
from scipy.io import wavfile

# 读取音频文件
# 计算当前LEQ
current_leq = calculate_leq(data1, rate1)

# 修改LEQ值为60dB
target_leq = 60
scale_factor = 10 ** ((target_leq - current_leq) / 20.0)
scaled_data = data1 * scale_factor

current_leq

  rms = np.sqrt(energy / len(signal))


ValueError: operands could not be broadcast together with shapes (220500,) (10,) 

In [24]:
import librosa
import numpy as np

def calculate_leq(audio_file, window_size=44100, hop_length=22050):
    # 读取音频文件
    y, sr = librosa.load(audio_file, sr=None)

    # 计算每个窗口的rms值
    rms = librosa.feature.rms(y, frame_length=window_size, hop_length=hop_length)[0]

    # 将rms值转换为分贝（dB）单位
    db = librosa.amplitude_to_db(rms)

    # 计算Leq(A)值
    leq = np.mean(db)

    return leq

In [25]:
# 示例用法
audio_file = './input/test/audio1.wav'
leq = calculate_leq(audio_file)
print(f"音频文件 {audio_file} 的 Leq(A) 值为：{leq:.2f} dB")

TypeError: rms() takes 0 positional arguments but 1 positional argument (and 2 keyword-only arguments) were given