In [5]:
import os
from pydub import AudioSegment

def average_wav_length_difference(folder1: str, folder2: str) -> float:
    """
    Compute the mean absolute duration difference (in seconds) between
    same-named wav files found in two folders.

    The listing of ``folder1`` drives the matching: an entry is compared only
    when its name ends in ".wav" (case-insensitive) and it is a regular file
    in BOTH folders. Everything else is skipped.

    :param folder1: path of the first folder
    :param folder2: path of the second folder
    :return: mean of ``abs(len1 - len2)`` over all matched pairs, in seconds;
             ``0.0`` when no pair matched (note that this is indistinguishable
             from matched files whose lengths are identical)
    """
    lengths_diff = []

    # Walk the entries of folder1 and look each one up in folder2.
    for file in os.listdir(folder1):
        if not file.lower().endswith(".wav"):
            continue
        file1_path = os.path.join(folder1, file)
        file2_path = os.path.join(folder2, file)

        # Only handle names that are regular files on both sides. isfile()
        # (rather than exists()) keeps a directory that happens to be named
        # "*.wav" from being handed to the audio decoder.
        if not (os.path.isfile(file1_path) and os.path.isfile(file2_path)):
            continue

        # len() of the loaded segment is divided by 1000 to get seconds.
        len1 = len(AudioSegment.from_wav(file1_path)) / 1000.0
        len2 = len(AudioSegment.from_wav(file2_path)) / 1000.0

        lengths_diff.append(abs(len1 - len2))

    if not lengths_diff:
        return 0.0  # no matching files

    return sum(lengths_diff) / len(lengths_diff)

# ===== Example usage =====
# Each entry maps an experiment label to the pair of wav folders to compare
# (the "suno_pop" output and the "suno_pop_tempo71" output of that model).
# Keeping every pair in one table makes the active choice explicit instead of
# relying on the last of several successive reassignments.
experiment_folders = {
    "visinger2": (
        "checkpoints/opencpop_visinger2/exp/svs_visinger2_normal/suno_pop/test/wav",
        "checkpoints/opencpop_visinger2/exp/svs_visinger2_normal/suno_pop_tempo71/test/wav",
    ),
    "naive_rnn_dp": (
        "/data2/fwh/espnet/egs2/opencpop_benchmark/svs1/checkpoints/opencpop_naive_rnn_dp/exp/svs_train_naive_rnn_dp_raw_phn_None_zh/suno_pop/test/wav",
        "/data2/fwh/espnet/egs2/opencpop_benchmark/svs1/checkpoints/opencpop_naive_rnn_dp/exp/svs_train_naive_rnn_dp_raw_phn_None_zh/suno_pop_tempo71/test/wav",
    ),
    "xiaoice": (
        "/data2/fwh/espnet/egs2/opencpop_benchmark/svs1/checkpoints/opencpop_xiaoice/exp/svs_train_xiaoice_raw_phn_None_zh/suno_pop/test/wav",
        "/data2/fwh/espnet/egs2/opencpop_benchmark/svs1/checkpoints/opencpop_xiaoice/exp/svs_train_xiaoice_raw_phn_None_zh/suno_pop_tempo71/test/wav",
    ),
}

# Change this label to evaluate a different pair.
selected_experiment = "xiaoice"
folder_a, folder_b = experiment_folders[selected_experiment]

avg_diff = average_wav_length_difference(folder_a, folder_b)
print(f"Average length difference: {avg_diff:.3f} seconds")


Average length difference: 0.000 seconds
