In [6]:
import os
import wave

# Folder whose .wav files are summarised. Swap in the commented-out path to
# inspect the other label.
folder_path = "./label1_padding/label1"
# folder_path = "./label0"

wav_count = 0          # number of files that were read successfully
total_duration = 0.0   # summed duration of those files, in seconds

# Number of files whose duration exceeds 1.01 s
# (presumably a small tolerance above a 1 s target -- confirm).
longer_than_1 = 0

# Walk the folder in sorted order so warnings come out in a stable order.
for filename in sorted(os.listdir(folder_path)):
    if not filename.endswith('.wav'):
        continue

    filepath = os.path.join(folder_path, filename)
    try:
        with wave.open(filepath, 'rb') as wav_file:
            frames = wav_file.getnframes()
            rate = wav_file.getframerate()
        if rate <= 0:
            # A zero frame rate would otherwise surface as ZeroDivisionError;
            # route it through the same "unreadable file" path instead.
            raise wave.Error(f"invalid frame rate: {rate}")
        duration = frames / float(rate)
    except (wave.Error, EOFError) as e:
        # wave.open raises EOFError (not wave.Error) for empty or truncated
        # files; skip those too rather than aborting the whole scan.
        print(f"⚠️ 无法读取 {filename}，跳过。错误信息: {e}")
        continue

    total_duration += duration
    wav_count += 1
    if duration > 1.01:
        longer_than_1 += 1

# Mean duration; 0 when no file could be read, to avoid dividing by zero.
average_duration = total_duration / wav_count if wav_count > 0 else 0

print(f"🎵 WAV文件数量: {wav_count}")
print(f"⏳ 总时长: {total_duration:.2f} 秒 ({total_duration/60:.2f} 分钟)")
print(f"📏 平均每个文件时长: {average_duration:.2f} 秒 ({average_duration/60:.2f} 分钟)")
print(longer_than_1)

🎵 WAV文件数量: 12000
⏳ 总时长: 12000.00 秒 (200.00 分钟)
📏 平均每个文件时长: 1.00 秒 (0.02 分钟)
0


In [None]:
import os
from pydub import AudioSegment


# Source labels live directly under root_folder; each label is written to its
# own "<label>_padding/<label>" directory.
root_folder = "./"
output_folder0 = "./label0_padding"
output_folder1 = "./label1_padding"
os.makedirs(output_folder0, exist_ok=True)
os.makedirs(output_folder1, exist_ok=True)

# Target clip length in milliseconds (1 s = 1000 ms).
target_length_ms = 1000

# Label folders to process.
label_folders = ["label0", "label1"]

for label in label_folders:
    input_dir = os.path.join(root_folder, label)
    # label0 goes under output_folder0; everything else under output_folder1.
    output_parent = output_folder0 if label == "label0" else output_folder1
    output_dir = os.path.join(output_parent, label)
    os.makedirs(output_dir, exist_ok=True)

    for filename in os.listdir(input_dir):
        if not filename.endswith(".wav"):
            continue

        file_path = os.path.join(input_dir, filename)
        audio = AudioSegment.from_wav(file_path)

        if label == "label0":
            if len(audio) < target_length_ms:
                # Centre the clip: split the missing time between both sides,
                # giving the odd millisecond (if any) to the right.
                padding_needed = target_length_ms - len(audio)
                padding_left = padding_needed // 2
                padding_right = padding_needed - padding_left
                # Build the silence at the clip's own frame rate. Left at
                # pydub's default rate, concatenation has to resample one side
                # (the audio itself when its rate is below the default), which
                # changes the exported sample rate or the padded length.
                silence_left = AudioSegment.silent(
                    duration=padding_left, frame_rate=audio.frame_rate
                )
                silence_right = AudioSegment.silent(
                    duration=padding_right, frame_rate=audio.frame_rate
                )
                audio = silence_left + audio + silence_right
            else:
                audio = audio[:target_length_ms]

        elif label == "label1":
            # label1 clips are only truncated, never padded.
            audio = audio[:target_length_ms]

        output_path = os.path.join(output_dir, filename)
        audio.export(output_path, format="wav")
        print(f"✅ Process finished: {output_path}")

print("🎯 FINISHED!")

In [8]:
import os
import numpy as np
import soundfile as sf

# Input and output locations.
output_folder0 = "./label0"
output_folder1 = "./label1"
os.makedirs(output_folder0, exist_ok=True)
os.makedirs(output_folder1, exist_ok=True)

# Source folders to process.
label_folders = ["label0_ori", "label1_ori"]

# Expected sample rate and clip duration.
TARGET_SR = 16000
CLIP_DURATION = 0.5  # seconds
CLIP_LENGTH = int(CLIP_DURATION * TARGET_SR)

# Onset-detection threshold, as a fraction of the file's peak amplitude.
THRESHOLD_RATIO = 0.05  # 5% max sound


def _clip_from_onset(waveform, clip_length=CLIP_LENGTH, threshold_ratio=THRESHOLD_RATIO):
    """Return `clip_length` samples starting at the first sample above threshold.

    The threshold is `threshold_ratio` times the peak absolute amplitude of
    `waveform`. If fewer than `clip_length` samples remain after the onset,
    the clip is zero-padded at the end.

    Args:
        waveform: 1-D NumPy array of samples.
        clip_length: Number of samples in the returned clip.
        threshold_ratio: Fraction of the peak amplitude a sample must exceed
            to count as the onset.

    Returns:
        A 1-D array of exactly `clip_length` samples, or None when the input
        is empty or no sample exceeds the threshold.
    """
    if waveform.size == 0:
        # np.max raises ValueError on a zero-size array; treat as "no sound".
        return None

    magnitude = np.abs(waveform)
    threshold = threshold_ratio * np.max(magnitude)
    non_silent_indices = np.where(magnitude > threshold)[0]
    if len(non_silent_indices) == 0:
        return None

    start_idx = non_silent_indices[0]
    # Slicing past the end of the array is clamped automatically.
    clipped = waveform[start_idx:start_idx + clip_length]

    if len(clipped) < clip_length:
        padding_needed = clip_length - len(clipped)
        clipped = np.concatenate([
            clipped,
            np.zeros(padding_needed, dtype=clipped.dtype)
        ])
    return clipped


for label_folder in label_folders:
    input_folder = label_folder

    # Pick the output folder that corresponds to this input folder.
    if label_folder == "label0_ori":
        output_folder = output_folder0
    elif label_folder == "label1_ori":
        output_folder = output_folder1
    else:
        raise ValueError(f"未知的label_folder: {label_folder}")

    os.makedirs(output_folder, exist_ok=True)

    for filename in os.listdir(input_folder):
        if not filename.endswith(".wav"):
            continue

        filepath = os.path.join(input_folder, filename)
        waveform, sr = sf.read(filepath)

        # Multi-channel input: keep only the first channel.
        if waveform.ndim > 1:
            waveform = waveform[:, 0]

        if sr != TARGET_SR:
            raise ValueError(f"采样率不一致，期待{TARGET_SR}Hz，实际{sr}Hz")

        clipped_waveform = _clip_from_onset(waveform)
        if clipped_waveform is None:
            # Empty file, or nothing rises above the threshold.
            print(f"⚠️ 文件 {filename} 完全静音，跳过")
            continue

        output_path = os.path.join(output_folder, filename)
        sf.write(output_path, clipped_waveform, samplerate=sr)
        print(f"✅ 处理完成: {output_path}")

✅ 处理完成: ./label0/001167.wav
✅ 处理完成: ./label0/006608.wav
✅ 处理完成: ./label0/007516.wav
✅ 处理完成: ./label0/000279.wav
✅ 处理完成: ./label0/003770.wav
✅ 处理完成: ./label0/005301.wav
✅ 处理完成: ./label0/002308.wav
✅ 处理完成: ./label0/005467.wav
✅ 处理完成: ./label0/004779.wav
✅ 处理完成: ./label0/003016.wav
✅ 处理完成: ./label0/007270.wav
✅ 处理完成: ./label0/001601.wav
✅ 处理完成: ./label0/007264.wav
✅ 处理完成: ./label0/001615.wav
✅ 处理完成: ./label0/005473.wav
✅ 处理完成: ./label0/003002.wav
✅ 处理完成: ./label0/003764.wav
✅ 处理完成: ./label0/005315.wav
✅ 处理完成: ./label0/001173.wav
✅ 处理完成: ./label0/007502.wav
✅ 处理完成: ./label0/004023.wav
✅ 处理完成: ./label0/003994.wav
✅ 处理完成: ./label0/002452.wav
✅ 处理完成: ./label0/006634.wav
✅ 处理完成: ./label0/000245.wav
✅ 处理完成: ./label0/000523.wav
✅ 处理完成: ./label0/006152.wav
✅ 处理完成: ./label0/002334.wav
✅ 处理完成: ./label0/004745.wav
✅ 处理完成: ./label0/004989.wav
✅ 处理完成: ./label0/002320.wav
✅ 处理完成: ./label0/004751.wav
✅ 处理完成: ./label0/000537.wav
✅ 处理完成: ./label0/007258.wav
✅ 处理完成: ./label0/006146.wav
✅ 处理完成: ./label0/001