In [17]:
# 作业三：音频数据集数据处理（选做）

# 1.数据集：使用AI Studio平台作业项目中的/datasets/voice中的数据集。

# 2.请使用以下音频数据处理方式中的1-2种方法处理上述数据集，将结果保存到/results/voice目录下。

# 降噪和去干扰
# 去混响
# 端点检测
# 音量归一化
# 异常检测和修复

# attention: 需要安装以下依赖库：
# pip install librosa soundfile numpy matplotlib
# 结果保存至：/home/aistudio/result/voice（每个音频文件一个子目录）。注意：上面的作业要求写的是 /results/voice，提交前请确认目录名。

In [18]:
pip install librosa soundfile numpy matplotlib

Looking in indexes: https://mirror.baidu.com/pypi/simple/, https://mirrors.aliyun.com/pypi/simple/
Note: you may need to restart the kernel to use updated packages.


In [19]:
import os
import librosa
import numpy as np
import soundfile as sf
import matplotlib.pyplot as plt
from scipy.signal import fftconvolve

In [20]:
# 1. 加载音频文件
def load_audio(file_path):
    """
    Read an audio file with librosa.

    Parameters:
    - file_path: path of the audio file; forwarded unchanged to ``librosa.load``.

    Returns:
    - (audio_data, sample_rate) exactly as produced by ``librosa.load``.
    """
    # sr=None is passed so that no target sampling rate is requested
    # (per the librosa docs this keeps the file's native rate).
    samples, native_rate = librosa.load(file_path, sr=None)
    return samples, native_rate

In [21]:
# 2. 降噪和去干扰
def noise_reduction(audio_data, noise_factor=0.02):
    """
    Suppress stationary background noise with magnitude spectral subtraction.

    The earlier version of this function generated Gaussian noise and *added*
    it to the signal, i.e. it degraded the audio instead of cleaning it. This
    version estimates a per-frequency noise profile from the quietest frames
    of the recording and subtracts it from every frame.

    Parameters:
    - audio_data: 1-D array-like of mono audio samples.
    - noise_factor: spectral floor in [0, 1]. Every time-frequency bin keeps at
      least this fraction of its original magnitude, which limits "musical
      noise" artefacts. Values outside [0, 1] are clipped. (Previously this was
      the standard deviation of the noise that got added.)

    Returns:
    - A new float array with the same length as ``audio_data``. Inputs shorter
      than one analysis frame (512 samples) are returned as an unchanged copy.

    Limitation: the noise profile comes from the 10% lowest-energy frames, so a
    recording without any pause will have some signal treated as noise.
    """
    # Local import: the notebook's import cell only pulls in ``fftconvolve``.
    from scipy.signal import istft, stft

    samples = np.asarray(audio_data, dtype=float)
    frame_length = 512
    if samples.size < frame_length:
        # Not enough data for even one analysis frame; nothing sensible to do.
        return samples.copy()

    floor_ratio = float(np.clip(noise_factor, 0.0, 1.0))

    # Default Hann window with 50% overlap, which istft can invert exactly.
    _, _, spectrum = stft(samples, nperseg=frame_length)
    magnitude = np.abs(spectrum)

    # Use the quietest frames as the noise-only reference.
    frame_energy = np.sum(magnitude ** 2, axis=0)
    quiet_count = max(1, frame_energy.size // 10)
    quiet_frames = np.argsort(frame_energy)[:quiet_count]
    noise_profile = magnitude[:, quiet_frames].mean(axis=1, keepdims=True)

    # Over-subtract to compensate for the low bias of picking the quietest
    # frames, then clamp to the spectral floor so the gain stays in [0, 1].
    over_subtraction = 2.0
    cleaned = np.maximum(magnitude - over_subtraction * noise_profile,
                         floor_ratio * magnitude)
    gain = cleaned / np.maximum(magnitude, np.finfo(float).tiny)

    # Keep the original phase; only the magnitudes are scaled.
    _, restored = istft(spectrum * gain, nperseg=frame_length)

    # stft pads the signal, so the inverse can be slightly longer than the input.
    output = np.zeros_like(samples)
    usable = min(samples.size, restored.size)
    output[:usable] = restored[:usable]
    return output

In [22]:
# 3. 去混响
def dereverberation(audio_data, sample_rate, room_scale=0.1):
    """
    Attenuate late reverberation with single-channel spectral subtraction.

    The earlier version convolved the audio with a random white-noise impulse
    response, which *adds* an artificial reverberant tail (and changes on every
    run) instead of removing one. This version models the late reverberant
    power in each frequency bin as a delayed, exponentially decayed copy of the
    observed power and subtracts it.

    Parameters:
    - audio_data: 1-D array-like of mono audio samples.
    - sample_rate: sampling rate in Hz.
    - room_scale: assumed reverberation time in seconds, i.e. the time over
      which the reverberant energy decays by 60 dB. (It was previously the
      length in seconds of the random impulse response.) Larger values give
      stronger suppression; the default of 0.1 s is very mild.

    Returns:
    - A new float array with the same length as ``audio_data``. The input is
      returned as an unchanged copy when it is shorter than one analysis frame
      (512 samples) or when ``room_scale`` / ``sample_rate`` is not positive.
    """
    # Local import: the notebook's import cell only pulls in ``fftconvolve``.
    from scipy.signal import istft, stft

    samples = np.asarray(audio_data, dtype=float)
    frame_length = 512
    hop_length = frame_length // 2
    if samples.size < frame_length or room_scale <= 0 or sample_rate <= 0:
        return samples.copy()

    _, _, spectrum = stft(samples, nperseg=frame_length,
                          noverlap=frame_length - hop_length)
    power = np.abs(spectrum) ** 2

    # Treat everything arriving ~50 ms after the direct sound as late
    # reverberation; quantise that delay to whole STFT frames.
    delay_frames = max(1, int(round(0.05 * sample_rate / hop_length)))
    if delay_frames >= power.shape[1]:
        return samples.copy()
    delay_seconds = delay_frames * hop_length / float(sample_rate)

    # A 60 dB drop over ``room_scale`` seconds is a power factor of 10**-6,
    # scaled here to the (quantised) delay actually used.
    decay = 10.0 ** (-6.0 * delay_seconds / room_scale)

    late_power = np.zeros_like(power)
    late_power[:, delay_frames:] = decay * power[:, :-delay_frames]

    # Power subtraction with an amplitude floor; written so the gain stays in
    # [gain_floor, 1] and never divides by zero.
    gain_floor = 0.1
    kept_power = np.maximum(power - late_power, (gain_floor ** 2) * power)
    gain = np.sqrt(kept_power / np.maximum(power, np.finfo(float).tiny))

    _, restored = istft(spectrum * gain, nperseg=frame_length,
                        noverlap=frame_length - hop_length)

    # stft pads the signal, so the inverse can be slightly longer than the input.
    output = np.zeros_like(samples)
    usable = min(samples.size, restored.size)
    output[:usable] = restored[:usable]
    return output

In [23]:
# 4. 端点检测
def endpoint_detection(audio_data, sample_rate, threshold=0.02):
    """
    Trim leading and trailing low-energy audio using frame-wise RMS energy.

    Parameters:
    - audio_data: 1-D array of audio samples.
    - sample_rate: sampling rate in Hz. Not used by the computation; kept so
      existing callers keep working.
    - threshold: a frame counts as active when its RMS energy is strictly
      greater than this value.

    Returns:
    - The slice of ``audio_data`` from the first active frame up to and
      including the last active frame. When the input is empty or no frame
      exceeds ``threshold``, ``audio_data`` is returned unchanged so that later
      pipeline stages still receive a usable signal.
    """
    if len(audio_data) == 0:
        return audio_data

    frame_rms = librosa.feature.rms(y=audio_data)[0]
    active_frames = np.flatnonzero(frame_rms > threshold)
    if active_frames.size == 0:
        # Nothing above the threshold: previously this raised IndexError.
        return audio_data

    start = int(librosa.frames_to_samples(active_frames[0]))
    # End at the start of the frame *after* the last active one; using the
    # last active frame's own start sample would cut that frame off.
    end = int(librosa.frames_to_samples(active_frames[-1] + 1))
    end = min(end, len(audio_data))
    return audio_data[start:end]

In [24]:
# 5. 音量归一化
def normalize_audio(audio_data):
    """
    Peak-normalise audio so that the largest absolute sample equals 1.

    Parameters:
    - audio_data: array-like of audio samples.

    Returns:
    - A new array equal to ``audio_data`` divided by its peak absolute value.
      Empty or completely silent input is returned as an unchanged copy,
      because dividing by a zero peak would fill the signal with NaN.
    """
    samples = np.asarray(audio_data)
    if samples.size == 0:
        return samples.copy()

    peak = np.max(np.abs(samples))
    if peak == 0:
        return samples.copy()
    return samples / peak

In [25]:
# 6. 异常检测和修复
def detect_and_repair_anomalies(audio_data, sample_rate, anomaly_threshold=0.1):
    """
    Replace over-threshold samples with the mean of their two neighbours.

    Parameters:
    - audio_data: 1-D array-like of audio samples.
    - sample_rate: sampling rate in Hz. Not used by the computation; kept so
      existing callers keep working.
    - anomaly_threshold: a sample is flagged when its absolute value is
      strictly greater than this value.

    Returns:
    - A repaired copy when at least one interior sample is flagged, otherwise
      ``audio_data`` itself. The input is never modified. The first and last
      samples are left untouched because they lack one neighbour.

    NOTE(review): the criterion is a plain amplitude threshold. On audio that
    was peak-normalised to 1.0 the default of 0.1 flags ordinary loud samples,
    not just glitches; callers may want a higher threshold.
    """
    samples = np.asarray(audio_data)
    if samples.size < 3:
        # No sample has two neighbours.
        return audio_data

    flagged = np.abs(samples) > anomaly_threshold
    # Boundary samples cannot be averaged from both sides. Index 1 *can*, and
    # is now repaired (the old ``1 < anomaly`` guard skipped it).
    flagged[0] = False
    flagged[-1] = False
    positions = np.flatnonzero(flagged)
    if positions.size == 0:
        return audio_data

    repaired = samples.copy()
    # Neighbours are read from the original signal, so adjacent anomalies do
    # not feed repaired values into each other.
    repaired[positions] = (samples[positions - 1] + samples[positions + 1]) / 2
    return repaired

In [26]:
# 7. 保存音频文件
def save_audio(file_path, audio_data, sample_rate):
    """
    Write audio samples to disk through ``soundfile.write``.

    Parameters:
    - file_path: destination path of the audio file.
    - audio_data: samples to write.
    - sample_rate: sampling rate stored with the file.
    """
    sf.write(file=file_path, data=audio_data, samplerate=sample_rate)

In [27]:
# 8. 绘制并保存波形图
def plot_waveform(audio_data, sample_rate, title, file_path):
    """
    Draw the waveform of an audio signal and save it as an image file.

    Parameters:
    - audio_data: audio samples to plot.
    - sample_rate: sampling rate in Hz, used for the time axis.
    - title: title drawn above the plot.
    - file_path: destination path of the image file.
    """
    # ``import librosa`` alone does not expose the ``display`` submodule on
    # librosa releases before 0.10, so import it explicitly here.
    import librosa.display

    fig, ax = plt.subplots(figsize=(10, 4))
    try:
        librosa.display.waveshow(audio_data, sr=sample_rate, ax=ax)
        ax.set_title(title)
        fig.tight_layout()
        fig.savefig(file_path)
    finally:
        # Always release the figure: this runs once per stage per file, and an
        # exception while plotting or saving would otherwise leak it.
        plt.close(fig)

In [28]:
# 主流程 - 处理音频文件
def process_audio_files(input_directory, output_directory):
    """
    Run the full processing pipeline on every audio file in a directory.

    For each ``.wav`` / ``.mp3`` file (extension matched case-insensitively,
    files visited in sorted order) a sub-directory named after the file stem is
    created under ``output_directory``. It receives one waveform PNG for the
    original audio, one PNG after each stage, and the final audio saved as
    ``processed_<original file name>``.

    Stages, in order: noise reduction, dereverberation, endpoint detection,
    volume normalisation, anomaly detection and repair.

    Parameters:
    - input_directory: directory that is scanned (non-recursively) for audio.
    - output_directory: root directory for the per-file output folders.

    Note: two inputs that share a stem (e.g. ``a.wav`` and ``a.mp3``) write
    their plots into the same folder, so the later one overwrites the earlier.
    """
    # (plot title, plot-file suffix, stage). Every stage is called as
    # ``stage(audio, sample_rate)`` so the loop below can treat them uniformly.
    stages = [
        ("Noise Reduction", "noise_reduction",
         lambda audio, rate: noise_reduction(audio)),
        ("Dereverberation", "dereverberation",
         lambda audio, rate: dereverberation(audio, rate)),
        ("Endpoint Detection", "endpoint_detection",
         lambda audio, rate: endpoint_detection(audio, rate)),
        ("Volume Normalization", "volume_normalization",
         lambda audio, rate: normalize_audio(audio)),
        ("Anomaly Detection and Repair", "anomaly_repair",
         lambda audio, rate: detect_and_repair_anomalies(audio, rate)),
    ]

    # Sorted for a reproducible processing order; lower-cased so ``.WAV`` and
    # ``.MP3`` are picked up too; isfile() skips directories with such names.
    audio_files = sorted(
        name for name in os.listdir(input_directory)
        if name.lower().endswith(('.wav', '.mp3'))
        and os.path.isfile(os.path.join(input_directory, name))
    )

    for audio_file in audio_files:
        file_path = os.path.join(input_directory, audio_file)

        # One output folder per input file, named after the file stem.
        audio_name = os.path.splitext(audio_file)[0]
        audio_output_dir = os.path.join(output_directory, audio_name)
        os.makedirs(audio_output_dir, exist_ok=True)

        audio_data, sample_rate = load_audio(file_path)
        plot_waveform(audio_data, sample_rate, "Original Audio",
                      os.path.join(audio_output_dir, f"{audio_name}_original.png"))

        # Each stage consumes the previous stage's output.
        for title, suffix, stage in stages:
            audio_data = stage(audio_data, sample_rate)
            plot_waveform(audio_data, sample_rate, title,
                          os.path.join(audio_output_dir, f"{audio_name}_{suffix}.png"))

        output_file_path = os.path.join(audio_output_dir, f"processed_{audio_file}")
        save_audio(output_file_path, audio_data, sample_rate)

        print(f"Processed {audio_file} - Saved to {output_file_path}")

In [29]:
# 运行主程序
if __name__ == '__main__':
    input_directory = '/home/aistudio/datasets/voice/'  # directory containing the input audio files
    # NOTE(review): the assignment text in the first cell names /results/voice,
    # while this path uses "result" — confirm which directory is expected.
    output_directory = '/home/aistudio/result/voice/'  # root directory for the processed outputs
    
    # Make sure the output root exists before any file is written.
    os.makedirs(output_directory, exist_ok=True)
    
    # Run the processing pipeline on the audio files in the input directory.
    process_audio_files(input_directory, output_directory)

Processed 201-sad-zhaoquanyin.wav - Saved to /home/aistudio/result/voice/201-sad-zhaoquanyin/processed_201-sad-zhaoquanyin.wav
Processed 202-sad-zhaoquanyin.wav - Saved to /home/aistudio/result/voice/202-sad-zhaoquanyin/processed_202-sad-zhaoquanyin.wav
Processed 202-happy-ZhaoZuoxiang.wav - Saved to /home/aistudio/result/voice/202-happy-ZhaoZuoxiang/processed_202-happy-ZhaoZuoxiang.wav
Processed 201-sad-liuchanhg.wav - Saved to /home/aistudio/result/voice/201-sad-liuchanhg/processed_201-sad-liuchanhg.wav
Processed 203-sad-wangzhe.wav - Saved to /home/aistudio/result/voice/203-sad-wangzhe/processed_203-sad-wangzhe.wav
Processed 203-happy-wangzhe.wav - Saved to /home/aistudio/result/voice/203-happy-wangzhe/processed_203-happy-wangzhe.wav
Processed 201-happy-wangzhe.wav - Saved to /home/aistudio/result/voice/201-happy-wangzhe/processed_201-happy-wangzhe.wav
Processed 201-happy-ZhaoZuoxiang.wav - Saved to /home/aistudio/result/voice/201-happy-ZhaoZuoxiang/processed_201-happy-ZhaoZu