In [2]:
r"""
AddGaussianNoise Augmentation
output audio `noisy_00001.wav` to `target_dir`
save audio info to `noisy_audio_info.json`
"""

import os
import json
from audiomentations import Compose, AddGaussianNoise
import soundfile as sf
import numpy as np
import pandas as pd

# Attribute table for the recordings; CSV header: filename|Language|Story_type
attribute_csv = "./Deception/CBU0521DD_stories_attributes.csv"
df = pd.read_csv(attribute_csv)

# Input recordings, plus the output folders for augmented audio and its metadata.
source_dir = "./Deception/CBU0521DD_stories/"
target_dir = "./dataset/audio/"
info_dir = "./dataset/info/"

# Noise standard deviation.
# NOTE(review): not referenced anywhere else in this cell -- the transform below
# is configured with its own literal amplitude range. Confirm whether it is still needed.
noise_stddev = 0.005

# Make sure both output folders exist (no error if they are already there).
os.makedirs(target_dir, exist_ok=True)
os.makedirs(info_dir, exist_ok=True)

# Augmentation chain with a single step: additive Gaussian white noise,
# applied on every call (p=1.0).
augment = Compose(
    [AddGaussianNoise(min_amplitude=0.001, max_amplitude=0.015, p=1.0)]
)

# Name of the JSON metadata file, and the list that accumulates one record
# per augmented recording before it is dumped.
noisy_audio_info_file = "noisy_audio_info.json"
audio_info_list = []

def extract_number(filename):
    """Return the integer written before the first '.' in ``filename``.

    Serves as a numeric sort key for names shaped like "00001.wav".
    ``int()`` raises ValueError when the leading part is not an integer literal.
    """
    stem, _, _ = filename.partition('.')
    return int(stem)

# Keep only the .wav recordings *before* sorting: running the numeric sort key on
# unrelated directory entries (e.g. ".DS_Store") raises ValueError, and enumerating
# them would leave gaps in the output numbering.
wav_filenames = sorted(
    (name for name in os.listdir(source_dir) if name.endswith(".wav")),
    key=extract_number,
)

for idx, filename in enumerate(wav_filenames):
    # Look up the attribute row (Language / Story_type) for this recording and
    # fail with a message that names the file if the CSV has no entry for it.
    matching_rows = df[df['filename'] == filename]
    if matching_rows.empty:
        raise LookupError(f"no attribute row for {filename!r} in {attribute_csv}")
    audio_attributes = matching_rows.iloc[0]

    # Full path of the source recording.
    file_path = os.path.join(source_dir, filename)

    # soundfile yields a 1-D array for mono and (frames, channels) otherwise.
    audio, sample_rate = sf.read(file_path)

    # The transforms are fed float32 samples.
    audio = audio.astype(np.float32)

    if audio.ndim == 2:
        # audiomentations expects multichannel audio as (channels, samples).
        # A single call on the whole array makes every channel share one random
        # parameter draw; separate per-channel calls would re-randomize the
        # noise amplitude for each channel.
        channels_first = np.ascontiguousarray(audio.T)
        augmented_audio = augment(samples=channels_first, sample_rate=sample_rate)
        # Back to soundfile's (frames, channels) layout for writing.
        augmented_audio = np.ascontiguousarray(augmented_audio.T)
    else:
        # Mono: augment the 1-D signal directly.
        augmented_audio = augment(samples=audio, sample_rate=sample_rate)

    # Output name is 1-based and zero-padded: noisy_00001.wav, noisy_00002.wav, ...
    new_filename = f"noisy_{str(idx+1).zfill(5)}.wav"
    new_file_path = os.path.join(target_dir, new_filename)

    # Save the augmented recording at the original sample rate.
    sf.write(new_file_path, augmented_audio, sample_rate)

    # Metadata record for this augmented file ("id" is the 0-based position).
    audio_info_list.append({
        "id": idx,
        "filename": new_filename,
        "augment": "noisy",
        "Language": audio_attributes['Language'],
        "Story_type": audio_attributes['Story_type']
    })

# Dump the collected metadata records to the JSON info file.
json_file_path = os.path.join(info_dir, noisy_audio_info_file)
with open(json_file_path, 'w', encoding='utf-8') as json_file:
    json.dump(audio_info_list, json_file, indent=4)

print(f"edited audio stored in {target_dir}, info stored in {info_dir}")

edited audio stored in ./dataset/audio/, info stored in ./dataset/info/


In [3]:
r"""
PitchShift Augmentation
output audio `pitchshift_00001.wav` to `target_dir`
save audio info to `pitchshift_audio_info.json`
"""

import os
import json
from audiomentations import Compose, PitchShift
import soundfile as sf
import numpy as np
import pandas as pd

# Attribute table for the recordings; CSV header: filename|Language|Story_type
attribute_csv = "./Deception/CBU0521DD_stories_attributes.csv"
df = pd.read_csv(attribute_csv)

# Input recordings, plus the output folders for augmented audio and its metadata.
source_dir = "./Deception/CBU0521DD_stories/"
target_dir = "./dataset/audio/"
info_dir = "./dataset/info/"

# Make sure both output folders exist (no error if they are already there).
os.makedirs(target_dir, exist_ok=True)
os.makedirs(info_dir, exist_ok=True)

# Augmentation chain with a single step: a pitch shift between -2 and +2
# semitones, applied on every call (p=1.0).
augment = Compose(
    [PitchShift(min_semitones=-2, max_semitones=2, p=1.0)]
)

# Name of the JSON metadata file, and the list that accumulates one record
# per augmented recording before it is dumped.
pitchshift_audio_info_file = "pitchshift_audio_info.json"
audio_info_list = []

def extract_number(filename):
    """Sort key: the integer value of the text preceding the first '.'.

    Example: "00001.wav" -> 1. A leading part that is not an integer literal
    makes ``int()`` raise ValueError.
    """
    leading_part = filename.split('.', 1)[0]
    return int(leading_part)

# Keep only the .wav recordings *before* sorting: running the numeric sort key on
# unrelated directory entries (e.g. ".DS_Store") raises ValueError, and enumerating
# them would leave gaps in the output numbering.
wav_filenames = sorted(
    (name for name in os.listdir(source_dir) if name.endswith(".wav")),
    key=extract_number,
)

for idx, filename in enumerate(wav_filenames):
    # Look up the attribute row (Language / Story_type) for this recording and
    # fail with a message that names the file if the CSV has no entry for it.
    matching_rows = df[df['filename'] == filename]
    if matching_rows.empty:
        raise LookupError(f"no attribute row for {filename!r} in {attribute_csv}")
    audio_attributes = matching_rows.iloc[0]

    # Full path of the source recording.
    file_path = os.path.join(source_dir, filename)

    # soundfile yields a 1-D array for mono and (frames, channels) otherwise.
    audio, sample_rate = sf.read(file_path)

    # The transforms are fed float32 samples.
    audio = audio.astype(np.float32)

    if audio.ndim == 2:
        # audiomentations expects multichannel audio as (channels, samples).
        # A single call on the whole array shifts every channel by the same
        # randomly drawn number of semitones; separate per-channel calls would
        # draw a different shift for each channel and detune them against each other.
        channels_first = np.ascontiguousarray(audio.T)
        augmented_audio = augment(samples=channels_first, sample_rate=sample_rate)
        # Back to soundfile's (frames, channels) layout for writing.
        augmented_audio = np.ascontiguousarray(augmented_audio.T)
    else:
        # Mono: augment the 1-D signal directly.
        augmented_audio = augment(samples=audio, sample_rate=sample_rate)

    # Output name is 1-based and zero-padded: pitchshift_00001.wav, ...
    new_filename = f"pitchshift_{str(idx+1).zfill(5)}.wav"
    new_file_path = os.path.join(target_dir, new_filename)

    # Save the augmented recording at the original sample rate.
    sf.write(new_file_path, augmented_audio, sample_rate)

    # Metadata record for this augmented file ("id" is the 0-based position).
    audio_info_list.append({
        "id": idx,
        "filename": new_filename,
        "augment": "pitchshift",
        "Language": audio_attributes['Language'],
        "Story_type": audio_attributes['Story_type']
    })

# Dump the collected metadata records to the JSON info file.
json_file_path = os.path.join(info_dir, pitchshift_audio_info_file)
with open(json_file_path, 'w', encoding='utf-8') as json_file:
    json.dump(audio_info_list, json_file, indent=4)

print(f"edited audio stored in {target_dir}, info stored in {info_dir}")

edited audio stored in ./dataset/audio/, info stored in ./dataset/info


In [4]:
r"""
TimeMask Augmentation
output audio `timemask_00001.wav` to `target_dir`
save audio info to `timemask_audio_info.json`
"""

import os
import json
from audiomentations import Compose, TimeMask
import soundfile as sf
import numpy as np
import pandas as pd


# Attribute table for the recordings; CSV header: filename|Language|Story_type
attribute_csv = "./Deception/CBU0521DD_stories_attributes.csv"
df = pd.read_csv(attribute_csv)

# Input recordings, plus the output folders for augmented audio and its metadata.
source_dir = "./Deception/CBU0521DD_stories/"
target_dir = "./dataset/audio/"
info_dir = "./dataset/info/"

# Make sure both output folders exist (no error if they are already there).
os.makedirs(target_dir, exist_ok=True)
os.makedirs(info_dir, exist_ok=True)

# Augmentation chain with a single step: time masking, applied on every call (p=1.0).
# NOTE(review): max_band_part=0.1 presumably caps the masked span at 10% of the
# clip -- confirm against the audiomentations TimeMask documentation.
augment = Compose(
    [TimeMask(max_band_part=0.1, p=1.0)]
)

# Name of the JSON metadata file, and the list that accumulates one record
# per augmented recording before it is dumped.
timemask_audio_info_file = "timemask_audio_info.json"
audio_info_list = []

def extract_number(filename):
    """Convert the part of ``filename`` ahead of its first '.' to an int.

    Lets names like "00010.wav" be ordered by numeric value. ``int()`` raises
    ValueError if that part is not an integer literal.
    """
    numeric_stem, *_ = filename.split('.')
    return int(numeric_stem)

# Keep only the .wav recordings *before* sorting: running the numeric sort key on
# unrelated directory entries (e.g. ".DS_Store") raises ValueError, and enumerating
# them would leave gaps in the output numbering.
wav_filenames = sorted(
    (name for name in os.listdir(source_dir) if name.endswith(".wav")),
    key=extract_number,
)

for idx, filename in enumerate(wav_filenames):
    # Look up the attribute row (Language / Story_type) for this recording and
    # fail with a message that names the file if the CSV has no entry for it.
    matching_rows = df[df['filename'] == filename]
    if matching_rows.empty:
        raise LookupError(f"no attribute row for {filename!r} in {attribute_csv}")
    audio_attributes = matching_rows.iloc[0]

    # Full path of the source recording.
    file_path = os.path.join(source_dir, filename)

    # soundfile yields a 1-D array for mono and (frames, channels) otherwise.
    audio, sample_rate = sf.read(file_path)

    # The transforms are fed float32 samples.
    audio = audio.astype(np.float32)

    if audio.ndim == 2:
        # audiomentations expects multichannel audio as (channels, samples).
        # A single call on the whole array masks the same time span in every
        # channel; separate per-channel calls would draw a different mask
        # position for each channel.
        channels_first = np.ascontiguousarray(audio.T)
        augmented_audio = augment(samples=channels_first, sample_rate=sample_rate)
        # Back to soundfile's (frames, channels) layout for writing.
        augmented_audio = np.ascontiguousarray(augmented_audio.T)
    else:
        # Mono: augment the 1-D signal directly.
        augmented_audio = augment(samples=audio, sample_rate=sample_rate)

    # Output name is 1-based and zero-padded: timemask_00001.wav, ...
    new_filename = f"timemask_{str(idx+1).zfill(5)}.wav"
    new_file_path = os.path.join(target_dir, new_filename)

    # Save the augmented recording at the original sample rate.
    sf.write(new_file_path, augmented_audio, sample_rate)

    # Metadata record for this augmented file ("id" is the 0-based position).
    audio_info_list.append({
        "id": idx,
        "filename": new_filename,
        "augment": "timemask",
        "Language": audio_attributes['Language'],
        "Story_type": audio_attributes['Story_type']
    })

# Dump the collected metadata records to the JSON info file.
json_file_path = os.path.join(info_dir, timemask_audio_info_file)
with open(json_file_path, 'w', encoding='utf-8') as json_file:
    json.dump(audio_info_list, json_file, indent=4)

print(f"edited audio stored in {target_dir}, info stored in {info_dir}")

edited audio stored in ./dataset/audio/, info stored in ./dataset/info
