In [20]:
from audiomentations import AddBackgroundNoise, PolarityInversion, Compose,AddGaussianNoise, AddGaussianSNR, AddShortNoises, Mp3Compression, Compose
import soundfile as sf
import numpy as np
import librosa
import audiomentations
import pandas as pd

# Standard augment

In [21]:
import librosa
import soundfile as sf
import os
import ffmpeg

def get_all_csv_files(root_dir, extension='.wav'):
    """
    Recursively collect the paths of all files under ``root_dir`` whose name
    ends with ``extension``.

    NOTE: despite its name (kept so existing callers keep working), this
    function collects ``.wav`` files by default, not ``.csv`` files.

    :param root_dir: Path to the root directory to search (sub-directories
        are searched as well).
    :param extension: Filename suffix to match. Matching is case-sensitive,
        so the default ``'.wav'`` does not match ``'.WAV'``.
    :return: List of paths (``root_dir`` joined with the relative location of
        each match), in the order produced by ``os.walk``.
    """
    return [
        os.path.join(dirpath, filename)
        for dirpath, _dirnames, filenames in os.walk(root_dir)
        for filename in filenames
        if filename.endswith(extension)
    ]

def convert_audio(input_file,output_file):
    """
    Transcode ``input_file`` to a WAV file at ``output_file`` using ffmpeg.

    The output options request WAV container, ``pcm_s16le`` codec, a
    16000 Hz sample rate and a single channel.

    :param input_file: Path of the audio file to read.
    :param output_file: Path of the WAV file to write.
    :return: None
    """
    # Options forwarded as keyword arguments to the ffmpeg output node.
    encode_options = {
        'format': 'wav',
        'acodec': 'pcm_s16le',
        'ar': '16000',
        'ac': 1,
    }
    source_stream = ffmpeg.input(input_file)
    target_stream = source_stream.output(output_file, **encode_options)
    target_stream.run()



def augment_audio_standard(input_path, output_path, time_stretch_rate=1.2, pitch_shift_steps=1.3):
    """
    Augment audio by time-stretching and then pitch-shifting it.

    Parameters:
    input_path (str): Path to the input audio file.
    output_path (str): Path to save the augmented audio file.
    time_stretch_rate (float): Speed factor handed to librosa's time_stretch
        (default 1.2). Values above 1 speed the audio up (shorter output),
        values below 1 slow it down (longer output).
    pitch_shift_steps (float): Number of steps, possibly fractional, handed
        to librosa's pitch_shift as ``n_steps`` (default 1.3).

    Returns:
    None
    """
    # sr=None keeps the file's own sample rate instead of resampling.
    y, sr = librosa.load(input_path, sr=None)

    # `rate` and `sr` are passed by keyword: recent librosa releases accept
    # them only as keyword arguments, and older releases accept keywords too.
    y_stretched = librosa.effects.time_stretch(y, rate=time_stretch_rate)

    # Pitch-shift the already stretched signal.
    y_shifted = librosa.effects.pitch_shift(y_stretched, sr=sr, n_steps=pitch_shift_steps)

    # Save the augmented audio at the same sample rate it was loaded with.
    sf.write(output_path, y_shifted, sr)

    print(f"Augmented audio saved to {output_path}")

# add short noise

In [27]:
from audiomentations import Compose, AddShortNoises, Gain
def add_short_noise(audio_path_input,audio_path_output, noise_dir, noise_gain_db=15):
    """
    Mix short noise bursts into an audio file and save the result.

    :param audio_path_input: Path of the original audio file.
    :param audio_path_output: Path where the augmented audio is written.
    :param noise_dir: Directory of noise sounds, passed to ``AddShortNoises``
        as ``sounds_path``.
    :param noise_gain_db: Fixed gain in dB used for the ``Gain`` step
        (min and max are both set to this value). ``Gain`` runs after
        ``AddShortNoises`` in the pipeline, so it scales the whole mixed
        signal (speech + noise), not only the noise.
        NOTE(review): a large positive gain may push samples outside the
        valid range and clip on write - confirm this is intended.
    :return: None
    """
    # sr=None keeps the file's own sample rate instead of resampling.
    audio, sr = librosa.load(audio_path_input, sr=None)

    # Build the augmentation pipeline: short noises first, then a fixed gain.
    augmenter = Compose([
        AddShortNoises(
            sounds_path=noise_dir,  # Pass the directory path directly
            min_snr_in_db=10,
            max_snr_in_db=30,
            min_time_between_sounds=0.0005,  # minimum pause between noise bursts
            max_time_between_sounds=0.001,  # maximum pause between noise bursts
            p=1.0
        ),
        Gain(min_gain_in_db=noise_gain_db, max_gain_in_db=noise_gain_db, p=1.0)  # fixed gain on the mixed signal
    ])

    # Apply the augmentation
    augmented_audio = augmenter(samples=audio, sample_rate=sr)

    # Write with the rate the samples actually have; a hard-coded 16000 would
    # change speed and pitch for any input that is not 16 kHz.
    sf.write(audio_path_output, augmented_audio, sr)

## add White noise

In [32]:
def add_white_noise(path_input_audio, audio_path_output, noise_level=0.04):
    """
    Add Gaussian white noise to an audio file (simulates radio-like hiss)
    and save the result.

    Reference: https://cmtext.indiana.edu/acoustics/chapter1_waves3.php#:~:text=GAUSSIAN%20NOISE&text=Named%20after%20mathematician%2Fphysicist%20Carl,as%20lacking%20in%20bass%20frequencies.

    :param path_input_audio: Path of the original audio file.
    :param audio_path_output: Path where the noisy audio is written.
    :param noise_level: Factor the standard-normal noise is multiplied by
        before it is added to the samples (default 0.04).
    :return: None
    """
    audio, sample_rate = sf.read(path_input_audio)

    # Draw noise with the same shape as the samples so multi-channel input
    # (frames x channels) works as well as mono.
    noise = np.random.standard_normal(audio.shape)
    audio_augment = audio + noise_level * noise

    # Keep the input's sample rate; a hard-coded 16000 would change speed and
    # pitch for any input that is not 16 kHz.
    sf.write(audio_path_output, audio_augment, sample_rate)

In [33]:
# wav_lists = get_all_csv_files("/home/pdnguyen/Audio_Augmentation/fubon_tiktok")
# Generate a white-noise copy of every labelled clip and write a metadata CSV
# (file,text,duration) that points at the augmented files.
import csv as csv_module  # aliased: the name `csv` is bound to the labels DataFrame below

csv = pd.read_csv("/home/pdnguyen/Audio_Augmentation/fubon_tiktok/final_label.csv")

# Make sure the output folders exist before any file is written.
os.makedirs("/home/pdnguyen/Audio_Augmentation/fubon_white_noise/wavs", exist_ok=True)

# newline='' plus an explicit '\n' terminator lets the csv writer control
# line endings while keeping one '\n'-terminated row per clip.
with open('/home/pdnguyen/Audio_Augmentation/fubon_white_noise/metadata.csv', 'w', encoding='utf8', newline='') as fp:
    # A real CSV writer quotes fields, so commas, quotes or newlines inside
    # the transcript text no longer break the column layout.
    writer = csv_module.writer(fp, lineterminator='\n')
    writer.writerow(['file', 'text', 'duration'])
    for index,row in csv.iterrows():
        # Output name: source file stem + row index + suffix.
        # NOTE(review): the stem is cut at the first '.', and stem and index
        # are joined without a separator, so distinct rows could in principle
        # map to the same name - confirm the source names rule this out.
        name_wav = row["file"].split("/")[-1].split(".")[0] + f"{index}_white_noise.wav"
        audio_path = f"/home/pdnguyen/Audio_Augmentation/fubon_white_noise/wavs/{name_wav}"
        add_white_noise(row["file"],audio_path)
        writer.writerow([audio_path, str(row["text"]), row["duration"]])