In [None]:
import os

import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
import soundfile as sf
import torch
import torchaudio
from IPython.display import Audio
from numpy import ndarray

# Switch to the project root so that `src` is importable and the relative
# `data/...` paths used below resolve. The guard makes this cell idempotent:
# re-running it (or starting the kernel already at the root) no longer climbs
# one directory further up.
# NOTE(review): assumes the project root is the directory that contains `src/`.
if not os.path.isdir("src"):
    os.chdir("..")

from src.utils import *
from tqdm import tqdm

def add_scaled_noise(signal: ndarray, intensity=0.5, noise_type="gaussian") -> ndarray:
    """
    Dodaje szum o określonej intensywności względem sygnału.

    :param signal: Sygnał wejściowy.
    :param intensity: Intensywność szumu względem RMS sygnału (np. 0.5 dla 50%).
    :param noise_type: Typ szumu ('gaussian', 'rayleigh', 'poisson').
    :return: Sygnał z dodanym szumem.
    """
    rms_signal = np.sqrt(np.mean(signal**2))
    rms_noise = intensity * rms_signal

    match noise_type:
        case "gaussian":
            noise = np.random.normal(0, rms_noise, signal.shape)
        case "rayleigh":
            # Rayleigh noise requires scaling its parameter to achieve target RMS
            sigma = rms_noise / np.sqrt(2 - np.pi / 2)
            noise = np.random.rayleigh(sigma, signal.shape)
        case "poisson":
            # Scale Poisson noise approximation
            # Use the signal as lambda and scale it to match RMS intensity
            noise = np.random.poisson(rms_noise, signal.shape) - rms_signal
            noise = noise * rms_noise / np.sqrt(np.mean(noise**2))
        case _:
            raise ValueError("Nieobsługiwany typ szumu.")

    return signal + noise

def add_noise_to_dataset(df, dataset_path, intensity, output_path, noise_type="gaussian"):
    """
    Write a noisy copy of every audio file listed in ``df``.

    Each row's file is loaded from ``row['path']``, passed through
    ``add_scaled_noise`` and saved to
    ``<output_path>/<person_id>/<utterance_filename>`` at its original sample rate.

    :param df: Table with one row per file; the columns ``path``, ``person_id``
        and ``utterance_filename`` are read.
    :param dataset_path: Not used by this function (files are located through
        ``row['path']``); kept so existing calls keep working.
    :param intensity: Noise intensity forwarded to ``add_scaled_noise``.
    :param output_path: Root directory of the noisy copy; created if missing.
    :param noise_type: Noise distribution name forwarded to ``add_scaled_noise``.
    """
    os.makedirs(output_path, exist_ok=True)
    # Remember directories already created so makedirs runs once per directory
    # and not once per file.
    created_dirs = set()

    for _, row in tqdm(df.iterrows(), total=df.shape[0]):
        target_path = os.path.join(output_path, row['person_id'], row['utterance_filename'])
        target_dir = os.path.dirname(target_path)
        if target_dir not in created_dirs:
            os.makedirs(target_dir, exist_ok=True)
            created_dirs.add(target_dir)

        waveform, sample_rate = torchaudio.load(row['path'])
        audio = waveform.numpy()

        audio_noisy = add_scaled_noise(audio, intensity=intensity, noise_type=noise_type)

        # Adding float64 noise promotes float32 audio to float64; cast back so
        # the saved file keeps the sample format that was loaded.
        # NOTE(review): float64 tensors are presumably not accepted by every
        # torchaudio backend — confirm against the torchaudio.save docs.
        noisy_waveform = torch.tensor(audio_noisy.astype(audio.dtype))
        torchaudio.save(target_path, noisy_waveform, sample_rate)

# Location of the VoxCeleb1 test WAV files, relative to the current working directory.
dataset_path = "data/vox1_test_wav"
# Build the file index consumed by add_noise_to_dataset.
# NOTE(review): scan_directory_voxceleb1 is presumably provided by the
# `from src.utils import *` star import and presumably returns a DataFrame with
# at least `path`, `person_id` and `utterance_filename` columns — confirm.
df = scan_directory_voxceleb1(dataset_path)


100%|██████████| 4874/4874 [00:15<00:00, 323.91it/s]


In [None]:
# Gaussian noise at 1% of each file's RMS level.
intensity = 0.01
output_path = f"data/noisy/vox1/gaussian_{intensity}"

add_noise_to_dataset(
    df,
    dataset_path,
    intensity=intensity,
    output_path=output_path,
    noise_type="gaussian",
)

In [None]:
# Gaussian noise at 10% of each file's RMS level.
intensity = 0.1
output_path = f"data/noisy/vox1/gaussian_{intensity}"

add_noise_to_dataset(
    df,
    dataset_path,
    intensity=intensity,
    output_path=output_path,
    noise_type="gaussian",
)

In [None]:
# Gaussian noise at 50% of each file's RMS level.
intensity = 0.5
output_path = f"data/noisy/vox1/gaussian_{intensity}"

add_noise_to_dataset(
    df,
    dataset_path,
    intensity=intensity,
    output_path=output_path,
    noise_type="gaussian",
)