In [5]:
import os

# Step up to the parent directory (presumably the repository root) so that the
# `src` package and the relative "data/..." paths used later resolve.
# Guarded so that re-running this cell does not climb one directory higher on
# every execution: once `src` is visible from the working directory we stay put.
if not os.path.isdir("src"):
    os.chdir("..")
# NOTE(review): wildcard import hides where names come from; later cells call
# scan_directory_voxceleb2, which presumably is provided by this module.
from src.utils import *

print("Current Working Directory ", os.getcwd())

Current Working Directory  c:\Users\filip\Documents\code\SV-eval


In [6]:
# Sanity check on one dataset: make sure the expected directory layout is
# present before the model is loaded and the full sweeps are started.
data_dir, data_type, noise_type = "data", "noisy", "gaussian"
snr = 0
dataset_name = f"vox1_test_segments_snr_{snr}_noisy_{noise_type}"

noisy_dt_path = os.path.join(data_dir, data_type, noise_type, dataset_name)
if os.path.exists(noisy_dt_path):
    print(f"Directory {noisy_dt_path} exists")
else:
    # Report on stdout first, then abort the cell with the same message.
    print(f"Directory {noisy_dt_path} does not exist")
    raise FileNotFoundError(f"Directory {noisy_dt_path} does not exist")

Directory data\noisy\gaussian\vox1_test_segments_snr_0_noisy_gaussian exists


In [7]:
import torch
import torchaudio
from speechbrain.inference.enhancement import SpectralMaskEnhancement

# Load the pretrained MetricGAN+ enhancement model. Prefer the GPU, but fall
# back to the CPU instead of failing at load time on hosts without CUDA.
enhance_model = SpectralMaskEnhancement.from_hparams(
    source="speechbrain/metricgan-plus-voicebank",
    run_opts={"device": "cuda" if torch.cuda.is_available() else "cpu"},
)


  state_dict = torch.load(path, map_location=device)


In [8]:

from tqdm.notebook import tqdm

def denoise_dataset_metricgan(noisy_dataset, data_dir, subdir, noise_type, dataset_name=None):
    """Enhance every file listed in ``noisy_dataset`` and save the results.

    Each file is loaded, resampled to 16 kHz when its sample rate differs,
    passed through the notebook-level ``enhance_model`` and written to
    ``<data_dir>/<subdir>/<noise_type>/<dataset_name>/<person_id>/<file name>``.

    Args:
        noisy_dataset: Table exposing ``iterrows()`` and ``shape`` (presumably
            a pandas DataFrame) whose rows provide a ``"path"`` entry (audio
            file to load) and a ``"person_id"`` entry (output sub-directory).
        data_dir: Root directory under which the output tree is created.
        subdir: Name of the sub-directory of ``data_dir`` that receives the
            enhanced data.
        noise_type: Noise-type directory name inside ``subdir``.
        dataset_name: Name of the leaf dataset directory. When omitted, the
            notebook-level global ``dataset_name`` is used, which is how
            callers that predate this parameter supply it.

    Raises:
        NameError: If ``dataset_name`` is not passed and no global of that
            name exists.
    """
    if dataset_name is None:
        # Backward compatibility: older call sites set a global instead of
        # passing the name explicitly.
        try:
            dataset_name = globals()["dataset_name"]
        except KeyError:
            raise NameError("name 'dataset_name' is not defined") from None

    output_dir = os.path.join(data_dir, subdir, noise_type, dataset_name)
    os.makedirs(output_dir, exist_ok=True)

    target_sr = 16000
    # Follow the device the model was loaded on; fall back to the previously
    # hard-coded "cuda" if the model object does not expose one.
    device = getattr(enhance_model, "device", "cuda")

    for _, row in tqdm(noisy_dataset.iterrows(), total=noisy_dataset.shape[0]):
        noisy_path = row["path"]

        # Load noisy audio
        noisy, sr = torchaudio.load(noisy_path)
        if sr != target_sr:
            noisy = torchaudio.functional.resample(noisy, sr, target_sr)
            print(f"Resampling {noisy_path} from {sr} to {target_sr}")
            sr = target_sr

        # The loaded tensor is passed as-is, so its first dimension acts as the
        # batch dimension. Every row is full length, hence one relative length
        # of 1.0 per row (a single 1.0 for mono files, as before).
        noisy = noisy.to(device)
        lengths = torch.ones(noisy.shape[0])
        with torch.no_grad():  # inference only, no autograd bookkeeping needed
            enhanced = enhance_model.enhance_batch(noisy, lengths=lengths)

        # Create output directory for person
        person_subdir = os.path.join(output_dir, row["person_id"])
        os.makedirs(person_subdir, exist_ok=True)

        # Save enhanced audio
        # NOTE(review): only the base name of the input is kept, so two inputs
        # of the same person sharing a file name would overwrite each other --
        # confirm that base names are unique per person.
        enhanced_path = os.path.join(person_subdir, os.path.basename(noisy_path))
        torchaudio.save(enhanced_path, enhanced.cpu(), sr)

In [9]:


# Synthetic-noise sweep: enhance every (noise type, SNR) dataset with MetricGAN+.
noise_types = ["gaussian", "poisson", "rayleigh"]
snrs = [0, 5, 10, 15, 20]
data_dir = "data"
data_type = "noisy"
output_subdir = "metricgan"

for noise_type, snr in [(kind, level) for kind in noise_types for level in snrs]:
    # Assigned at notebook level on purpose: denoise_dataset_metricgan picks
    # the dataset name up from the global namespace.
    dataset_name = f"vox1_test_segments_snr_{snr}_noisy_{noise_type}"
    noisy_dt_path = os.path.join(data_dir, data_type, noise_type, dataset_name)

    if os.path.exists(noisy_dt_path):
        print(f"Directory {noisy_dt_path} exists")
        noisy_dataset = scan_directory_voxceleb2(noisy_dt_path)
        denoise_dataset_metricgan(noisy_dataset, data_dir, output_subdir, noise_type)
    else:
        # Missing datasets are reported and skipped rather than aborting the sweep.
        print(f"Directory {noisy_dt_path} does not exist")

Directory data\noisy\gaussian\vox1_test_segments_snr_0_noisy_gaussian exists


  0%|          | 0/9119 [00:00<?, ?it/s]

Directory data\noisy\gaussian\vox1_test_segments_snr_5_noisy_gaussian exists


  0%|          | 0/9119 [00:00<?, ?it/s]

Directory data\noisy\gaussian\vox1_test_segments_snr_10_noisy_gaussian exists


  0%|          | 0/9119 [00:00<?, ?it/s]

Directory data\noisy\gaussian\vox1_test_segments_snr_15_noisy_gaussian exists


  0%|          | 0/9119 [00:00<?, ?it/s]

Directory data\noisy\gaussian\vox1_test_segments_snr_20_noisy_gaussian exists


  0%|          | 0/9119 [00:00<?, ?it/s]

Directory data\noisy\poisson\vox1_test_segments_snr_0_noisy_poisson exists


  0%|          | 0/9119 [00:00<?, ?it/s]

Directory data\noisy\poisson\vox1_test_segments_snr_5_noisy_poisson exists


  0%|          | 0/9119 [00:00<?, ?it/s]

Directory data\noisy\poisson\vox1_test_segments_snr_10_noisy_poisson exists


  0%|          | 0/9119 [00:00<?, ?it/s]

Directory data\noisy\poisson\vox1_test_segments_snr_15_noisy_poisson exists


  0%|          | 0/9119 [00:00<?, ?it/s]

Directory data\noisy\poisson\vox1_test_segments_snr_20_noisy_poisson exists


  0%|          | 0/9119 [00:00<?, ?it/s]

Directory data\noisy\rayleigh\vox1_test_segments_snr_0_noisy_rayleigh exists


  0%|          | 0/9119 [00:00<?, ?it/s]

Directory data\noisy\rayleigh\vox1_test_segments_snr_5_noisy_rayleigh exists


  0%|          | 0/9119 [00:00<?, ?it/s]

Directory data\noisy\rayleigh\vox1_test_segments_snr_10_noisy_rayleigh exists


  0%|          | 0/9119 [00:00<?, ?it/s]

Directory data\noisy\rayleigh\vox1_test_segments_snr_15_noisy_rayleigh exists


  0%|          | 0/9119 [00:00<?, ?it/s]

Directory data\noisy\rayleigh\vox1_test_segments_snr_20_noisy_rayleigh exists


  0%|          | 0/9119 [00:00<?, ?it/s]

In [10]:
# Process real background noise types
noise_types = ["AirConditioner", "Babble", "Neighbor"]
snrs = [0, 5, 10, 15, 20]
data_dir = "data"
# Built with os.path.join rather than a hard-coded "\\" so the nested directory
# resolves on every platform (on Windows the resulting string is unchanged).
data_type = os.path.join("noisy_bg", "vox1_test_wav_bq_noise")
output_subdir = "metricgan"

for noise_type in noise_types:
    for snr in snrs:
        # Assigned at notebook level on purpose: denoise_dataset_metricgan
        # picks the dataset name up from the global namespace.
        dataset_name = f"vox1_test_wav_snr_{snr}_{noise_type}"

        noisy_dt_path = os.path.join(data_dir, data_type, noise_type, dataset_name)
        if not os.path.exists(noisy_dt_path):
            # Missing datasets are reported and skipped, not treated as fatal.
            print(f"Directory {noisy_dt_path} does not exist")
            continue
        print(f"Directory {noisy_dt_path} exists")

        noisy_dataset = scan_directory_voxceleb2(noisy_dt_path)
        denoise_dataset_metricgan(
            noisy_dataset, data_dir, output_subdir, noise_type
        )

Directory data\noisy_bg\vox1_test_wav_bq_noise\AirConditioner\vox1_test_wav_snr_0_AirConditioner exists


  0%|          | 0/9119 [00:00<?, ?it/s]

Directory data\noisy_bg\vox1_test_wav_bq_noise\AirConditioner\vox1_test_wav_snr_5_AirConditioner exists


  0%|          | 0/9119 [00:00<?, ?it/s]

Directory data\noisy_bg\vox1_test_wav_bq_noise\AirConditioner\vox1_test_wav_snr_10_AirConditioner exists


  0%|          | 0/9119 [00:00<?, ?it/s]

Directory data\noisy_bg\vox1_test_wav_bq_noise\AirConditioner\vox1_test_wav_snr_15_AirConditioner exists


  0%|          | 0/9119 [00:00<?, ?it/s]

Directory data\noisy_bg\vox1_test_wav_bq_noise\AirConditioner\vox1_test_wav_snr_20_AirConditioner exists


  0%|          | 0/9119 [00:00<?, ?it/s]

Directory data\noisy_bg\vox1_test_wav_bq_noise\Babble\vox1_test_wav_snr_0_Babble exists


  0%|          | 0/9119 [00:00<?, ?it/s]

Directory data\noisy_bg\vox1_test_wav_bq_noise\Babble\vox1_test_wav_snr_5_Babble exists


  0%|          | 0/9119 [00:00<?, ?it/s]

Directory data\noisy_bg\vox1_test_wav_bq_noise\Babble\vox1_test_wav_snr_10_Babble exists


  0%|          | 0/9119 [00:00<?, ?it/s]

Directory data\noisy_bg\vox1_test_wav_bq_noise\Babble\vox1_test_wav_snr_15_Babble exists


  0%|          | 0/9119 [00:00<?, ?it/s]

Directory data\noisy_bg\vox1_test_wav_bq_noise\Babble\vox1_test_wav_snr_20_Babble exists


  0%|          | 0/9119 [00:00<?, ?it/s]

Directory data\noisy_bg\vox1_test_wav_bq_noise\Neighbor\vox1_test_wav_snr_0_Neighbor exists


  0%|          | 0/9119 [00:00<?, ?it/s]

Directory data\noisy_bg\vox1_test_wav_bq_noise\Neighbor\vox1_test_wav_snr_5_Neighbor exists


  0%|          | 0/9119 [00:00<?, ?it/s]

Directory data\noisy_bg\vox1_test_wav_bq_noise\Neighbor\vox1_test_wav_snr_10_Neighbor exists


  0%|          | 0/9119 [00:00<?, ?it/s]

Directory data\noisy_bg\vox1_test_wav_bq_noise\Neighbor\vox1_test_wav_snr_15_Neighbor exists


  0%|          | 0/9119 [00:00<?, ?it/s]

Directory data\noisy_bg\vox1_test_wav_bq_noise\Neighbor\vox1_test_wav_snr_20_Neighbor exists


  0%|          | 0/9119 [00:00<?, ?it/s]