# Define the Model

# START AYUSH'S CODE

In [1]:
# -*- coding: utf-8 -*-
"""
Created on Wed Nov 27 11:55:53 2024

@author: Ayush
"""

import os
import random
import librosa
import numpy as np
import soundfile as sf
import matplotlib.pyplot as plt
import librosa.display

# Preprocessing class for handling audio files
class Preprocessing:
    def load_audio(self, file_path: str) -> np.ndarray:
        """
        Load an audio file and return its waveform as a numpy array.

        Parameters:
        file_path (str): Path to the audio file.

        Returns:
        np.ndarray: Audio waveform, and sample rate.
        """
        audio, sr = librosa.load(file_path, sr=None)  # Load audio with original sampling rate
        return audio, sr

    def extract_spectrogram(self, audio: np.ndarray, sr: int = 22050) -> np.ndarray:
        """
        Convert the audio waveform to a Mel spectrogram.

        Parameters:
        audio (np.ndarray): The audio waveform.
        sr (int): The sampling rate of the audio.

        Returns:
        np.ndarray: Spectrogram (magnitude in decibels).
        """
        spectrogram = librosa.feature.melspectrogram(y=audio, sr=sr, n_fft=2048, hop_length=512)
        spectrogram_db = librosa.power_to_db(spectrogram, ref=np.max)
        return spectrogram_db

    def save_spectrogram(self, spectrogram: np.ndarray, file_path: str):
        """
        Save the spectrogram as a .npy file.

        Parameters:
        spectrogram (np.ndarray): The spectrogram to save.
        file_path (str): Path where to save the spectrogram file.
        """
        np.save(file_path, spectrogram)

    def match_length(self, audio: np.ndarray, target_length: int) -> np.ndarray:
        """
        Match the length of the audio to a target length by trimming or padding.

        Parameters:
        audio (np.ndarray): Input audio waveform.
        target_length (int): Target length for the audio.

        Returns:
        np.ndarray: Length-matched audio waveform.
        """
        if len(audio) < target_length:
            # Pad with zeros if shorter
            audio = np.pad(audio, (0, target_length - len(audio)), mode='constant')
        else:
            # Trim if longer
            audio = audio[:target_length]
        return audio

# Function to get all noise files from a directory
def get_noise_files(noise_dir: str) -> list:
    """
    Retrieve all noise file paths from the specified directory.

    Parameters:
    noise_dir (str): Path to the directory containing noise files.

    Returns:
    list: List of noise file paths.
    """
    noise_files = []
    for root, _, files in os.walk(noise_dir):
        for file in files:
            if file.endswith(('.wav', '.flac')):  # Process only supported audio files
                noise_files.append(os.path.join(root, file))
    return noise_files

# Function to augment audio with multiple noise samples and save clean spectrograms
def process_audio_with_multiple_noises(input_dir: str, output_dir: str, noise_dir: str, noise_factor: float = 0.02, n_noises: int = 5):
    """
    Process all clean audio files in the input directory, augment them with multiple noise samples from the MS-SNSD folder,
    and save the processed audio and spectrograms, including the clean spectrogram.

    Parameters:
    input_dir (str): Path to the directory containing clean audio files.
    output_dir (str): Path to the directory for saving augmented audio and spectrograms.
    noise_dir (str): Path to the MS-SNSD noise folder.
    noise_factor (float): The factor by which noise is added to clean audio.
    n_noises (int): Number of random noise samples to apply to each clean audio file.
    """
    preprocess = Preprocessing()
    noise_files = get_noise_files(noise_dir)

    if not noise_files:
        print("No noise files found in the specified MS-SNSD directory!")
        return

    for root, _, files in os.walk(input_dir):
        for file in files:
            if file.endswith('.flac'):  # Only process .flac files
                clean_audio_path = os.path.join(root, file)
                clean_audio, sr = preprocess.load_audio(clean_audio_path)

                # Prepare output paths
                output_audio_path = os.path.join(output_dir, os.path.relpath(root, input_dir))
                if not os.path.exists(output_audio_path):
                    os.makedirs(output_audio_path)

                # Save clean spectrogram
                clean_spectrogram = preprocess.extract_spectrogram(clean_audio, sr)
                preprocess.save_spectrogram(clean_spectrogram, os.path.join(output_audio_path, f'clean_spectrogram_{file.replace(".flac", ".npy")}'))

                # Create multiple noisy versions
                for i in range(n_noises):
                    noise_file = random.choice(noise_files)
                    noise_audio, _ = librosa.load(noise_file, sr=None)
                    noise_audio = preprocess.match_length(noise_audio, len(clean_audio))
                    noisy_audio = clean_audio + noise_factor * noise_audio
                    noisy_audio = np.clip(noisy_audio, -1.0, 1.0)

                    # Save noisy audio
                    noisy_audio_file_path = os.path.join(output_audio_path, f'noisy_{i+1}_' + file)
                    sf.write(noisy_audio_file_path, noisy_audio, sr, format='FLAC')

                    # Extract and save noisy spectrogram
                    noisy_spectrogram = preprocess.extract_spectrogram(noisy_audio, sr)
                    preprocess.save_spectrogram(noisy_spectrogram, os.path.join(output_audio_path, f'noisy_spectrogram_{i+1}_' + file.replace('.flac', '.npy')))

                print(f"Processed {n_noises} noisy variants and clean spectrogram for: {clean_audio_path}")

# Call the function with directories
process_audio_with_multiple_noises(
    input_dir="data/train/LibriSpeech/dev-clean",  # Directory containing clean audio
    output_dir="noisy-train",  # Directory to save augmented files
    noise_dir="noise-dir",  # Directory containing noise files
    noise_factor=0.2,  # Adjust noise factor as needed
    n_noises=1  # Number of noisy variants to generate per audio file
)


No noise files found in the specified MS-SNSD directory!


# END AYUSH'S CODE

In [4]:
def load_audio_into_array(local_data_dir: str):

  all_files = []

  # load audio files into memory
  for dir1 in sorted(os.listdir(local_data_dir)):
      for dir2 in sorted(os.listdir(local_data_dir + dir1)):
          for f in sorted(os.listdir(local_data_dir + dir1 + '/' + dir2)):
              file_path = local_data_dir + dir1 + '/' + dir2 + '/' + f
              if f.endswith('.flac'):
                  all_files.append(file_path)

  return all_files

In [5]:
def extract_windows(audio: np.ndarray, preprocessing: Preprocessing, sample_rate: int = 16000, window_duration: float = 1.02):
  """
  Extract windows of sample_rate * window_duration from the audio file. Extract
  spectrograms from them and return a list with all the spectrograms.
  Parameters:
  audio (np.ndarray): audio file to extract windows from.
  preprocessing (Preprocessing): Instance of preprocessing class.
  sample_rate (int): sampling rate of the audio file.
  window_duration (float): duration of the windows in seconds.
  """
  window_samples = int(sample_rate * window_duration)

  spectrograms = []

  for start in range(0, len(audio), window_samples):
      end = start + window_samples

      window = audio[start:end]

      if len(window) < window_samples:
        # pad with silence until the window is filled
        padding = window_samples - len(window)
        window = np.pad(window, (0, padding), mode='constant')

      spectrogram = preprocessing.extract_spectrogram(window, sample_rate)
      spectrograms.append(spectrogram)

  return spectrograms

In [6]:
def get_constant_length_spectrograms(audios: list[np.ndarray], sample_rate: int = 16000, window_duration: float = 1.02):
  """
  Extract spectrograms of a constant length from a list of audios. Return
  the list of spectrograms.
  Parameters:
  audio_files (list[np.ndarray]): list of audios for extraction.
  sample_rate (int): sampling rate of the audio file.
  window_duration (float): duration of the windows in seconds.
  """
  window_samples = int(sample_rate * window_duration)
  preprocessing = Preprocessing()

  spectrograms = []

  for audio in audios:
    # audio, sr = librosa.load(file, sr=sample_rate)
    extracted_spectrograms = extract_windows(audio, preprocessing, sample_rate, window_duration)
    spectrograms.extend(extracted_spectrograms)

  return spectrograms

In [7]:
def plot_spectrogram(spectrogram, title):
  """
  Plot the spectrogram.
  Parameters:
  spectrogram (np.ndarray): The spectrogram to plot.
  title (str): Title for the plot.
  """
  plt.figure(figsize=(10, 4))
  librosa.display.specshow(spectrogram, sr=16000, hop_length=512, x_axis='time', y_axis='mel', cmap='viridis')
  plt.colorbar(format='%+2.0f dB')
  plt.title(title)
  plt.tight_layout()
  plt.show()

In [8]:
def inverse_spectrogram(spectrogram: np.ndarray, volume_factor: int = 1):
  """
  Turn the spectrogram back into audio.
  Parameters:
  spectrogram (np.ndarray): The spectrogram to turn back into audio.
  volume_factor (int): The volume factor to apply to the audio.
  """
  mel_spectrogram = librosa.db_to_power(spectrogram)
  linear_spectrogram = librosa.feature.inverse.mel_to_stft(mel_spectrogram, sr=16000, n_fft=2048)
  audio_signal = librosa.griffinlim(linear_spectrogram, n_iter=32, hop_length=512, n_fft=2048)
  audio_signal *= volume_factor
  return audio_signal

In [9]:
def add_rgb_channels_and_normalize(spectrograms: list[np.ndarray]) -> list[np.ndarray]:
  """
  Add RGB channels to the spectrogram and normalize it.
  Parameters:
  spectrograms (list[np.ndarray]): List of spectrogram arrays.
  """
  min_val = -80
  max_val = 0

  rgb_spectrograms = np.repeat(np.array(spectrograms)[..., np.newaxis], 3, axis=-1)
  normalized_spectrograms = (rgb_spectrograms - min_val) / (max_val - min_val)

  return normalized_spectrograms

In [10]:
def add_single_channel_and_normalize(spectrograms: list[np.ndarray]) -> list[np.ndarray]:
  """
  Add single channel to the spectrogram [turn shape from (x, 128, 32) to
  (x, 128, 32, 1)] and normalize it.
  Parameters:
  spectrograms (list[np.ndarray]): List of spectrogram arrays.
  """
  min_val = -80
  max_val = 0

  rgb_spectrograms = np.repeat(np.array(spectrograms)[..., np.newaxis], 1, axis=-1)
  normalized_spectrograms = (rgb_spectrograms - min_val) / (max_val - min_val)

  return normalized_spectrograms

In [11]:
from tensorflow import keras
from keras.layers import Input, Conv2D, MaxPooling2D, UpSampling2D, Concatenate
from keras.models import Model

def unet(input_shape=(128, 32, 1)):
  """
  Define the U-Net model for speech enhancement.
  Parameters:
  input_shape (tuple): Shape of the input spectrogram.
  """
  inputs = Input(input_shape)

  # Encoder
  c1 = Conv2D(64, (3, 3), activation='relu', padding='same')(inputs)
  c1 = Conv2D(64, (3, 3), activation='relu', padding='same')(c1)
  p1 = MaxPooling2D((2, 2))(c1)

  c2 = Conv2D(128, (3, 3), activation='relu', padding='same')(p1)
  c2 = Conv2D(128, (3, 3), activation='relu', padding='same')(c2)
  p2 = MaxPooling2D((2, 2))(c2)

  # Bottleneck
  b = Conv2D(256, (3, 3), activation='relu', padding='same')(p2)

  # Decoder
  u1 = UpSampling2D((2, 2))(b)
  u1 = Concatenate()([u1, c2])
  c3 = Conv2D(128, (3, 3), activation='relu', padding='same')(u1)
  c3 = Conv2D(128, (3, 3), activation='relu', padding='same')(c3)

  u2 = UpSampling2D((2, 2))(c3)
  u2 = Concatenate()([u2, c1])
  c4 = Conv2D(64, (3, 3), activation='relu', padding='same')(u2)
  c4 = Conv2D(64, (3, 3), activation='relu', padding='same')(c4)

  outputs = Conv2D(1, (1, 1), activation='sigmoid')(c4)

  return Model(inputs, outputs)

In [12]:
import tensorflow as tf
from keras.saving import register_keras_serializable

@register_keras_serializable()
def ssim_loss(y_true, y_pred):
  """
  Compute the SSIM loss between the true and predicted spectrograms.
  Parameters:
  y_true (tf.Tensor): True spectrogram.
  y_pred (tf.Tensor): Predicted spectrogram.
  """
  return 1 - tf.reduce_mean(tf.image.ssim(y_true, y_pred, max_val=1.0))

In [13]:
def compile_model(loc_model: keras.Model, optimizer: str, loss_function: str):
  """
  Compile the model with the given optimizer and loss function.
  Parameters:
  loc_model (keras.Model): The model to compile.
  optimizer (str): The optimizer to use.
  loss_function (str): The loss function to use.
  """
  loc_model.compile(optimizer=optimizer, loss=ssim_loss, metrics=['mae'])

In [14]:
def save_model(loc_model: keras.Model, file_path: str):
  """
  Save the model to the given file path.
  Parameters:
  loc_model (keras.Model): The model to save.
  file_path (str): The file path to save the model to.
  """
  loc_model.save(file_path)

In [15]:
# In order for this to work, the ssim_loss function must have the @register_keras_serializable()
# annotation along with the function in the code where the model is used
downloaded_model = keras.models.load_model('trained_model_unet_traffic_cafe_vacuum.keras')

In [52]:
def denoise_audio(audio_file: str, model: keras.Model):
  """
  Denoise the audio file using the given model.
  Parameters:
  audio_file (str): The path to the audio file to denoise.
  model (keras.Model): The model to use for denoising.
  """
  audio, sr = librosa.load(audio_file, sr=16000)

  original_length = len(audio) # store the original length of the audio


  # Preprocess the audio file, turning it into spectrograms
  preprocessing = Preprocessing()
  extracted_spectrograms = extract_windows(audio, preprocessing, 16000, 1.02)
  normalized_spectrograms = add_single_channel_and_normalize(extracted_spectrograms)

  # Get predictions
  predictions = model.predict(normalized_spectrograms)

  min_val = -80
  max_val = 0

  # Process the predicted spetrograms back into audio
  unscaled_predictions = predictions * (max_val - min_val) + min_val
  predicted_spectrograms = np.squeeze(unscaled_predictions, axis=-1)
  combined_spectrogram = np.hstack(predicted_spectrograms)
  predicted_audio = inverse_spectrogram(combined_spectrogram)
  predicted_audio = predicted_audio[:original_length] # trim to match original length


  return predicted_audio

In [None]:
test_prediction = denoise_audio('./noisy-test/4446/2271/noisy_1_4446-2271-0018.flac', downloaded_model)

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 376ms/step


In [None]:
# What the predicted clean audio sounds like
display(Audio(test_prediction, rate=16000))

In [65]:
# extract a zip file
import zipfile

with zipfile.ZipFile('noise_test_dataset_mini.zip', 'r') as zip_ref:
    zip_ref.extractall('noise_test_dataset_mini')


In [68]:
import os
import pathlib

def find_file_pairs(base_directory):
    """
    Recursively find pairs of clean and noisy files in a directory and its subdirectories.

    Args:
        base_directory (str): Root directory to start searching from

    Returns:
        list: A list of tuples containing (clean_file_path, noisy_file_path)
    """

    matched_files = []

    # Walk through all directories and subdirectories
    for root, dirs, files in os.walk(base_directory):
        print(root, dirs, files)
        # Create a dictionary to match clean and noisy files

        # Collect files, looking for clean and noisy versions
        for testfile in files:

            clean_audio_path = os.path.join(root, testfile)

            print(clean_audio_path)

            filepath = os.path.join(root, testfile)

            # Check if filename starts with 'clean_' or 'noisy_'
            if testfile.startswith('clean_'):
              noisy_file = testfile.replace('clean_', 'noisy_')
              noisy_audio_path = os.path.join(root, noisy_file)

              matched_files.append((noisy_audio_path, filepath))

    return matched_files

if __name__ == "__main__":
    # Replace 'your_directory_path' with the path to your root directory
    base_dir = '/content/noise_test_dataset_mini'

    # Find file pairs
    pairs = find_file_pairs(base_dir)
    print(pairs)
    # Print out the pairs
    print(f"Found {len(pairs)} file pairs:")


/content/noise_test_dataset_mini [] ['clean_1_237-126133-0005.flac', 'noisy_1_237-126133-0009.flac', 'clean_1_237-126133-0006.flac', 'noisy_1_237-126133-0011.flac', 'noisy_1_237-126133-0019.flac', 'noisy_1_237-126133-0021.flac', 'noisy_1_237-126133-0016.flac', 'noisy_1_237-126133-0008.flac', 'noisy_1_237-126133-0017.flac', 'noisy_1_237-126133-0018.flac', 'clean_1_237-126133-0024.flac', 'clean_1_237-126133-0011.flac', 'noisy_1_237-126133-0020.flac', 'clean_1_237-126133-0020.flac', 'noisy_1_237-126133-0006.flac', 'noisy_1_237-126133-0025.flac', 'clean_1_237-126133-0002.flac', 'noisy_1_237-126133-0002.flac', 'clean_1_237-126133-0019.flac', 'noisy_1_237-126133-0001.flac', 'clean_1_237-126133-0014.flac', 'noisy_1_237-126133-0005.flac', 'noisy_1_237-126133-0007.flac', 'clean_1_237-126133-0023.flac', 'clean_1_237-126133-0000.flac', 'clean_1_237-126133-0013.flac', 'clean_1_237-126133-0007.flac', 'noisy_1_237-126133-0003.flac', 'noisy_1_237-126133-0022.flac', 'clean_1_237-126133-0017.flac', 'cl

In [27]:
# play noisy audio in colab

import IPython.display as ipd
ipd.Audio('/content/noise-test-dataset/noise_test_dataset/noisy_3_2094-142345-0021.flac')

In [26]:
# play clean audio in colab

ipd.Audio('/content/noise-test-dataset/noise_test_dataset/clean_3_2094-142345-0021.flac')

## Now we run test

In [29]:
!pip install pesq

Collecting pesq
  Using cached pesq-0.0.4.tar.gz (38 kB)
  Preparing metadata (setup.py) ... [?25l[?25hdone
Building wheels for collected packages: pesq
  Building wheel for pesq (setup.py) ... [?25l[?25hdone
  Created wheel for pesq: filename=pesq-0.0.4-cp310-cp310-linux_x86_64.whl size=262949 sha256=3bdeaff0d64d2127f3b1d726ecce11e36f0c002808346027cdbfa0773ec6a7fb
  Stored in directory: /root/.cache/pip/wheels/c5/4e/2c/251524370c0fdd659e99639a0fbd0ca5a782c3aafcd456b28d
Successfully built pesq
Installing collected packages: pesq
Successfully installed pesq-0.0.4


In [70]:
import numpy as np
import librosa
import soundfile as sf
import tensorflow as tf
from skimage.metrics import structural_similarity as ssim
import os
from pesq import pesq  # PESQ calculation
from scipy.signal import correlate

class Evaluation:
    def __init__(self, model_weights_path: str, test_data_path: str):
        self.model = self.load_model(model_weights_path)
        self.noisy_audio_path = test_data_path

    def load_model(self, model_weights_path: str) -> tf.keras.Model:
        model = tf.keras.models.load_model(model_weights_path)
        return model

    def find_file_pairs(self, base_directory):
        matched_files = []
        for root, dirs, files in os.walk(base_directory):
            for testfile in files:
                clean_audio_path = os.path.join(root, testfile)
                print(clean_audio_path)
                filepath = os.path.join(root, testfile)
                if testfile.startswith('clean_'):
                  noisy_file = testfile.replace('clean_', 'noisy_')
                  noisy_audio_path = os.path.join(root, noisy_file)
                  matched_files.append((noisy_audio_path, filepath))
        return matched_files

    def load_test_data(self, file_path: str) -> np.ndarray:
        # Load audio file and return its waveform
        audio, sr = librosa.load(file_path, sr=16000)
        return audio

    def evaluate_model(self) -> dict:

      psnr_value = 0
      ssim_value = 0
      mse_value = 0
      snr_value = 0
      pesq_value = 0
      mcd_value = 0

      pairs = self.find_file_pairs(self.noisy_audio_path)

      length = len(pairs)

      for pair in pairs:
        self.noisy_audio_path = pair[0]
        self.clean_audio_path = pair[1]

        clean_audio = self.load_test_data(self.clean_audio_path)

        original_length = len(clean_audio) # store the original length of the audio


        # Make predictions
        predicted_audio = denoise_audio(self.noisy_audio_path, downloaded_model)

        predicted_audio = predicted_audio[:original_length] # trim to match original length

        # Compute metrics
        psnr_value += self.compute_PSNR(clean_audio, predicted_audio)
        ssim_value += self.compute_SSIM(clean_audio, predicted_audio)
        mse_value += self.compute_MSE(clean_audio, predicted_audio)
        snr_value += self.compute_SNR(clean_audio, predicted_audio)
        pesq_value += self.compute_PESQ(clean_audio, predicted_audio)
        mcd_value += self.compute_MCD(clean_audio, predicted_audio)

      metrics = {
          'PSNR': psnr_value / length,
          'SSIM': ssim_value / length,
          'MSE': mse_value / length,
          'SNR': snr_value / length,
          'PESQ': pesq_value / length,
          'MCD': mcd_value / length
      }
      return metrics

    def compute_PSNR(self, clean_frames: np.ndarray, noisy_frames: np.ndarray) -> float:
        mse = np.mean((clean_frames - noisy_frames) ** 2)
        if mse == 0:
            return float('inf')  # Infinite PSNR if there is no noise
        max_pixel_value = 1.0  # Normalized range for audio signals
        psnr_value = 20 * np.log10(max_pixel_value / np.sqrt(mse))
        return psnr_value

    def compute_SSIM(self, clean_frames: np.ndarray, noisy_frames: np.ndarray) -> float:
        # SSIM expects 2D images; reshape if necessary
        return ssim(clean_frames, noisy_frames, data_range=clean_frames.max() - clean_frames.min())

    def compute_MSE(self, clean_frames: np.ndarray, noisy_frames: np.ndarray) -> float:
        return np.mean((clean_frames - noisy_frames) ** 2)

    def compute_SNR(self, clean_audio: np.ndarray, predicted_audio: np.ndarray) -> float:
        # Calculate the Signal-to-Noise Ratio (SNR)
        noise = clean_audio - predicted_audio
        signal_power = np.sum(clean_audio ** 2)
        noise_power = np.sum(noise ** 2)
        if noise_power == 0:
            return float('inf')  # Infinite SNR if there's no noise
        snr = 10 * np.log10(signal_power / noise_power)
        return snr

    def compute_PESQ(self, clean_audio: np.ndarray, predicted_audio: np.ndarray) -> float:
        # Use PESQ (requires sampling rate and proper file formats)
        # PESQ expects a clean reference and a degraded version
        return pesq(16000, clean_audio, predicted_audio, 'wb')

    def compute_MCD(self, clean_audio: np.ndarray, predicted_audio: np.ndarray) -> float:
        # Compute Mel Cepstral Distortion (MCD)
        clean_mfcc = librosa.feature.mfcc(y=clean_audio, sr=16000)
        predicted_mfcc = librosa.feature.mfcc(y=predicted_audio, sr=16000)
        mcd = np.mean(np.sqrt(np.sum((clean_mfcc - predicted_mfcc) ** 2, axis=0)))
        return mcd

    def generate_report(self, metrics: dict, file_path: str):
        with open(file_path, 'w') as f:
            for metric, value in metrics.items():
                f.write(f"{metric}: {value:.4f}\n")



###########################
# Main Evaluation

if __name__ == "__main__":
    evaluator = Evaluation(
        model_weights_path="modeldev_luke.h5", # enter your model name here
        test_data_path = "noise_test_dataset_mini",
    )

    metrics = evaluator.evaluate_model()
    evaluator.generate_report(metrics, "performance_report.txt")
    print("Evaluation complete. Report generated as 'performance_report.txt'.")




noise_test_dataset_mini/clean_1_237-126133-0005.flac
noise_test_dataset_mini/noisy_1_237-126133-0009.flac
noise_test_dataset_mini/clean_1_237-126133-0006.flac
noise_test_dataset_mini/noisy_1_237-126133-0011.flac
noise_test_dataset_mini/noisy_1_237-126133-0019.flac
noise_test_dataset_mini/noisy_1_237-126133-0021.flac
noise_test_dataset_mini/noisy_1_237-126133-0016.flac
noise_test_dataset_mini/noisy_1_237-126133-0008.flac
noise_test_dataset_mini/noisy_1_237-126133-0017.flac
noise_test_dataset_mini/noisy_1_237-126133-0018.flac
noise_test_dataset_mini/clean_1_237-126133-0024.flac
noise_test_dataset_mini/clean_1_237-126133-0011.flac
noise_test_dataset_mini/noisy_1_237-126133-0020.flac
noise_test_dataset_mini/clean_1_237-126133-0020.flac
noise_test_dataset_mini/noisy_1_237-126133-0006.flac
noise_test_dataset_mini/noisy_1_237-126133-0025.flac
noise_test_dataset_mini/clean_1_237-126133-0002.flac
noise_test_dataset_mini/noisy_1_237-126133-0002.flac
noise_test_dataset_mini/clean_1_237-126133-001