In [3]:
import numpy as np
from pathlib import Path

In [None]:
dataset_path = Path('/media/ic/datasets/denoising/DS_10283_2791')
clean_path = dataset_path / 'clean_testset_wav'
noisy_path = dataset_path / 'noisy_testset_wav'

In [5]:
clean_wavs = list(clean_path.glob("*"))
noisy_wavs = list(noisy_path.glob("*"))

In [6]:
len(clean_wavs), len(noisy_wavs)

(824, 824)

In [7]:
from IPython.display import Audio, display

In [8]:
def display_pair(i):
    display(Audio(noisy_wavs[i], rate=48000))
    display(Audio(clean_wavs[i], rate=48000))
display_pair(-1)

- SDR https://torchmetrics.readthedocs.io/en/stable/audio/signal_distortion_ratio.html
- SI-SNR https://torchmetrics.readthedocs.io/en/stable/audio/scale_invariant_signal_noise_ratio.html?highlight=Si-SNR

In [7]:
from torchmetrics.audio.pesq import PerceptualEvaluationSpeechQuality
from torchmetrics.audio.stoi import ShortTimeObjectiveIntelligibility
import torch
import torchaudio

from denoisers.SpectralGating import SpectralGating


class Metrics:
    def __init__(self, rate=16000):
        self.nb_pesq = PerceptualEvaluationSpeechQuality(rate, 'wb')
        self.stoi = ShortTimeObjectiveIntelligibility(rate, False)
    def calculate(self, preds, target):
        return {'PESQ': self.nb_pesq(preds, target), 
                'STOI': self.stoi(preds, target)}

def load_wav(path):
    wav, org_sr = torchaudio.load(path)
    wav = torchaudio.functional.resample(wav, orig_freq=org_sr, new_freq=16000)
    return wav




model = SpectralGating()
metrics = Metrics()

clean_wav = load_wav(clean_wavs[0])
noisy_wav = load_wav(noisy_wavs[0])
denoised = model(noisy_wav)


In [8]:
metrics.calculate(noisy_wav, clean_wav)

{'PESQ': tensor(1.2861), 'STOI': tensor(0.9472)}

In [9]:
metrics.calculate(denoised, clean_wav)

{'PESQ': tensor(1.5215), 'STOI': tensor(0.9407)}

In [10]:
from tqdm import tqdm
mean_scores_ideal = {'PESQ': 0,'STOI': 0}
mean_scores_model =  {'PESQ': 0, 'STOI': 0}

for clean_path, noisy_path in tqdm(zip(clean_wavs[:10], noisy_wavs[:10])):
    clean_wav = load_wav(clean_path)
    noisy_wav = load_wav(noisy_path)
    denoised_wav = model(noisy_wav)
    
    scores_ideal = metrics.calculate(noisy_wav, clean_wav)
    scores_model = metrics.calculate(noisy_wav, denoised_wav)
    
    mean_scores_ideal['PESQ'] += scores_ideal['PESQ']
    mean_scores_ideal['STOI'] += scores_ideal['STOI']
    
    mean_scores_model['PESQ'] += scores_model['PESQ']
    mean_scores_model['STOI'] += scores_model['STOI']

mean_scores_ideal['PESQ'] = mean_scores_ideal['PESQ'] / len(clean_wavs)
mean_scores_ideal['STOI'] = mean_scores_ideal['STOI'] / len(clean_wavs)
mean_scores_model['PESQ'] = mean_scores_model['PESQ'] / len(clean_wavs)
mean_scores_model['STOI'] = mean_scores_model['STOI'] / len(clean_wavs)

10it [00:02,  3.75it/s]


In [11]:
mean_scores_ideal

{'PESQ': tensor(0.0215), 'STOI': tensor(0.0110)}

In [12]:
mean_scores_model

{'PESQ': tensor(0.0207), 'STOI': tensor(0.0116)}

In [41]:
def inference(i):
    prediction = model(noisy_wavs[i])
    display(Audio(noisy_wavs[i],rate=48000))
    display(Audio(clean_wavs[i],rate=48000))
    display(Audio(prediction,rate=48000))
    
inference(5)