## SDR: Signal-to-Distortion Ratio

<hr>

### Version of Python

In [1]:
# Report the version of the interpreter that is actually running this kernel.
# A shell call such as `!python --version` is resolved through PATH and can
# point at a different interpreter than the one executing the notebook.
import platform
print(f'Python {platform.python_version()}')

Python 3.10.11


### Import required libraries

In [2]:
# Silence UserWarning and FutureWarning messages for the rest of the session
import warnings

for warn in (UserWarning, FutureWarning):
    warnings.filterwarnings('ignore', category=warn)

In [3]:
import os
import math
import torch
import torchaudio
import torchmetrics
import numpy as np
import pandas as pd
import matplotlib
import jupyterlab as jlab

### Versions of required libraries

In [4]:
# Display label paired with the imported module whose version is reported
labelled_modules = [
    ('PyTorch', torch),
    ('TorchAudio', torchaudio),
    ('TorchMetrics', torchmetrics),
    ('NumPy', np),
    ('Pandas', pd),
    ('Matplotlib', matplotlib),
    ('JupyterLab', jlab),
]

# Column-oriented data for the version table
pkgs = {
    'Package': [label for label, _module in labelled_modules],
    'Version': [module.__version__ for _label, module in labelled_modules],
}

# Build the table with a 1-based row index labelled 'No'
df_pkgs = pd.DataFrame(data=pkgs)
df_pkgs.index = pd.RangeIndex(start=1, stop=len(df_pkgs) + 1, name='No')

display(df_pkgs)

Unnamed: 0_level_0,Package,Version
No,Unnamed: 1_level_1,Unnamed: 2_level_1
1,PyTorch,2.2.1
2,TorchAudio,2.2.1
3,TorchMetrics,1.3.2
4,NumPy,1.26.4
5,Pandas,2.2.1
6,Matplotlib,3.8.3
7,JupyterLab,4.1.5


### SDR: Signal-to-Distortion Ratio (1) — TorchMetrics implementation

In [5]:
# Locations of the clean and noisy recordings
clean_speech_path = 'DS_10283_2791/clean_trainset_56spk_wav/p234_001.wav'
noisy_speech_path = 'DS_10283_2791/noisy_trainset_56spk_wav/p234_001.wav'

# Read both waveforms; only the clean file's sample rate is kept
clean_speech, sample_rate = torchaudio.load(clean_speech_path)
noisy_speech, _ = torchaudio.load(noisy_speech_path)

# The two tensors are compared sample by sample, so their shapes must agree
assert noisy_speech.shape == clean_speech.shape, "Clean and noisy signal tensors must have the same shape"

# Score the noisy signal (first argument) against the clean signal (second
# argument) using a 512-tap distortion filter
sdr_metric = torchmetrics.audio.SignalDistortionRatio(filter_length=512)
sdr = sdr_metric(noisy_speech, clean_speech)

# Report the score with five decimal places
print(f'Signal-to-Distortion Ratio (SDR): {sdr:.5f} dB')

Signal-to-Distortion Ratio (SDR): 13.61730 dB


### SDR: Signal-to-Distortion Ratio (2) — manual implementation

In [6]:
def compute_sdr(clean_signal, noisy_signal, filter_length=512):
    """
    Compute the Signal-to-Distortion Ratio (SDR) between a clean and noisy signal.

    Both signals are converted to double precision and scaled to unit norm along
    the last dimension. The auto-correlation of the clean signal and its
    cross-correlation with the noisy signal are computed via FFT, truncated to
    ``filter_length`` lags, and used to solve a symmetric Toeplitz linear system
    for the optimal filter. The resulting coherence ``coh`` is converted to
    decibels as ``10 * log10(coh / (1 - coh))``.

    The input tensors are never modified.

    Args:
        clean_signal (torch.Tensor): The clean (reference) signal tensor. The
            last dimension is treated as the time axis.
        noisy_signal (torch.Tensor): The noisy signal tensor, with the same
            shape as ``clean_signal``.
        filter_length (int): The length of the filter used for computing SDR,
            i.e. the number of correlation lags kept.

    Returns:
        torch.Tensor: The SDR in decibels (dB) as a float64 tensor with all
        singleton dimensions squeezed out (0-dim for a single-channel input).

    Raises:
        AssertionError: If the two signals do not have the same shape.
    """

    assert clean_signal.shape == noisy_signal.shape, "Clean and noisy signal tensors must have the same shape"

    # Work in double precision. Note that ``.double()`` returns the very same
    # tensor object when the input is already float64, so the arguments may
    # still alias the caller's data after this point.
    clean_signal = clean_signal.double()
    noisy_signal = noisy_signal.double()

    # Normalize to unit norm along the time axis. The division is deliberately
    # out-of-place: an in-place ``/=`` would overwrite the caller's tensors
    # whenever they were passed in as float64.
    clean_signal = clean_signal / torch.clamp(torch.norm(clean_signal, dim=-1, keepdim=True), min=1e-6)
    noisy_signal = noisy_signal / torch.clamp(torch.norm(noisy_signal, dim=-1, keepdim=True), min=1e-6)

    # FFT size: next power of two that holds the full linear correlation
    n_fft = 2 ** math.ceil(math.log2(noisy_signal.shape[-1] + clean_signal.shape[-1] - 1))

    # Compute auto-correlation of clean_signal (inverse FFT of its power spectrum)
    t_fft_clean = torch.fft.rfft(clean_signal, n=n_fft, dim=-1)
    r_0 = torch.fft.irfft(t_fft_clean.real**2 + t_fft_clean.imag**2, n=n_fft, dim=-1)[..., :filter_length]

    # Compute cross-correlation of clean_signal and noisy_signal
    p_fft_noisy = torch.fft.rfft(noisy_signal, n=n_fft, dim=-1)
    b = torch.fft.irfft(t_fft_clean.conj() * p_fft_noisy, n=n_fft, dim=-1)[..., :filter_length]

    # Compute symmetric Toeplitz matrix: a strided sliding-window view over the
    # mirrored lag vector, flipped so that entry (i, j) holds r_0[|i - j|]
    vec_exp = torch.cat([torch.flip(r_0, dims=(-1,)), r_0[..., 1:]], dim=-1)
    v_len = r_0.shape[-1]
    symmetric_toeplitz = torch.as_strided(
        vec_exp, size=vec_exp.shape[:-1] + (v_len, v_len), stride=vec_exp.stride()[:-1] + (1, 1)
    ).flip(dims=(-1,))

    # Solve for the optimal filter
    sol = torch.linalg.solve(symmetric_toeplitz, b)

    # Compute the coherence
    coh = torch.einsum("...l,...l->...", b, sol)

    # Transform to decibels
    ratio = coh / (1 - coh)
    sdr = 10.0 * torch.log10(ratio)

    return sdr.squeeze()

# Locations of the clean and noisy recordings
clean_speech_path = 'DS_10283_2791/clean_trainset_56spk_wav/p234_001.wav'
noisy_speech_path = 'DS_10283_2791/noisy_trainset_56spk_wav/p234_001.wav'

# Read both waveforms; the second value returned by each load is discarded
clean_speech, _ = torchaudio.load(clean_speech_path)
noisy_speech, _ = torchaudio.load(noisy_speech_path)

# Evaluate the hand-written SDR with a filter length of 512
sdr = compute_sdr(
    clean_signal=clean_speech,
    noisy_signal=noisy_speech,
    filter_length=512,
)

# Report the score with five decimal places
print(f'Signal-to-Distortion Ratio (SDR): {sdr:.5f} dB')

Signal-to-Distortion Ratio (SDR): 13.61730 dB
