In [12]:
# !pip install torchmetrics[audio]
# !pip install pesq
import soundfile as sf
import torch
import torchmetrics
from torchmetrics.functional.audio.pesq import perceptual_evaluation_speech_quality
from torchmetrics.functional.audio import signal_distortion_ratio, scale_invariant_signal_distortion_ratio, signal_noise_ratio

In [4]:
# Folder with the input WAV files. File names are appended to it by plain string
# concatenation in the cells below, so the trailing slash is required.
data_folder = '/content/drive/MyDrive/DSP/'

In [5]:
# Load the reference signal and its two degraded versions.
# Each sample rate is kept under its own name so that a mismatch is not
# silently hidden by overwriting `sr`.
orig, sr = sf.read(data_folder + 'gt.wav')
orig_recorded, sr_recorded = sf.read(data_folder + 'gt_recorded.wav')
orig_convolved, sr_convolved = sf.read(data_folder + 'gt_convolved.wav')

# The signals are compared against each other below, so they must share one rate.
assert sr == sr_recorded == sr_convolved, (
    f'sample rates differ: gt={sr}, recorded={sr_recorded}, convolved={sr_convolved}'
)

In [6]:
# Convert the loaded arrays to tensors for torchmetrics.
orig_convolved = torch.tensor(orig_convolved)
# Crop the recording to the reference length and keep index 0 of its second axis.
orig_recorded = torch.tensor(orig_recorded[:len(orig), 0])
orig = torch.tensor(orig)

In [27]:
def count_metrics(bad, orig, fs=8000, mode='nb'):
    """
    Compute torchmetrics audio quality metrics for a degraded signal.

    Args:
        bad: degraded signal; passed to every metric as the prediction.
        orig: reference signal; passed to every metric as the target.
        fs: sampling rate in Hz handed to PESQ. Must be 8000 or 16000.
            The signals are NOT resampled here, so they have to be at this
            rate already for the PESQ score to be meaningful.
        mode: PESQ mode, 'nb' (narrow-band) or 'wb' (wide-band, 16000 Hz only).

    Returns:
        dict with the keys 'snr', 'sdr', 'sisdr' and 'pesq'; each value is
        the metric result converted with ``.item()``.

    Raises:
        ValueError: if ``fs`` and ``mode`` are not a combination PESQ supports.
    """
    # Validate the PESQ settings first so a bad call fails before any metric is computed.
    if fs not in (8000, 16000):
        raise ValueError(f'PESQ supports only fs=8000 or fs=16000, got {fs}')
    if mode not in ('nb', 'wb'):
        raise ValueError(f"PESQ mode must be 'nb' or 'wb', got {mode!r}")
    if mode == 'wb' and fs != 16000:
        raise ValueError("PESQ wide-band mode ('wb') requires fs=16000")

    metrics = {}
    metrics['snr'] = signal_noise_ratio(bad, orig).item()
    metrics['sdr'] = signal_distortion_ratio(bad, orig).item()
    metrics['sisdr'] = scale_invariant_signal_distortion_ratio(bad, orig).item()
    metrics['pesq'] = perceptual_evaluation_speech_quality(bad, orig, fs=fs, mode=mode).item()

    return metrics

## PESQ работает ТОЛЬКО с аудио 8 или 16 кГц. Чтобы использовать её правильно, нужно ресемплировать аудио до 16 кГц и использовать режим WB (либо ресемплировать до 8 кГц и использовать режим NB). То же самое относится к DNSMOS.

In [14]:
# Metrics for the recorded signal, with the original as the reference.
count_metrics(orig_recorded, orig)

{'snr': -0.43122999666163075,
 'sdr': -2.3062697003655113,
 'sisdr': -12.796161772764965,
 'pesq': 2.208599805831909}

In [15]:
# Metrics for the convolved signal, with the original as the reference.
count_metrics(orig_convolved, orig)

{'snr': -2.8770304620141793,
 'sdr': -23.588012838551098,
 'sisdr': -58.114662545772205,
 'pesq': 2.3284718990325928}

Теперь посчитаем более сложные метрики, основанные на нейронных сетях.

# NISQA

In [None]:
# Clone the NISQA repository; the cells below use its run_predict.py and weights/nisqa.tar.
!git clone https://github.com/gabrielmittag/NISQA.git

In [22]:
# Show the full path to the recorded file.
data_folder + 'gt_recorded.wav'

'/content/drive/MyDrive/DSP/gt_recorded.wav'

In [25]:
# Run the NISQA predictor on the original file (gt.wav), with ./results as the output directory.
!python NISQA/run_predict.py --mode predict_file --pretrained_model NISQA/weights/nisqa.tar --deg /content/drive/MyDrive/DSP/gt.wav --output_dir ./results

Device: cpu
Model architecture: NISQA_DIM
Loaded pretrained model from NISQA/weights/nisqa.tar
---> Predicting ...
   deg  mos_pred  noi_pred  dis_pred  col_pred  loud_pred   model
gt.wav  4.706658  4.499088  4.600805  4.408635    4.54724 NISQAv2


In [24]:
# Run the NISQA predictor on the recorded file (gt_recorded.wav), with ./results as the output directory.
!python NISQA/run_predict.py --mode predict_file --pretrained_model NISQA/weights/nisqa.tar --deg /content/drive/MyDrive/DSP/gt_recorded.wav --output_dir ./results

Device: cpu
Model architecture: NISQA_DIM
Loaded pretrained model from NISQA/weights/nisqa.tar
---> Predicting ...
            deg  mos_pred  noi_pred  dis_pred  col_pred  loud_pred   model
gt_recorded.wav  2.869495  3.949003  4.017098  2.590037   2.898912 NISQAv2


In [26]:
# Run the NISQA predictor on the convolved file (gt_convolved.wav), with ./results as the output directory.
!python NISQA/run_predict.py --mode predict_file --pretrained_model NISQA/weights/nisqa.tar --deg /content/drive/MyDrive/DSP/gt_convolved.wav --output_dir ./results

Device: cpu
Model architecture: NISQA_DIM
Loaded pretrained model from NISQA/weights/nisqa.tar
---> Predicting ...
             deg  mos_pred  noi_pred  dis_pred  col_pred  loud_pred   model
gt_convolved.wav  0.940502  2.145077  2.924746  1.222082   2.148507 NISQAv2


| файл | SNR | SDR | SI-SDR | PESQ | NISQA (mos, noi, dis, col, loud) | DNSMOS | MOS |
| --- | --- | --- | --- | --- | --- | --- | --- |
| gt.wav (оригинал) | --- | --- | --- | --- | (4.7, 4.5, 4.6, 4.4, 4.5) | --- | 5 (excellent) |
| gt_recorded.wav (записанный) | -0.43 | -2.3 | -12.8 | 2.21 |  (2.9, 3.9, 4.0, 2.6, 2.9) | --- | 4 (good) |
| gt_convolved.wav (свернутый) | -2.88 | -23.6 | -58.1 | 2.33 | (0.9, 2.1, 2.9, 1.2, 2.1) | --- | 1 (bad) |

# DNSMOS

Для расчёта DNSMOS нужно было запрашивать ключ доступа, поэтому в таблице эта метрика не заполнена.

## В репозитории есть скрипт dnsmos_local.py — им можно посчитать DNSMOS локально.