### Import necessary Library

In [1]:
#This script is used to import all the necessary libraries for the project
#The output of this script is the version of the libraries imported

import io
import os
import sys
import math
#import tarfile
import multiprocessing

import scipy
import librosa
import soundfile as sf

import requests
import matplotlib
import matplotlib.pyplot as plt
import pandas as pd
import numpy as np
import time
from IPython.display import Audio, display

import torch
import torchaudio
import torchaudio.functional as F
import torchaudio.transforms as T

import parselmouth
from parselmouth.praat import call
from pathlib import Path

from metrics import AudioMetrics
from metrics import AudioMetrics2
#from Audio_metrics import AudioMetrics2
import noise_addition_utils
from pypesq import pesq
import shutil

#import pathlib

print(torch.__version__)
print(torchaudio.__version__)


2.3.1+cpu
2.3.1+cpu


  torchaudio.set_audio_backend("soundfile")


In [8]:
#-------------------------------------------------------------------------------
# Helper functions.
#-------------------------------------------------------------------------------

def denoise_audio(input_path, output_path, smoothing_factor):
    # Load the audio file
    snd = parselmouth.Sound(input_path)

    # Apply smoothing to reduce noise
    snd_denoised = snd.copy()
    #snd_denoised = call(snd_denoised, "Smooth", smoothing_factor)
    snd_denoised = call(snd_denoised, "Reduce noise", 0.0, 0.0, 0.025, 80.0, 10000.0, 40.0,-20, "Spectral-subtraction")
    # Save the denoised audio
    snd_denoised.save(output_path, "WAV")
def get_stats(waveform, sample_rate=None, src=None):
    max_ = waveform.max().numpy()
    min_ = waveform.min().numpy()
    mean_ = waveform.mean().numpy()
    std_ = waveform.std().numpy()
    return max_,min_,mean_,std_

def print_stats(waveform, sample_rate=None, src=None):
    if src:
        print("-" * 10)
        print("Source:", src)
        print("-" * 10)
    if sample_rate:
        print("Sample Rate:", sample_rate)
        print("Shape:", tuple(waveform.shape))
        print("Dtype:", waveform.dtype)
        print(f" - Max:     {waveform.max().item():6.3f}")
        print(f" - Min:     {waveform.min().item():6.3f}")
        print(f" - Mean:    {waveform.mean().item():6.3f}")
        print(f" - Std Dev: {waveform.std().item():6.3f}")
        print()
        print(waveform)
        print()
    # max_ = waveform.max().numpy()
    # min_ = waveform.min().numpy()
    # mean_ = waveform.mean().numpy()
    # std_ = waveform.std().numpy()
    # return max_,min_,mean_,std_

def plot_waveform(waveform, sample_rate, title="Waveform", xlim=None, ylim=None):
  waveform = waveform.numpy()

  num_channels, num_frames = waveform.shape
  time_axis = torch.arange(0, num_frames) / sample_rate

  figure, axes = plt.subplots(num_channels, 1)
  if num_channels == 1:
    axes = [axes]
  for c in range(num_channels):
    axes[c].plot(time_axis, waveform[c], linewidth=1)
    axes[c].grid(True)
    if num_channels > 1:
      axes[c].set_ylabel(f'Channel {c+1}')
    if xlim:
      axes[c].set_xlim(xlim)
    if ylim:
      axes[c].set_ylim(ylim)
  figure.suptitle(title)
  plt.show(block=False)

def plot_specgram(waveform, sample_rate, title="Spectrogram", xlim=None):
  waveform = waveform.numpy()
  num_channels, num_frames = waveform.shape
  time_axis = torch.arange(0, num_frames) / sample_rate
  figure, axes = plt.subplots(num_channels, 1)
  if num_channels == 1:
    axes = [axes]
  for c in range(num_channels):
    axes[c].specgram(waveform[c], Fs=sample_rate)
    if num_channels > 1:
      axes[c].set_ylabel(f'Channel {c+1}')
    if xlim:
      axes[c].set_xlim(xlim)
  figure.suptitle(title)
  plt.show(block=False)

def play_audio(waveform, sample_rate):
  waveform = waveform.numpy()
  num_channels, num_frames = waveform.shape
  if num_channels == 1:
    display(Audio(waveform[0], rate=sample_rate))
  elif num_channels == 2:
    display(Audio((waveform[0], waveform[1]), rate=sample_rate))
  else:
    raise ValueError("Waveform with more than 2 channels are not supported.")

def plot_spectrogram(spec, title=None, ylabel='freq_bin', aspect='auto', xmax=None):
  fig, axs = plt.subplots(1, 1)
  axs.set_title(title or 'Spectrogram (db)')
  axs.set_ylabel(ylabel)
  axs.set_xlabel('frame')
  im = axs.imshow(librosa.power_to_db(spec), origin='lower', aspect=aspect)
  if xmax:
    axs.set_xlim((0, xmax))
  fig.colorbar(im, ax=axs)
  plt.show(block=False)

def get_spectrogram(
    n_fft = 400,
    win_len = None,
    hop_len = None,
    power = 2.0,
):
  waveform, _ = get_speech_sample()
  spectrogram = T.Spectrogram(
      n_fft=n_fft,
      win_length=win_len,
      hop_length=hop_len,
      center=True,
      pad_mode="reflect",
      power=power,
  )
  return spectrogram(waveform)
def resample_wav_files(input_path, output_path, target_sr):
    # Create the output directory if it doesn't exist
    os.makedirs(output_path, exist_ok=True)

    # Get a list of all WAV files in the input directory and its subfolders
    for root, dirs, files in os.walk(input_path):
        for file_name in files:
            if file_name.endswith('.wav'):
                # Read the input WAV file
                input_file = os.path.join(root, file_name)
                audio, sr = librosa.load(input_file, sr=target_sr)

                # Write the resampled audio to the output WAV file
                output_dir = os.path.join(output_path, os.path.relpath(root, input_path))
                os.makedirs(output_dir, exist_ok=True)
                output_file = os.path.join(output_dir, file_name)
                sf.write(output_file, audio, target_sr)
def copy_wav_files_to_single_directory(input_path, output_path):
    # Create the output directory if it doesn't exist
    os.makedirs(output_path, exist_ok=True)

    # Get a list of all WAV files in the input directory and its subfolders
    for root, dirs, files in os.walk(input_path):
        for file_name in files:
            if file_name.endswith('.wav'):
                # Copy the WAV file to the output directory with the original filename
                input_file = os.path.join(root, file_name)
                output_file = os.path.join(output_path, file_name)
                shutil.copy2(input_file, output_file)

### Spectral Subtraction method

#### Define the path to denoise 

In [9]:
bahna_dataset = sorted(list(Path(r"C:\Users\nghna\Downloads\drive-download-20241022T091430Z-001").rglob('*.wav')))
bahna_dataset[0]

WindowsPath('C:/Users/nghna/Downloads/drive-download-20241022T091430Z-001/1.1.wav')

In [10]:
# dataframe to record statistics
stat_bahna_spectral_subtraction = pd.DataFrame(columns=['Name','Max', 'Min', 'Mean','Std','Noise_level_before_denoised','Max_after_denoised', 'Min_after_denoised', 'Mean_after_denoised','Std_after_denoised','Noise_level_after_denoised'])
stat_bahna_spectral_subtraction

Unnamed: 0,Name,Max,Min,Mean,Std,Noise_level_before_denoised,Max_after_denoised,Min_after_denoised,Mean_after_denoised,Std_after_denoised,Noise_level_after_denoised


In [11]:
#Load file to denoise
for i in bahna_dataset:
    print(i)
    waveform, sample_rate = torchaudio.load(i)
    max_,min_,mean_,std_ = get_stats(waveform, sample_rate = sample_rate)
    # Calculate noise level
    if std_!=0 :
        noise_level = 20*( np.log10(std_/max_))
    else:
        noise_level = 0
    normalized_path = os.path.normpath(i)
    input_path = os.path.splitext(normalized_path)[0] + os.path.splitext(normalized_path)[1]
    denoised_path = os.path.splitext(normalized_path)[0] + "_denoised_spectral_subtraction" + os.path.splitext(normalized_path)[1]
    snd = parselmouth.Sound(input_path)
    snd_denoised = snd.copy()
    snd_denoised = call(snd_denoised, "Reduce noise", 0.0, 0.0, 0.025, 80.0, 10000.0, 40.0,noise_level, "Spectral-subtraction")
    # Save the denoised audio
    snd_denoised.save(denoised_path, "WAV")
    print(denoised_path)
    waveform_denoised, sample_rate_denoised = torchaudio.load(denoised_path)
    max_after,min_after,mean_after,std_after=get_stats(waveform_denoised, sample_rate=sample_rate_denoised)
    if std_after!=0 :
        noise_level_after = 10*( np.log10(std_after/max_after))
    else:
        noise_level_after = 0
    df = pd.DataFrame({"Name":[i],"Max":[max_],"Min":[min_],"Mean":[mean_],"Std":[std_],"Noise_level_before_denoised":[noise_level],'Max_after_denoised':[max_after], 'Min_after_denoised':[min_after], 'Mean_after_denoised':[mean_after],'Std_after_denoised':[std_after],'Noise_level_after_denoised':[noise_level_after]})
    stat_bahna_spectral_subtraction=pd.concat([stat_bahna_spectral_subtraction,df], ignore_index=True)
    

C:\Users\nghna\Downloads\drive-download-20241022T091430Z-001\1.1.wav
C:\Users\nghna\Downloads\drive-download-20241022T091430Z-001\1.1_denoised_spectral_subtraction.wav


  stat_bahna_spectral_subtraction=pd.concat([stat_bahna_spectral_subtraction,df], ignore_index=True)


C:\Users\nghna\Downloads\drive-download-20241022T091430Z-001\1.2.wav
C:\Users\nghna\Downloads\drive-download-20241022T091430Z-001\1.2_denoised_spectral_subtraction.wav


In [14]:
stat_bahna_spectral_subtraction.head()

Unnamed: 0,Name,Max,Min,Mean,Std,Noise_level_before_denoised,Max_after_denoised,Min_after_denoised,Mean_after_denoised,Std_after_denoised,Noise_level_after_denoised
0,C:\Projects\Denoise-module-main\Denoise-module...,0.6552124,-0.7651062,-3.0624975e-05,0.07685376,-18.61434,0.65423584,-0.76345825,-2.699632e-08,0.07657743,-9.316335
1,C:\Projects\Denoise-module-main\Denoise-module...,0.55578613,-0.4998474,-3.9136925e-05,0.0951897,-15.326356,0.5375061,-0.47183228,1.0024613e-06,0.090305954,-7.74667
2,C:\Projects\Denoise-module-main\Denoise-module...,0.47247314,-0.53967285,-2.9462188e-05,0.048951853,-19.69216,0.44641113,-0.47470093,1.3798474e-05,0.03827656,-10.668021
3,C:\Projects\Denoise-module-main\Denoise-module...,0.4461975,-0.56170654,-4.4831922e-05,0.059409264,-17.51346,0.44207764,-0.55718994,-3.0327021e-06,0.05851719,-8.782151
4,C:\Projects\Denoise-module-main\Denoise-module...,0.32443237,-0.37664795,-1.7288974e-05,0.046413366,-16.889622,0.32235718,-0.3694458,1.4427932e-06,0.044849005,-8.565845


In [15]:
sample_rate

48000

### FRCRN method

This model use a fixed input with Sampling frequency is 16khz so we have to resample the input before we put it into our model

In [4]:
# Example usage resample to 16khz
input_directory = r'C:\Projects\Denoise-module-main\Denoise-module-main\Test'
output_directory = r'C:\Projects\Denoise-module-main\Denoise-module-main\testTarget'
target_sampling_rate = 16000

resample_wav_files(input_directory, output_directory, target_sampling_rate)

#### Define path to denoised

In [5]:
bahna_dataset_FRCRN = sorted(list(Path(r"C:\Projects\Denoise-module-main\Denoise-module-main\testTarget").rglob('*.wav')))
bahna_dataset_FRCRN[0]

WindowsPath('C:/Projects/Denoise-module-main/Denoise-module-main/testTarget/0001.2.wav')

In [6]:
stat_bahna_FRCRN = pd.DataFrame(columns=['Name','Max', 'Min', 'Mean','Std','Noise_level_before_denoised','Max_after_denoised', 'Min_after_denoised', 'Mean_after_denoised','Std_after_denoised','Noise_level_after_denoised'])
stat_bahna_FRCRN

Unnamed: 0,Name,Max,Min,Mean,Std,Noise_level_before_denoised,Max_after_denoised,Min_after_denoised,Mean_after_denoised,Std_after_denoised,Noise_level_after_denoised


In [7]:
from modelscope.pipelines import pipeline
from modelscope.utils.constant import Tasks


2024-06-25 15:26:42,955 - modelscope - INFO - PyTorch version 2.3.1 Found.
2024-06-25 15:26:42,957 - modelscope - INFO - Loading ast index from C:\Users\nghna\.cache\modelscope\ast_indexer
2024-06-25 15:26:43,247 - modelscope - INFO - Loading done! Current index file version is 1.15.0, with md5 c60ca7d7f8b85e22d9ef728cbad5174f and a total number of 980 components indexed


transformer is not installed, please install it if you want to use related modules


  from .autonotebook import tqdm as notebook_tqdm


In [8]:
for i in bahna_dataset_FRCRN:
    print(i)
    waveform, sample_rate = torchaudio.load(i)
    max_,min_,mean_,std_ = get_stats(waveform, sample_rate = sample_rate)
    if std_!=0 :
        noise_level = 10*( np.log10(std_/max_))
    else:
        noise_level = 0
    # df = pd.DataFrame({"Name":[i],"Std":[std_],"Noise_level":[noise_level]})
    # stat_bahna=pd.concat([stat_bahna,df], ignore_index=True)
    normalized_path = os.path.normpath(i)
    input_path = os.path.splitext(normalized_path)[0] + os.path.splitext(normalized_path)[1]
    denoised_path = os.path.splitext(normalized_path)[0] + "_denoised_FRCRN" + os.path.splitext(normalized_path)[1]
    
    # denoised audio    
    ans = pipeline(
        Tasks.acoustic_noise_suppression,
        model='./demo_model')
    result = ans(
        str(i),
        output_path=denoised_path)
    print(denoised_path)
    waveform_denoised, sample_rate_denoised = torchaudio.load(denoised_path)
    max_after,min_after,mean_after,std_after=get_stats(waveform_denoised, sample_rate=sample_rate_denoised)
    if std_after!=0 :
        noise_level_after = 10*( np.log10(std_after/max_after))
    else:
        noise_level_after = 0
    df_FRCRN = pd.DataFrame({"Name":[i],"Max":[max_],"Min":[min_],"Mean":[mean_],"Std":[std_],"Noise_level_before_denoised":[noise_level],'Max_after_denoised':[max_after], 'Min_after_denoised':[min_after], 'Mean_after_denoised':[mean_after],'Std_after_denoised':[std_after],'Noise_level_after_denoised':[noise_level_after]})
    stat_bahna_FRCRN=pd.concat([stat_bahna_FRCRN,df_FRCRN], ignore_index=True)

2024-06-25 15:26:44,861 - modelscope - INFO - initiate model from ./demo_model
2024-06-25 15:26:44,862 - modelscope - INFO - initiate model from location ./demo_model.
2024-06-25 15:26:44,864 - modelscope - INFO - initialize model from ./demo_model


C:\Projects\Denoise-module-main\Denoise-module-main\testTarget\0001.2.wav


2024-06-25 15:26:45,179 - modelscope - INFO - cuda is not available, using cpu instead.


inputs:(1, 55913)
padding: 19913
inputs after padding:(1, 75826)


  stat_bahna_FRCRN=pd.concat([stat_bahna_FRCRN,df_FRCRN], ignore_index=True)
2024-06-25 15:27:03,716 - modelscope - INFO - initiate model from ./demo_model
2024-06-25 15:27:03,717 - modelscope - INFO - initiate model from location ./demo_model.
2024-06-25 15:27:03,718 - modelscope - INFO - initialize model from ./demo_model


C:\Projects\Denoise-module-main\Denoise-module-main\testTarget\0001.2_denoised_FRCRN.wav
C:\Projects\Denoise-module-main\Denoise-module-main\testTarget\0001.2_denoised_spectral_subtraction.wav


2024-06-25 15:27:03,992 - modelscope - INFO - cuda is not available, using cpu instead.


inputs:(1, 55913)
padding: 19913
inputs after padding:(1, 75826)


2024-06-25 15:27:07,015 - modelscope - INFO - initiate model from ./demo_model
2024-06-25 15:27:07,016 - modelscope - INFO - initiate model from location ./demo_model.
2024-06-25 15:27:07,018 - modelscope - INFO - initialize model from ./demo_model


C:\Projects\Denoise-module-main\Denoise-module-main\testTarget\0001.2_denoised_spectral_subtraction_denoised_FRCRN.wav
C:\Projects\Denoise-module-main\Denoise-module-main\testTarget\0001.3.wav


2024-06-25 15:27:07,384 - modelscope - INFO - cuda is not available, using cpu instead.


inputs:(1, 27509)
padding: 491
inputs after padding:(1, 28000)


2024-06-25 15:27:15,894 - modelscope - INFO - initiate model from ./demo_model
2024-06-25 15:27:15,895 - modelscope - INFO - initiate model from location ./demo_model.
2024-06-25 15:27:15,898 - modelscope - INFO - initialize model from ./demo_model


C:\Projects\Denoise-module-main\Denoise-module-main\testTarget\0001.3_denoised_FRCRN.wav
C:\Projects\Denoise-module-main\Denoise-module-main\testTarget\0001.3_denoised_spectral_subtraction.wav


2024-06-25 15:27:16,155 - modelscope - INFO - cuda is not available, using cpu instead.


inputs:(1, 27509)
padding: 491
inputs after padding:(1, 28000)


2024-06-25 15:27:17,165 - modelscope - INFO - initiate model from ./demo_model
2024-06-25 15:27:17,165 - modelscope - INFO - initiate model from location ./demo_model.
2024-06-25 15:27:17,168 - modelscope - INFO - initialize model from ./demo_model


C:\Projects\Denoise-module-main\Denoise-module-main\testTarget\0001.3_denoised_spectral_subtraction_denoised_FRCRN.wav
C:\Projects\Denoise-module-main\Denoise-module-main\testTarget\0001.4.wav


2024-06-25 15:27:17,434 - modelscope - INFO - cuda is not available, using cpu instead.


inputs:(1, 42707)
padding: 18707
inputs after padding:(1, 61414)


2024-06-25 15:28:10,767 - modelscope - INFO - initiate model from ./demo_model
2024-06-25 15:28:10,769 - modelscope - INFO - initiate model from location ./demo_model.
2024-06-25 15:28:10,772 - modelscope - INFO - initialize model from ./demo_model


C:\Projects\Denoise-module-main\Denoise-module-main\testTarget\0001.4_denoised_FRCRN.wav
C:\Projects\Denoise-module-main\Denoise-module-main\testTarget\0001.4_denoised_spectral_subtraction.wav


2024-06-25 15:28:11,048 - modelscope - INFO - cuda is not available, using cpu instead.


inputs:(1, 42707)
padding: 18707
inputs after padding:(1, 61414)


2024-06-25 15:28:13,612 - modelscope - INFO - initiate model from ./demo_model
2024-06-25 15:28:13,613 - modelscope - INFO - initiate model from location ./demo_model.
2024-06-25 15:28:13,615 - modelscope - INFO - initialize model from ./demo_model


C:\Projects\Denoise-module-main\Denoise-module-main\testTarget\0001.4_denoised_spectral_subtraction_denoised_FRCRN.wav
C:\Projects\Denoise-module-main\Denoise-module-main\testTarget\0001.5.wav


2024-06-25 15:28:13,890 - modelscope - INFO - cuda is not available, using cpu instead.


inputs:(1, 31086)
padding: 19086
inputs after padding:(1, 50172)


2024-06-25 15:28:17,978 - modelscope - INFO - initiate model from ./demo_model
2024-06-25 15:28:17,980 - modelscope - INFO - initiate model from location ./demo_model.
2024-06-25 15:28:17,982 - modelscope - INFO - initialize model from ./demo_model


C:\Projects\Denoise-module-main\Denoise-module-main\testTarget\0001.5_denoised_FRCRN.wav
C:\Projects\Denoise-module-main\Denoise-module-main\testTarget\0001.5_denoised_spectral_subtraction.wav


2024-06-25 15:28:18,280 - modelscope - INFO - cuda is not available, using cpu instead.


inputs:(1, 31086)
padding: 19086
inputs after padding:(1, 50172)


2024-06-25 15:28:20,179 - modelscope - INFO - initiate model from ./demo_model
2024-06-25 15:28:20,180 - modelscope - INFO - initiate model from location ./demo_model.
2024-06-25 15:28:20,182 - modelscope - INFO - initialize model from ./demo_model


C:\Projects\Denoise-module-main\Denoise-module-main\testTarget\0001.5_denoised_spectral_subtraction_denoised_FRCRN.wav
C:\Projects\Denoise-module-main\Denoise-module-main\testTarget\noisy.wav


2024-06-25 15:28:20,463 - modelscope - INFO - cuda is not available, using cpu instead.


inputs:(1, 54784)
padding: 18784
inputs after padding:(1, 73568)


2024-06-25 15:29:29,951 - modelscope - INFO - initiate model from ./demo_model
2024-06-25 15:29:29,952 - modelscope - INFO - initiate model from location ./demo_model.
2024-06-25 15:29:29,954 - modelscope - INFO - initialize model from ./demo_model


C:\Projects\Denoise-module-main\Denoise-module-main\testTarget\noisy_denoised_FRCRN.wav
C:\Projects\Denoise-module-main\Denoise-module-main\testTarget\noisy_denoised_spectral_subtraction.wav


2024-06-25 15:29:30,241 - modelscope - INFO - cuda is not available, using cpu instead.


inputs:(1, 54784)
padding: 18784
inputs after padding:(1, 73568)


2024-06-25 15:29:33,279 - modelscope - INFO - initiate model from ./demo_model
2024-06-25 15:29:33,280 - modelscope - INFO - initiate model from location ./demo_model.
2024-06-25 15:29:33,283 - modelscope - INFO - initialize model from ./demo_model


C:\Projects\Denoise-module-main\Denoise-module-main\testTarget\noisy_denoised_spectral_subtraction_denoised_FRCRN.wav
C:\Projects\Denoise-module-main\Denoise-module-main\testTarget\Test0001.1.wav


2024-06-25 15:29:33,671 - modelscope - INFO - cuda is not available, using cpu instead.


inputs:(1, 104000)
padding: 20000
inputs after padding:(1, 124000)


2024-06-25 15:29:51,888 - modelscope - INFO - initiate model from ./demo_model
2024-06-25 15:29:51,889 - modelscope - INFO - initiate model from location ./demo_model.
2024-06-25 15:29:51,891 - modelscope - INFO - initialize model from ./demo_model


C:\Projects\Denoise-module-main\Denoise-module-main\testTarget\Test0001.1_denoised_FRCRN.wav
C:\Projects\Denoise-module-main\Denoise-module-main\testTarget\Test0001.1_denoised_spectral_subtraction.wav


2024-06-25 15:29:52,176 - modelscope - INFO - cuda is not available, using cpu instead.


inputs:(1, 104000)
padding: 20000
inputs after padding:(1, 124000)
C:\Projects\Denoise-module-main\Denoise-module-main\testTarget\Test0001.1_denoised_spectral_subtraction_denoised_FRCRN.wav


In [9]:
stat_bahna_FRCRN.head()

Unnamed: 0,Name,Max,Min,Mean,Std,Noise_level_before_denoised,Max_after_denoised,Min_after_denoised,Mean_after_denoised,Std_after_denoised,Noise_level_after_denoised


### Evaluation (PESQ, STOI)

PESQ Score only support for sampling rate Fs = 8khz (defined as narrow band) or 16khz (defined as wide band) only so we have to resample it

In [8]:
# Example usage resample to 16khz
input_directory = r'D:/Master/Thesis/FRCRN/Summary/Test/'
output_directory = 'D:/Master/Thesis/FRCRN/Summary/Test resample'
target_sampling_rate = 16000

resample_wav_files(input_directory, output_directory, target_sampling_rate)

In [9]:
# Example usage Datasets/clean_testset_wav
input_directory = 'D:\Master\Thesis\FRCRN\CTV bana to add noise\Bình Định'
output_file = 'D:\Master\Thesis\FRCRN\CTV bana to add noise\Bình Định only'

copy_wav_files_to_single_directory(input_directory, output_file)

#### Example evaluate PESQ (wide band - 16khz) and STOI for 1 clean - denoised

In [21]:
m = 'C:\\Projects\\Denoise-module-main\\Denoise-module-main\\Test0001.1.wav'
n = 'C:\\Projects\\Denoise-module-main\\Denoise-module-main\\Test0001.1_denoised_spectral_subtraction.wav'
waveform, sample_rate = torchaudio.load(m)
waveform_denoised, sample_rate_denoised = torchaudio.load(n)

LibsndfileError: Error opening 'C:\\Projects\\Denoise-module-main\\Denoise-module-main\\Test0001.1.wav': System error.

!pip install torchmetrics

In [70]:
from torchmetrics.audio import PerceptualEvaluationSpeechQuality
# pesq = PerceptualEvaluationSpeechQuality(8000, 'nb')
# pesq(preds, target)

wb_pesq = PerceptualEvaluationSpeechQuality(sample_rate, 'wb')
wb_pesq(waveform_denoised,waveform).item()

4.433448314666748

In [71]:
from torchmetrics.audio.stoi import ShortTimeObjectiveIntelligibility
stoi = ShortTimeObjectiveIntelligibility(sample_rate, False)
stoi(waveform_denoised,waveform).item()

0.9990944266319275

#### To evaluate multiple sample at one

In [72]:
metrics = pd.DataFrame(columns=['PESQ','STOI'])

In [None]:
for i in bahna_dataset_FRCRN:
    print(i)
    waveform, sample_rate = torchaudio.load(i)
    #normalized_path = os.path.normpath(i)
    denoised_path = os.path.splitext(i)[0] + "_denoised_FRCRN" + os.path.splitext(i)[1]
    waveform_denoised, sample_rate_denoised = torchaudio.load(denoised_path)
    print(denoised_path)
    wb_pesq = PerceptualEvaluationSpeechQuality(sample_rate, 'wb')
    stoi = ShortTimeObjectiveIntelligibility(sample_rate, False)
    metric = pd.DataFrame({"PESQ":[wb_pesq(waveform, waveform_denoised)],"STOI":[stoi(waveform, waveform_denoised)]})
    metrics = pd.concat([metrics,metric], ignore_index=True)

In [74]:
metrics

Unnamed: 0,PESQ,STOI
0,tensor(4.1133),tensor(0.9992)
1,tensor(1.9175),tensor(0.9248)
2,tensor(2.6582),tensor(0.9736)
3,tensor(2.6802),tensor(0.9624)
4,tensor(4.3995),tensor(0.9952)
