In [1]:
import os
import sys
import torch
import torch.nn.functional as F
import torchaudio
import speechbrain as sb
import speechbrain.nnet.schedulers as schedulers
import numpy as np
from tqdm import tqdm
import logging
from hyperpyyaml import load_hyperpyyaml
import csv
# Prefer the GPU when one is present, but fall back to the CPU so that the
# `.to(device)` calls in later cells do not fail on machines without CUDA.
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
# Module-level logger used by the metric code to report the mean scores.
logger = logging.getLogger(__name__)

# from mir_eval.separation import bss_eval_sources
from speechbrain.dataio.dataio import read_audio
from fast_bss_eval import bss_eval_sources

  from .autonotebook import tqdm as notebook_tqdm
The torchaudio backend is switched to 'soundfile'. Note that 'sox_io' is not supported on Windows.
The torchaudio backend is switched to 'soundfile'. Note that 'sox_io' is not supported on Windows.


In [2]:
def read_all_audio(path):
    """Load every audio file in ``path`` and group the signals per item.

    Files are classified by name: a name containing ``"hat"`` is an estimated
    source, a name containing ``"mix"`` is a mixture, and anything else is a
    reference (target) source. The text before the first ``_`` of the file
    name is the item key shared by all files of one example.

    Arguments
    ---------
    path : str
        Directory holding the audio files.

    Returns
    -------
    audio_ids : list of str
        Item key of each mixture file with the text ``"item"`` removed.
    out_target : list of list of torch.Tensor
        One list of reference sources per item.
    out_est : list of list of torch.Tensor
        One list of estimated sources per item.
    out_mix : list of torch.Tensor
        One tensor per mixture file.

    Every tensor is the result of ``read_audio`` with a leading axis of size
    one added, moved to ``device``.
    """
    audio_ids = []
    targets_by_item = {}
    ests_by_item = {}
    out_mix = []

    # os.listdir returns entries in arbitrary order. Sorting keeps all files
    # that share an item prefix contiguous, so the three per-item outputs are
    # produced in the same item order (assuming every item has a mixture,
    # targets and estimates) and the result is reproducible across runs.
    for filename in sorted(os.listdir(path)):
        filepath = os.path.join(path, filename)
        item_num = filename.split('_')[0]
        audio = read_audio(filepath).unsqueeze(0).to(device)

        if "hat" in filename:
            ests_by_item.setdefault(item_num, []).append(audio)
        elif "mix" in filename:
            audio_ids.append(item_num.replace('item', ''))
            out_mix.append(audio)
        else:
            # The original looked the key up in the estimates dict here, which
            # raised KeyError or overwrote earlier targets depending on the
            # file order; setdefault on the targets dict handles both cases.
            targets_by_item.setdefault(item_num, []).append(audio)

    out_target = list(targets_by_item.values())
    out_est = list(ests_by_item.values())

    return audio_ids, out_target, out_est, out_mix

# Load the saved targets, estimates and mixtures once; later cells read these
# four names from the notebook namespace.
audio_ids, out_target, out_est, out_mix = read_all_audio(
    'results/audio_results'
)

In [5]:
class Separation(sb.Brain):
    """Brain subclass that scores separated audio against its references."""

    def compute_objectives(self, predictions, targets):
        """Return ``hparams.loss`` evaluated on ``(targets, predictions)``.

        ``get_metrics`` negates this value before reporting it as SI-SNR,
        i.e. the configured loss is treated as a negative SI-SNR.
        """
        return self.hparams.loss(targets, predictions)

    def _stack_sources(self, sources):
        """Join the first ``hparams.num_spks`` tensors of ``sources`` along a
        new last axis and move the result to ``self.device``."""
        return torch.cat(
            [sources[i].unsqueeze(-1) for i in range(self.hparams.num_spks)],
            dim=-1,
        ).to(self.device)

    def get_metrics(self, audio_ids, targets, preds, mixtures):
        """Score every example and write the results to ``test_results.csv``.

        For each ``(audio_id, target, pred, mixture)`` tuple this computes the
        loss between estimate and target, the same loss with the mixture
        repeated in place of every estimate (the baseline), and the SDR of
        both through ``bss_eval_sources``. One CSV row per example is written
        to ``hparams.output_folder`` and the mean of each metric is logged.

        Arguments
        ---------
        audio_ids : iterable
            Identifier written to the ``audio_id`` column for each example.
        targets : sequence
            Per example, an indexable collection of reference source tensors.
        preds : sequence
            Per example, an indexable collection of estimated source tensors.
        mixtures : iterable of torch.Tensor
            Per example, the mixture signal.

        Returns
        -------
        None
        """
        # Make sure the destination exists before opening the CSV in it.
        output_folder = self.hparams.output_folder
        if output_folder:
            os.makedirs(output_folder, exist_ok=True)
        save_file = os.path.join(output_folder, "test_results.csv")

        # Per-example values, accumulated for the means logged at the end.
        all_sdrs = []
        all_sdrs_i = []
        all_sisnrs = []
        all_sisnrs_i = []
        csv_columns = ["audio_id", "sdr", "sdr_i", "si-snr", "si-snr_i"]

        # newline="" is required by the csv module; without it every row is
        # followed by an empty line on Windows.
        with open(save_file, "w", newline="") as results_csv:
            writer = csv.DictWriter(results_csv, fieldnames=csv_columns)
            writer.writeheader()

            for audio_id, target, pred, mixture_ in tqdm(
                zip(audio_ids, targets, preds, mixtures), total=len(targets)
            ):
                target = self._stack_sources(target)
                pred = self._stack_sources(pred)

                # Compute SI-SNR
                sisnr = self.compute_objectives(pred, target)

                # Compute SI-SNR improvement: the baseline uses the mixture
                # repeated once per speaker as if it were the estimate.
                mixture_signal = torch.stack(
                    [mixture_] * self.hparams.num_spks, dim=-1
                ).to(target.device)
                sisnr_baseline = self.compute_objectives(
                    mixture_signal, target
                )
                sisnr_i = sisnr - sisnr_baseline

                # Compute SDR: drop the leading axis and transpose so the
                # source axis comes first.
                sdr, _, _, _ = bss_eval_sources(
                    target[0].t(),
                    pred[0].t(),
                )
                sdr_baseline, _, _, _ = bss_eval_sources(
                    target[0].t(),
                    mixture_signal[0].t(),
                )

                # Plain Python floats keep the CSV and the accumulators
                # consistent with the SI-SNR values.
                mean_sdr = sdr.mean().item()
                sdr_i = (sdr.mean() - sdr_baseline.mean()).item()
                si_snr = -sisnr.item()
                si_snr_i = -sisnr_i.item()

                # Saving on a csv file
                writer.writerow(
                    {
                        "audio_id": audio_id,
                        "sdr": mean_sdr,
                        "sdr_i": sdr_i,
                        "si-snr": si_snr,
                        "si-snr_i": si_snr_i,
                    }
                )

                # Metric Accumulation
                all_sdrs.append(mean_sdr)
                all_sdrs_i.append(sdr_i)
                all_sisnrs.append(si_snr)
                all_sisnrs_i.append(si_snr_i)

        logger.info("Mean SISNR is {}".format(np.array(all_sisnrs).mean()))
        logger.info("Mean SISNRi is {}".format(np.array(all_sisnrs_i).mean()))
        logger.info("Mean SDR is {}".format(np.array(all_sdrs).mean()))
        logger.info("Mean SDRi is {}".format(np.array(all_sdrs_i).mean()))


In [6]:
def run(hparams_file='hyperparams.yaml'):
    """Load the hyperparameters, build the Brain and score the loaded audio.

    Reads ``audio_ids``, ``out_target``, ``out_est`` and ``out_mix`` from the
    notebook namespace and passes them to ``Separation.get_metrics``.

    Arguments
    ---------
    hparams_file : str
        Path of the HyperPyYAML file to load. Defaults to the file the
        notebook originally hard-coded.

    Returns
    -------
    None
    """
    # Only the run options and overrides are taken from the parser; the file
    # path itself is the argument given to this function.
    _, run_opts, overrides = sb.parse_arguments([hparams_file])
    with open(hparams_file) as fin:
        hparams = load_hyperpyyaml(fin, overrides)

    # Load pretrained model if pretrained_separator is present in the yaml.
    # collect_files is not called here; presumably the checkpoint files have
    # already been collected - verify before running on a fresh machine.
    if "pretrained_separator" in hparams:
        hparams["pretrained_separator"].load_collected(
            device=run_opts["device"]
        )

    # Brain class initialization.
    # NOTE(review): the Brain is pinned to "cuda" while the pretrained weights
    # are loaded on run_opts["device"] - confirm this mismatch is intended.
    separator = Separation(
        modules=hparams["modules"],
        run_opts={"device": "cuda"},
        hparams=hparams,
    )
    separator.get_metrics(audio_ids, out_target, out_est, out_mix)
# Score the loaded audio: writes the per-example CSV and logs the mean metrics.
run()

100%|██████████| 2604/2604 [02:18<00:00, 18.79it/s]


In [39]:
import pandas as pd

# Per-example metrics written by the evaluation, ordered by ascending SDR.
df = pd.read_csv('results/test_results.csv').sort_values(by='sdr')

# Pick the example at, or immediately below, the median SDR. Using `<=`
# instead of `<` keeps the row whose SDR equals the median (odd row counts)
# and avoids an empty selection when the file holds a single row.
median_audio = df[df['sdr'] <= df['sdr'].median()].iloc[-1]
median_audio


audio_id    2445.000000
sdr           18.172338
sdr_i         17.768276
si-snr        17.021784
si-snr_i      16.838556
Name: 1445, dtype: float64

In [31]:
df.loc[df['sdr']== df['sdr'].median()]
df.sort_values(by='sdr', inplace=True)
# df[df['sdr'] > df['sdr'].median()].iloc[0]
median_audio = df[df['sdr'] < df['sdr'].median()].iloc[-1]
med