In [1]:
%%capture
!pip install asteroid

In [None]:
%%capture
!pip install neptune-client

# [Dual-path RNN:](https://arxiv.org/pdf/1910.06379.pdf) 

Efficient long sequence modeling for time-domain 
single-channel speech separation


1. [Pretrained Model](https://huggingface.co/mpariente/DPRNNTasNet-ks2_WHAM_sepclean)

2. [DPRNNTasNet](https://github.com/asteroid-team/asteroid/tree/master/egs/wham/DPRNN)

In [None]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
%cd /content/drive/Shareddrives/TG-Separación-Fuentes/code/train-speech-separation-models/train

/content/drive/Shareddrives/TG-Separación-Fuentes/code/train-speech-separation-models/train


In [None]:
import pandas as pd
import numpy as np
from torch.optim import Adam
from torch.utils.data import DataLoader,Dataset
import pytorch_lightning as pl
import yaml
import json
from asteroid.utils import prepare_parser_from_dict, parse_args_as_dict
from torch.optim.lr_scheduler import ReduceLROnPlateau
from asteroid.metrics import get_metrics
from asteroid.utils import tensors_to_device
from tqdm import tqdm
from asteroid.dsp.normalization import normalize_estimates
import os

from asteroid.models import DPRNNTasNet

from asteroid.engine.system import System
from asteroid.losses import PITLossWrapper, pairwise_neg_sisdr
import soundfile as sf
import torch
import random as random
from IPython.display import display, Audio
from asteroid.models import BaseModel
from pytorch_lightning.callbacks import ModelCheckpoint,EarlyStopping
from asteroid.engine.optimizers import make_optimizer
from pytorch_lightning.loggers.neptune import NeptuneLogger
from lib.CallSpanish_dataset import CallSpanish

ModuleNotFoundError: ignored

In [None]:
# Save checkpoints
default_root_dir = "/content/drive/Shareddrives/TG-Separación-Fuentes/code/Checkpoints-separation-models/DPRNN/checkpoints"
save_best_model = "/content/drive/Shareddrives/TG-Separación-Fuentes/code/Checkpoints-separation-models/DPRNN/best_model/"

In [None]:
PATH_DATA_ROOT = "../../Datasets/01-Data_experimental/intermediate"
MIX = PATH_DATA_ROOT+"mix/"
S_1 = PATH_DATA_ROOT+"source_1/"
S_2 = PATH_DATA_ROOT+"source_2/"

ROOT_CSV  ="/content/drive/Shareddrives/TG-Separación-Fuentes/code/train-speech-separation-models/train/resources/"


# CSV
PATH_CSV_MIX = ROOT_CSV +"mixture_callfriend_spanish.csv"
PATH_CSV_TRAIN = ROOT_CSV + "mixture_train_mix_clean_callfriend_spanish.csv"
PATH_CSV_VALID = ROOT_CSV +"mixture_val_mix_clean_callfriend_spanish.csv"
PATH_CSV_TEST = ROOT_CSV + "mixture_test_mix_clean_callfriend_spanish.csv"

PATH_CONFIG = "./resources/conf-DPRNN.yml"


df_train = pd.read_csv(PATH_CSV_TRAIN)
df_val = pd.read_csv(PATH_CSV_VALID)
#df_test = pd.read_csv(PATH_CSV_TEST)

In [None]:
df_train.shape

In [None]:
df_val.shape

(87441, 5)

# 1. Configurar logger Neptune

In [None]:
with open(PATH_CONFIG) as f:
        conf = yaml.safe_load(f)
conf["main_args"]={"exp_dir":save_best_model}

In [None]:
exp_dir = conf["main_args"]["exp_dir"]
os.makedirs(exp_dir, exist_ok=True)

In [None]:
conf

{'data': {'mode': 'min',
  'nondefault_nsrc': None,
  'sample_rate': 8000,
  'segment': 2.0,
  'task': 'sep_clean',
  'train_dir': 'data/wav8k/min/tr/',
  'valid_dir': 'data/wav8k/min/cv/'},
 'filterbank': {'kernel_size': 2, 'n_filters': 64, 'stride': 1},
 'main_args': {'exp_dir': '/content/drive/Shareddrives/TG-Separación-Fuentes/code/Checkpoints-separation-models/DPRNN/best_model/'},
 'masknet': {'bidirectional': True,
  'bn_chan': 128,
  'chunk_size': 250,
  'dropout': 0,
  'hid_size': 128,
  'hop_size': 125,
  'in_chan': 64,
  'mask_act': 'sigmoid',
  'n_repeats': 6,
  'n_src': 2,
  'out_chan': 64},
 'optim': {'lr': 0.001, 'optimizer': 'adam', 'weight_decay': 1e-05},
 'test': {'n_save_examples': 5},
 'training': {'batch_size': 4,
  'early_stop': True,
  'epochs': 200,
  'gradient_clipping': 5,
  'half_lr': True,
  'num_workers': 4}}

In [None]:
start = 2
end = 3

In [None]:
# Configurarmos el experimento y sus parametros
experiment_name = "DPRNN_77_train_24.2_val_24.2_test_sta_"+str(start)+"_end_"+str(end)
params=conf
tags = ["test_lote_start_"+str(start)+"_end_"+str(end)+"_DPRNN_pretrained"]

# Definir Logger 
neptune_logger = NeptuneLogger(
    api_key="eyJhcGlfYWRkcmVzcyI6Imh0dHBzOi8vYXBwLm5lcHR1bmUuYWkiLCJhcGlfdXJsIjoiaHR0cHM6Ly9hcHAubmVwdHVuZS5haSIsImFwaV9rZXkiOiI5NjRkMmY2YS04M2EwLTRiMGMtODk1Ny1mMWQxZTA3NGM1NzAifQ==",
    project_name="josearangos/Tg-speech-separation",experiment_name=experiment_name,
    params = params, tags = tags, close_after_fit=False)

NeptuneLogger will work in online mode


# Test modelo pretrained


In [None]:
PATH_TEST = df_val.iloc[1,1]
model_before = BaseModel.from_pretrained("mpariente/DPRNNTasNet-ks2_WHAM_sepclean")
model_before.cuda()

DPRNNTasNet(
  (encoder): Encoder(
    (filterbank): FreeFB()
  )
  (masker): DPRNN(
    (bottleneck): Sequential(
      (0): GlobLN()
      (1): Conv1d(64, 128, kernel_size=(1,), stride=(1,))
    )
    (net): Sequential(
      (0): DPRNNBlock(
        (intra_RNN): SingleRNN(
          (rnn): LSTM(128, 128, batch_first=True, bidirectional=True)
        )
        (inter_RNN): SingleRNN(
          (rnn): LSTM(128, 128, batch_first=True, bidirectional=True)
        )
        (intra_linear): Linear(in_features=256, out_features=128, bias=True)
        (intra_norm): GlobLN()
        (inter_linear): Linear(in_features=256, out_features=128, bias=True)
        (inter_norm): GlobLN()
      )
      (1): DPRNNBlock(
        (intra_RNN): SingleRNN(
          (rnn): LSTM(128, 128, batch_first=True, bidirectional=True)
        )
        (inter_RNN): SingleRNN(
          (rnn): LSTM(128, 128, batch_first=True, bidirectional=True)
        )
        (intra_linear): Linear(in_features=256, out_features

In [None]:
mixture, _ = sf.read(PATH_TEST, dtype="float32", always_2d=True)
# Soundfile returns the mixture as shape (time, channels), and Asteroid expects (batch, channels, time)
mixture = mixture.transpose()
mixture = mixture.reshape(1, mixture.shape[0], mixture.shape[1])
out_wavs_before = model_before.separate(mixture)

In [None]:
mixture.shape

(1, 1, 120000)

In [None]:
display(Audio(PATH_TEST))
display(Audio(out_wavs_before[0,0,:],rate=8000))
display(Audio(out_wavs_before[0,1,:],rate=8000))

# Calcular metricas modelo preentrenado

In [None]:
test_set = CallSpanish(
    csv_path=PATH_CSV_VALID,
    task="sep_clean",
    sample_rate=8000,
    n_src=2,
    segment=None,
    return_id=True,
    start = start,
    end = end
)

In [None]:
len(test_set)

1

In [None]:
#Guardar las metricas 
results = np.zeros(10)
results

array([0., 0., 0., 0., 0., 0., 0., 0., 0., 0.])

In [None]:
test(start, end, results, model_before,test_set,pretrained=True)

  0%|          | 0/1 [00:00<?, ?it/s]

Mix shape tensor([ 0.0005, -0.0006, -0.0006,  ..., -0.0291, -0.0178, -0.0064],
       device='cuda:0')


ValueError: ignored

# 2. Cargar Datos para train y valid

In [None]:
train_set = CallSpanish(
    csv_path=PATH_CSV_TRAIN,
    task="sep_clean",
    sample_rate=8000,
    n_src=2,
    segment=2
)

val_set = CallSpanish(
   csv_path=PATH_CSV_VALID,
    task="sep_clean",
    sample_rate=8000,
    n_src=2,
    segment=2
)

Drop 0 utterances from 60 (shorter than 2 seconds)
Drop 0 utterances from 10 (shorter than 2 seconds)


## Nota: Batch_size == 2, ya que 4

In [None]:
train_loader = DataLoader(train_set,shuffle=True,batch_size=2, drop_last=True,num_workers=4)
val_loader = DataLoader(val_set, batch_size=2, drop_last=True,num_workers=4)

# 3. Definición del modelo

In [None]:
model = DPRNNTasNet(
        **conf["filterbank"], **conf["masknet"], sample_rate=conf["data"]["sample_rate"]
    )

In [None]:
optimizer = make_optimizer(model.parameters(), **conf["optim"])
# Define scheduler
scheduler = None
if conf["training"]["half_lr"]:
    scheduler = ReduceLROnPlateau(optimizer=optimizer, factor=0.5, patience=5)
# Just after instantiating, save the args. Easy loading in the future.


In [None]:
exp_dir = conf["main_args"]["exp_dir"]
os.makedirs(exp_dir, exist_ok=True)

In [None]:
# Define Loss function.
loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")
system = System(
    model=model,
    loss_func=loss_func,
    optimizer=optimizer,
    train_loader=train_loader,
    val_loader=val_loader,
    scheduler=scheduler,
    config=conf,
)

In [None]:
# Define callbacks
callbacks = []
checkpoint_dir = os.path.join(exp_dir, "checkpoints/")
checkpoint = ModelCheckpoint(
    checkpoint_dir, monitor="val_loss", mode="min", save_top_k=5, verbose=True
)

In [None]:
callbacks.append(checkpoint)
if conf["training"]["early_stop"]:
    callbacks.append(EarlyStopping(monitor="val_loss", mode="min", patience=30, verbose=True))

In [None]:
gpus = -1 if torch.cuda.is_available() else None
#distributed_backend = "ddp" if torch.cuda.is_available() else None

# 4. Train modelo

In [None]:
trainer = pl.Trainer(
        max_epochs=conf["training"]["epochs"],
        callbacks=callbacks,
        default_root_dir=exp_dir,
        gpus=gpus,
        #distributed_backend=distributed_backend,
        gradient_clip_val=conf["training"]["gradient_clipping"],
        logger=neptune_logger
    )

In [None]:
trainer.fit(system)

# 5.Guardar mejor modelo

In [None]:
best_k = {k: v.item() for k, v in checkpoint.best_k_models.items()}
with open(os.path.join(exp_dir, "best_k_models.json"), "w") as f:
    json.dump(best_k, f, indent=0)

state_dict = torch.load(checkpoint.best_model_path)
system.load_state_dict(state_dict=state_dict["state_dict"])
system.cpu()

to_save = system.model.serialize()
best_model_path = os.path.join(exp_dir, "best_model.pth")
torch.save(to_save,best_model_path )

#Send best model to neptune
neptune_logger.experiment.log_artifact(best_model_path)

# Test model y calcular metricas

In [None]:
def step_test(start, results,model,loss_func,COMPUTE_METRICS,eval_save_dir,ex_save_dir,test_set,neptune_status,model_device,pretrained=False):
  series_list = []
  i = start

  torch.no_grad().__enter__()
  for idx in tqdm(range(len(test_set))):

    mix, sources, ids = test_set[idx]
    
    mix, sources = tensors_to_device([mix, sources], device=model_device)

    est_sources = model(mix[None, None])
    loss, reordered_sources = loss_func(est_sources, sources[None], return_est=True)
    mix_np = mix[None].cpu().data.numpy()
    sources_np = sources.cpu().data.numpy()
    est_sources_np = reordered_sources.squeeze(0).cpu().data.numpy()
    utt_metrics = get_metrics(
        mix_np,
        sources_np,
        est_sources_np,
        sample_rate=conf["data"]["sample_rate"],
        metrics_list=COMPUTE_METRICS,
    )

    r = 0
    iteration = str(i)
    neptune_logger.experiment.log_metric("Iteración:", i)


    for metric_name in COMPUTE_METRICS:
      input_metric_name = "input_" + metric_name     
      results[r] = results[r] + utt_metrics[input_metric_name]                   
      neptune_logger.experiment.log_metric(input_metric_name, results[r])
      r = r +1

    
    for metric_name in COMPUTE_METRICS:
      results[r] = results[r] + utt_metrics[metric_name]          
      neptune_logger.experiment.log_metric(metric_name, results[r])
      r = r +1

    
    utt_metrics["mix_path"] = test_set.mixture_path
    series_list.append(pd.Series(utt_metrics))
    print("Iteration =>",i)
    i = i + 1

    # Save some examples in a folder. Wav files and metrics as text.
    if conf["test"]["n_save_examples"] == -1:
        conf["test"]["n_save_examples"] = len(test_set)
    
    save_idx = random.sample(range(len(test_set)),conf["test"]["n_save_examples"])

    if idx in save_idx:
      example_name = "ex_{}/".format(idx)

      local_save_dir = os.path.join(ex_save_dir, "ex_{}/".format(idx))
      os.makedirs(local_save_dir, exist_ok=True)
      
      sf.write(local_save_dir + "mixture.wav", mix_np[0], conf["data"]["sample_rate"])
      # Loop over the sources and estimates

      for src_idx, src in enumerate(sources_np):
          sf.write(local_save_dir + "s{}.wav".format(src_idx+1), src, conf["data"]["sample_rate"])
      
      for src_idx, est_src in enumerate(est_sources_np):

          path_estimation_source = local_save_dir + "s{}_estimate.wav".format(src_idx+1)

          if (pretrained):
            path_estimation_source = local_save_dir + "s{}_pretrained_estimate.wav".format(src_idx+1)
          
          sf.write(path_estimation_source,
                    est_src,
                    conf["data"]["sample_rate"],
          )
          if (neptune_status):
            neptune_logger.experiment.log_artifact(path_estimation_source)


      #Send estimation wavs
      mix_path = local_save_dir + "mixture.wav"
      if (neptune_status):
        neptune_logger.experiment.log_artifact(mix_path)

      neptune_status = False
              
      # Write local metrics to the example folder.
      with open(local_save_dir + "metrics.json", "w") as f:
          json.dump(utt_metrics, f, indent=0)


  return series_list


In [None]:
def compute_global_metrics(start,end, series_list,COMPUTE_METRICS,eval_save_dir,pretrained=False):
  all_metrics_df = pd.DataFrame(series_list)
  name = "all_metrics.csv"
  final_metrics = "final_metrics.json"
  if (pretrained):
    name= "all_metrics_pretrained_model_start_"+str(start)+"_end_"+str(end)+".csv"
    final_metrics ="final_metrics_pretrained_model_start_"+str(start)+"_end_"+str(end)+".json"
    
  all_metrics_path = os.path.join(eval_save_dir,name )  
  all_metrics_df.to_csv(all_metrics_path)

  #Send All metrics
  neptune_logger.experiment.log_artifact(all_metrics_path)

  final_results = {}
  for metric_name in COMPUTE_METRICS:
      input_metric_name = "input_" + metric_name
      ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
      final_results[metric_name] = all_metrics_df[metric_name].mean()
      final_results[metric_name + "_imp"] = ldf.mean()  

  summary_metrics = os.path.join(eval_save_dir,final_metrics)


  with open(summary_metrics, "w") as f:
          json.dump(final_results, f, indent=0)

  #Send summary metrics
  neptune_logger.experiment.log_artifact(summary_metrics)

In [None]:
def test(start, end, results, model,test_set,pretrained=False):
  model_device = next(model.parameters()).device
  loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")
  COMPUTE_METRICS = ["si_sdr", "sdr", "sir", "sar", "stoi"]

  eval_save_dir = os.path.join(exp_dir, "metrics")
  ex_save_dir = os.path.join(eval_save_dir, "examples/")
  neptune_status=True

  series_list = step_test(start, results, model,loss_func,COMPUTE_METRICS,eval_save_dir,ex_save_dir,test_set,neptune_status,model_device,pretrained)
  compute_global_metrics(start,end,series_list,COMPUTE_METRICS,eval_save_dir,pretrained)  
  neptune_logger.experiment.stop()

# 6. Test model despues entrenar

In [None]:
PATH_TEST = df_val.iloc[2,1]

path_best_model =  os.path.join(exp_dir, "best_model.pth")
best_model  = DPRNNTasNet.from_pretrained(path_best_model)
best_model.cuda()

mixture, _ = sf.read(PATH_TEST, dtype="float32", always_2d=True)
# Soundfile returns the mixture as shape (time, channels), and Asteroid expects (batch, channels, time)
mixture = mixture.transpose()
mixture = mixture.reshape(1, mixture.shape[0], mixture.shape[1])
out_wavs_after = best_model.separate(mixture)

In [None]:
display(Audio(PATH_TEST))
display(Audio(out_wavs_after[0,0,:],rate=8000))
display(Audio(out_wavs_after[0,1,:],rate=8000))

# 7. Test ejemplos no vistos

In [None]:
model_path = os.path.join(conf["main_args"]["exp_dir"], "best_model.pth")
model = DPRNNTasNet.from_pretrained(model_path)
# Handle device placement
model.cuda()

model_device = next(model.parameters()).device

test_set = CallSpanish(
    csv_path=PATH_CSV_TEST,
    task="sep_clean",
    sample_rate=8000,
    n_src=2,
    segment=None,
    return_id=True,
)


In [None]:
loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")
COMPUTE_METRICS = ["si_sdr", "sdr", "sir", "sar", "stoi"]

eval_save_dir = os.path.join(exp_dir, "metrics")
ex_save_dir = os.path.join(eval_save_dir, "examples/")
neptune_status=True

model = model_before

model_device = next(model.parameters()).device

In [None]:
series_list = []
torch.no_grad().__enter__()
for idx in tqdm(range(len(test_set))):
    # Forward the network on the mixture.
    mix, sources, ids = test_set[idx]
    mix, sources = tensors_to_device([mix, sources], device=model_device)
    
    print("MIX", mix.shape,mix[None, None].shape)
    print("SOURCE",sources.shape)

    est_sources = model(mix[None, None])

    loss, reordered_sources = loss_func(est_sources, sources[None], return_est=True)

    mix_np = mix[None].cpu().data.numpy()

    print("mix_np",mix_np.shape)
    print("sources_np",sources_np.shape)

    sources_np = sources.cpu().data.numpy()
    est_sources_np = reordered_sources.squeeze(0).cpu().data.numpy()
    
    print(est_sources_np.shape,"est_sources_np")

    utt_metrics = get_metrics(
                mix_np,
                sources_np,
                est_sources_np,
                sample_rate=conf["data"]["sample_rate"],
                metrics_list=COMPUTE_METRICS,
            )
    

    print("METRICS ",utt_metrics)
    utt_metrics["mix_path"] = test_set.mixture_path
    series_list.append(pd.Series(utt_metrics))


    # Save some examples in a folder. Wav files and metrics as text.
    if conf["test"]["n_save_examples"] == -1:
        conf["test"]["n_save_examples"] = len(test_set)
    
    save_idx = [0]#random.sample(range(len(test_set)),conf["test"]["n_save_examples"])


    if idx in save_idx:

        example_name = "ex_{}/".format(idx)

        local_save_dir = os.path.join(ex_save_dir, "ex_{}/".format(idx))
        os.makedirs(local_save_dir, exist_ok=True)
        sf.write(local_save_dir + "mixture.wav", mix_np[0], conf["data"]["sample_rate"])

        # Loop over the sources and estimates

        for src_idx, src in enumerate(sources_np):
            sf.write(local_save_dir + "s{}.wav".format(src_idx+1), src, conf["data"]["sample_rate"])
        
        for src_idx, est_src in enumerate(est_sources_np):
            path_estimation_source = local_save_dir + "s{}_estimate.wav".format(src_idx+1)
            sf.write(path_estimation_source,
                     est_src,
                     conf["data"]["sample_rate"],
            )
            if (neptune_status):
              pass
              #neptune_logger.experiment.log_artifact(path_estimation_source)


        #Send estimation wavs
        mix_path = local_save_dir + "mixture.wav"
        if (neptune_status):
          pass
          #neptune_logger.experiment.log_artifact(mix_path)

        neptune_status = False
                
        # Write local metrics to the example folder.
        with open(local_save_dir + "metrics.json", "w") as f:
            json.dump(utt_metrics, f, indent=0)

  0%|          | 0/1 [00:00<?, ?it/s]

MIX torch.Size([120000]) torch.Size([1, 1, 120000])
SOURCE torch.Size([2, 120000])
mix_np (1, 120000)
sources_np (2, 120000)
(2, 120000) est_sources_np


100%|██████████| 1/1 [00:04<00:00,  4.34s/it]

METRICS  {'input_si_sdr': 0.006041288375854492, 'input_sdr': 0.11592992185818751, 'input_sir': 0.11592992185818485, 'input_sar': 289.5085660553454, 'input_stoi': 0.8677301863078911, 'si_sdr': -0.7535203099250793, 'sdr': 0.9053761896288648, 'sir': 8.734941212408819, 'sar': 5.800290082214877, 'stoi': 0.6889500934111019}





# 8. Metricas

In [None]:
all_metrics_df = pd.DataFrame(series_list)
all_metrics_path = os.path.join(eval_save_dir, "all_metrics.csv")
all_metrics_df.to_csv(all_metrics_path)

#Send All metrics
#neptune_logger.experiment.log_artifact(all_metrics_path)

final_results = {}
for metric_name in COMPUTE_METRICS:
    input_metric_name = "input_" + metric_name
    ldf = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
    final_results[metric_name] = all_metrics_df[metric_name].mean()
    final_results[metric_name + "_imp"] = ldf.mean()

summary_metrics = os.path.join(eval_save_dir, "final_metrics.json")
with open(summary_metrics, "w") as f:
        json.dump(final_results, f, indent=0)

#Send summary metrics
#neptune_logger.experiment.log_artifact(summary_metrics)

In [None]:
#neptune_logger.experiment.stop()

In [None]:
final_results

{'sar': 5.797290292730607,
 'sar_imp': -274.7125952952848,
 'sdr': 1.911685016228895,
 'sdr_imp': 1.6313921836268943,
 'si_sdr': 0.6377596259117126,
 'si_sdr_imp': 0.930385172367096,
 'sir': 7.357038505872008,
 'sir_imp': 7.076745673270002,
 'stoi': 0.7119727502274262,
 'stoi_imp': -0.07301022461344442}