In [None]:
%%capture
!pip install asteroid

In [None]:
%%capture
!pip install neptune-client

# [Dual-path RNN:](https://arxiv.org/pdf/1910.06379.pdf) 

Efficient long sequence modeling for time-domain 
single-channel speech separation


1. [Pretrained Model](https://huggingface.co/mpariente/DPRNNTasNet-ks2_WHAM_sepclean)

2. [DPRNNTasNet](https://github.com/asteroid-team/asteroid/tree/master/egs/librimix/DPRNNTasNet)

In [1]:
# Mount Google Drive at /content/drive; the working directory, checkpoints and
# best-model folders used below all live under that mount point.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [None]:
# Work from the training folder so the relative paths used later ("./resources/...",
# "../../Datasets/...") and the local `lib` package resolve from here.
%cd /content/drive/Shareddrives/TG-Separación-Fuentes/code/train-speech-separation-models/train

/content/drive/Shareddrives/TG-Separación-Fuentes/code/train-speech-separation-models/train


In [None]:
import pandas as pd
import numpy as np
from torch.optim import Adam
from torch.utils.data import DataLoader,Dataset
import pytorch_lightning as pl
import yaml
import json
from asteroid.utils import prepare_parser_from_dict, parse_args_as_dict
from torch.optim.lr_scheduler import ReduceLROnPlateau
from asteroid.metrics import get_metrics
from asteroid.utils import tensors_to_device
from tqdm import tqdm
from asteroid.dsp.normalization import normalize_estimates
import os

from asteroid.models import DPRNNTasNet

from asteroid.engine.system import System
from asteroid.losses import PITLossWrapper, pairwise_neg_sisdr
import soundfile as sf
import torch
import random as random
from IPython.display import display, Audio
from asteroid.models import BaseModel
from pytorch_lightning.callbacks import ModelCheckpoint,EarlyStopping
from asteroid.engine.optimizers import make_optimizer
from pytorch_lightning.loggers.neptune import NeptuneLogger
from lib.CallSpanish_dataset import CallSpanish

In [None]:
# Output locations on the shared drive: one folder for training checkpoints and
# one (with trailing slash) for the exported best model.
_dprnn_root = "/content/drive/Shareddrives/TG-Separación-Fuentes/code/Checkpoints-separation-models/DPRNN"
default_root_dir = _dprnn_root + "/checkpoints"
save_best_model = _dprnn_root + "/best_model/"

In [None]:
# Root folder of the experimental dataset, relative to the working directory.
PATH_DATA_ROOT = "../../Datasets/01-Data_experimental/intermediate"
# Join with os.path.join: PATH_DATA_ROOT has no trailing slash, so plain string
# concatenation produced ".../intermediatemix/" instead of ".../intermediate/mix/".
MIX = os.path.join(PATH_DATA_ROOT, "mix/")
S_1 = os.path.join(PATH_DATA_ROOT, "source_1/")
S_2 = os.path.join(PATH_DATA_ROOT, "source_2/")

# CSV files describing the train / validation / test splits.
PATH_CSV_TRAIN = "./resources/mixture_train_mix_clean.csv"
PATH_CSV_VALID = "./resources/mixture_val_mix_clean.csv"
PATH_CSV_TEST = "./resources/mixture_test_mix_clean.csv"

# YAML file with the model / optimizer / training configuration.
PATH_CONFIG = "./resources/conf-DPRNN.yml"

# Load the three split descriptions as DataFrames.
df_train = pd.read_csv(PATH_CSV_TRAIN)
df_val = pd.read_csv(PATH_CSV_VALID)
df_test = pd.read_csv(PATH_CSV_TEST)

# Test modelo pretrained


In [None]:
# Pick one validation example to listen to before any fine-tuning.
# NOTE(review): assumes column 1 of the validation CSV holds the mixture wav path — confirm.
PATH_TEST = df_val.iloc[2,1]

# Download the pretrained DPRNN-TasNet; it serves as the "before training" baseline.
model_before = BaseModel.from_pretrained("mpariente/DPRNNTasNet-ks2_WHAM_sepclean")
# Only move the model to the GPU when one exists, so the cell also runs on a
# CPU-only runtime (the Trainer setup below already handles the no-GPU case).
if torch.cuda.is_available():
    model_before.cuda()

mixture, _ = sf.read(PATH_TEST, dtype="float32", always_2d=True)
# Soundfile returns the mixture as shape (time, channels), and Asteroid expects (batch, channels, time)
mixture = mixture.transpose()
mixture = mixture.reshape(1, mixture.shape[0], mixture.shape[1])
out_wavs_before = model_before.separate(mixture)

HBox(children=(FloatProgress(value=0.0, description='Downloading', max=14664381.0, style=ProgressStyle(descrip…




In [None]:
# Play the original mixture, then each of the two sources estimated by the
# pretrained model (played back at 8 kHz).
display(Audio(PATH_TEST))
for src_idx in range(2):
    display(Audio(out_wavs_before[0, src_idx, :], rate=8000))

# 1. Configurar logger Neptune

In [None]:
# Read the YAML experiment configuration and record where the experiment
# outputs go under the "main_args" section.
with open(PATH_CONFIG) as config_file:
    conf = yaml.safe_load(config_file)
conf["main_args"] = dict(exp_dir=save_best_model)

In [None]:
# Name of the experiment and the parameters logged with it (the whole config).
experiment_name = "DPRNN_60_train_10_val_30_test"
params = conf

# The Neptune API token must never be committed in the notebook. It is read from
# the NEPTUNE_API_TOKEN environment variable instead.
# NOTE(review): the token that used to be hard-coded here must be treated as
# compromised and revoked/rotated in Neptune.
if not os.environ.get("NEPTUNE_API_TOKEN"):
    raise RuntimeError(
        "Set the NEPTUNE_API_TOKEN environment variable before creating the Neptune logger."
    )

# Define the logger. close_after_fit=False keeps the experiment open after
# training so later cells can still upload artifacts to it.
neptune_logger = NeptuneLogger(
    api_key=os.environ["NEPTUNE_API_TOKEN"],
    project_name="josearangos/Tg-speech-separation",
    experiment_name=experiment_name,
    params=params,
    close_after_fit=False,
)

NeptuneLogger will work in online mode


# 2. Cargar Datos para train y valid

In [None]:
# Train and validation datasets share every option except the CSV they read:
# clean two-speaker separation at 8 kHz with segment=2
# (the logged output reports this as "2 seconds").
dataset_kwargs = dict(task="sep_clean", sample_rate=8000, n_src=2, segment=2)

train_set = CallSpanish(csv_path=PATH_CSV_TRAIN, **dataset_kwargs)
val_set = CallSpanish(csv_path=PATH_CSV_VALID, **dataset_kwargs)

Drop 0 utterances from 60 (shorter than 2 seconds)
Drop 0 utterances from 10 (shorter than 2 seconds)


## Nota: Batch_size == 2, ya que 4

In [None]:
# Both loaders use batches of 2, drop the last incomplete batch and load with
# 4 worker processes; only the training loader shuffles.
loader_kwargs = dict(batch_size=2, drop_last=True, num_workers=4)

train_loader = DataLoader(train_set, shuffle=True, **loader_kwargs)
val_loader = DataLoader(val_set, **loader_kwargs)

# 3. Definición del modelo

In [None]:
# Instantiate the DPRNN-TasNet from the "filterbank" and "masknet" sections of
# the config, at the sample rate declared in the "data" section.
model = DPRNNTasNet(
    sample_rate=conf["data"]["sample_rate"],
    **conf["filterbank"],
    **conf["masknet"],
)

In [None]:
# Build the optimizer from the "optim" section of the config.
optimizer = make_optimizer(model.parameters(), **conf["optim"])

# Optional scheduler: when "half_lr" is enabled, halve the learning rate after
# 5 epochs without improvement; otherwise train without a scheduler.
scheduler = (
    ReduceLROnPlateau(optimizer=optimizer, factor=0.5, patience=5)
    if conf["training"]["half_lr"]
    else None
)


In [None]:
# Experiment output folder (set from save_best_model when the config was loaded);
# create it up front so later cells can write checkpoints and metrics into it.
exp_dir = conf["main_args"]["exp_dir"]
os.makedirs(exp_dir, exist_ok=True)

In [None]:
# Define Loss function.
loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")
system = System(
    model=model,
    loss_func=loss_func,
    optimizer=optimizer,
    train_loader=train_loader,
    val_loader=val_loader,
    scheduler=scheduler,
    config=conf,
)

In [None]:
# Define callbacks
callbacks = []
checkpoint_dir = os.path.join(exp_dir, "checkpoints/")
checkpoint = ModelCheckpoint(
    checkpoint_dir, monitor="val_loss", mode="min", save_top_k=5, verbose=True
)

In [None]:
# Rebuild the list from scratch instead of appending to the existing one, so
# re-running this cell does not register the same callbacks twice.
callbacks = [checkpoint]
# Optionally stop training after 30 epochs without "val_loss" improvement.
if conf["training"]["early_stop"]:
    callbacks.append(EarlyStopping(monitor="val_loss", mode="min", patience=30, verbose=True))

In [None]:
# Value for the Trainer's `gpus` argument: -1 when CUDA is available,
# None otherwise. No distributed backend is configured.
if torch.cuda.is_available():
    gpus = -1
else:
    gpus = None

# 4. Train modelo

In [None]:
# Lightning trainer: epochs and gradient clipping come from the "training"
# section of the config, outputs go to exp_dir, and metrics are sent to Neptune.
trainer_kwargs = dict(
    max_epochs=conf["training"]["epochs"],
    callbacks=callbacks,
    default_root_dir=exp_dir,
    gpus=gpus,
    gradient_clip_val=conf["training"]["gradient_clipping"],
    logger=neptune_logger,
)
trainer = pl.Trainer(**trainer_kwargs)

In [None]:
# Run training; the System already carries the train and validation loaders.
trainer.fit(system)

# 5. Guardar el mejor modelo

In [None]:
# Write the monitored score of every checkpoint kept by the callback to JSON.
best_k = {ckpt_path: score.item() for ckpt_path, score in checkpoint.best_k_models.items()}
best_k_json = os.path.join(exp_dir, "best_k_models.json")
with open(best_k_json, "w") as best_k_file:
    json.dump(best_k, best_k_file, indent=0)

# Restore the weights of the best checkpoint into the system and move it to the CPU.
state_dict = torch.load(checkpoint.best_model_path)
system.load_state_dict(state_dict=state_dict["state_dict"])
system.cpu()

# Serialize the bare model (not the whole system) next to the other outputs.
best_model_path = os.path.join(exp_dir, "best_model.pth")
to_save = system.model.serialize()
torch.save(to_save, best_model_path)

# Upload the exported model to the Neptune experiment.
neptune_logger.experiment.log_artifact(best_model_path)

# 6. Test del modelo después de entrenar

In [None]:
# Same validation example that is used for the pretrained-model check.
# NOTE(review): assumes column 1 of the validation CSV holds the mixture wav path — confirm.
PATH_TEST = df_val.iloc[2,1]

# Reload the exported best model from disk.
path_best_model = os.path.join(exp_dir, "best_model.pth")
best_model = DPRNNTasNet.from_pretrained(path_best_model)
# Only move the model to the GPU when one exists, so the cell also runs on a
# CPU-only runtime.
if torch.cuda.is_available():
    best_model.cuda()

mixture, _ = sf.read(PATH_TEST, dtype="float32", always_2d=True)
# Soundfile returns the mixture as shape (time, channels), and Asteroid expects (batch, channels, time)
mixture = mixture.transpose()
mixture = mixture.reshape(1, mixture.shape[0], mixture.shape[1])
out_wavs_after = best_model.separate(mixture)

In [None]:
# Play the original mixture, then each of the two sources estimated by the
# fine-tuned model (played back at 8 kHz).
display(Audio(PATH_TEST))
for src_idx in range(2):
    display(Audio(out_wavs_after[0, src_idx, :], rate=8000))

# 7. Test ejemplos no vistos

In [None]:
# Reload the exported best model for evaluation on the held-out test set.
model_path = os.path.join(conf["main_args"]["exp_dir"], "best_model.pth")
model = DPRNNTasNet.from_pretrained(model_path)
# Handle device placement: use the GPU only when one is available so the
# evaluation also runs on a CPU-only runtime.
if torch.cuda.is_available():
    model.cuda()

# Device the parameters actually ended up on; inputs are moved there later.
model_device = next(model.parameters()).device

# Test set: segment=None (no fixed segment length requested) and return_id=True,
# so each item also carries an identifier as its third element.
test_set = CallSpanish(
    csv_path=PATH_CSV_TEST,
    task="sep_clean",
    sample_rate=8000,
    n_src=2,
    segment=None,
    return_id=True,
)
# Permutation-invariant negative SI-SDR, used here to reorder the estimates.
loss_func = PITLossWrapper(pairwise_neg_sisdr, pit_from="pw_mtx")
# Metrics computed per utterance.
COMPUTE_METRICS = ["si_sdr", "sdr", "sir", "sar", "stoi"]

In [None]:
# Folder for the evaluation metrics and, inside it, the folder for saved audio examples.
eval_save_dir = os.path.join(exp_dir, "metrics")
ex_save_dir = os.path.join(eval_save_dir, "examples/")
# Flag read by the evaluation loop: wav files are uploaded to Neptune only while
# it is True; the loop sets it to False after the first saved example.
neptune_status=True

In [None]:
# Evaluate the model on every test utterance, collecting one row of metrics per
# utterance in `series_list` and writing a random subset of examples to disk.
series_list = []

# Choose ONCE, before the loop, which examples are written to disk. Sampling
# inside the loop drew a new random subset on every iteration, so the number of
# saved examples was arbitrary. A configured value of -1 means "save everything".
if conf["test"]["n_save_examples"] == -1:
    conf["test"]["n_save_examples"] = len(test_set)
# Clamp so random.sample is never asked for more items than the test set holds.
n_save_examples = min(conf["test"]["n_save_examples"], len(test_set))
save_idx = random.sample(range(len(test_set)), n_save_examples)

eval_sample_rate = conf["data"]["sample_rate"]

# Use no_grad as a context manager: calling __enter__() by hand never exits, which
# leaves autograd disabled for the rest of the kernel session.
with torch.no_grad():
    for idx in tqdm(range(len(test_set))):
        # Forward the network on the mixture.
        mix, sources, ids = test_set[idx]
        mix, sources = tensors_to_device([mix, sources], device=model_device)
        # Two leading axes are added to the mixture and one to the sources
        # (presumably batch / channel axes — TODO confirm against CallSpanish).
        est_sources = model(mix[None, None])
        # The loss value itself is unused; the call returns the estimates
        # reordered to match the reference sources.
        loss, reordered_sources = loss_func(est_sources, sources[None], return_est=True)
        mix_np = mix[None].cpu().data.numpy()
        sources_np = sources.cpu().data.numpy()
        est_sources_np = reordered_sources.squeeze(0).cpu().data.numpy()

        utt_metrics = get_metrics(
            mix_np,
            sources_np,
            est_sources_np,
            sample_rate=eval_sample_rate,
            metrics_list=COMPUTE_METRICS,
        )

        # NOTE(review): relies on the dataset exposing the path of the item it
        # just returned as `mixture_path` — confirm in CallSpanish.
        utt_metrics["mix_path"] = test_set.mixture_path
        series_list.append(pd.Series(utt_metrics))

        # Save some examples in a folder. Wav files and metrics as text.
        if idx not in save_idx:
            continue

        local_save_dir = os.path.join(ex_save_dir, "ex_{}/".format(idx))
        os.makedirs(local_save_dir, exist_ok=True)

        mix_path = local_save_dir + "mixture.wav"
        sf.write(mix_path, mix_np[0], eval_sample_rate)

        # Reference sources.
        for src_idx, src in enumerate(sources_np):
            sf.write(local_save_dir + "s{}.wav".format(src_idx + 1), src, eval_sample_rate)

        # Estimated sources; uploaded to Neptune only for the first saved example.
        for src_idx, est_src in enumerate(est_sources_np):
            path_estimation_source = local_save_dir + "s{}_estimate.wav".format(src_idx + 1)
            sf.write(path_estimation_source, est_src, eval_sample_rate)
            if neptune_status:
                neptune_logger.experiment.log_artifact(path_estimation_source)

        # Mixture of the same example, also uploaded only for the first saved one.
        if neptune_status:
            neptune_logger.experiment.log_artifact(mix_path)

        # After the first saved example, stop uploading wav files.
        neptune_status = False

        # Write local metrics to the example folder.
        with open(local_save_dir + "metrics.json", "w") as f:
            json.dump(utt_metrics, f, indent=0)

# 8. Métricas

In [None]:
# The metrics folder is otherwise only created as a side effect of saving an
# audio example; create it explicitly so the writes below cannot fail when no
# example was saved.
os.makedirs(eval_save_dir, exist_ok=True)

# One row per test utterance.
all_metrics_df = pd.DataFrame(series_list)
all_metrics_path = os.path.join(eval_save_dir, "all_metrics.csv")
all_metrics_df.to_csv(all_metrics_path)

# Send all metrics to Neptune.
neptune_logger.experiment.log_artifact(all_metrics_path)

# For every metric, report the mean value and the mean improvement over the
# corresponding "input_<metric>" column.
final_results = {}
for metric_name in COMPUTE_METRICS:
    input_metric_name = "input_" + metric_name
    improvement = all_metrics_df[metric_name] - all_metrics_df[input_metric_name]
    final_results[metric_name] = all_metrics_df[metric_name].mean()
    final_results[metric_name + "_imp"] = improvement.mean()

summary_metrics = os.path.join(eval_save_dir, "final_metrics.json")
with open(summary_metrics, "w") as f:
    json.dump(final_results, f, indent=0)

# Send the summary metrics to Neptune.
neptune_logger.experiment.log_artifact(summary_metrics)

In [None]:
# Close the Neptune experiment explicitly; the logger was created with
# close_after_fit=False, so it is still open at this point.
neptune_logger.experiment.stop()