#### Testing Dataset

In [1]:
import os
import random
import math
from glob import glob
import torch as th
import torchaudio
import pytorch_lightning as pl
from typing import Optional, List
import torch.nn.functional as F
import pandas as pd 
from tqdm.notebook import tqdm 
import numpy as np

from utils.measure_time import measure_time 

In [3]:
import argparse
import sys
from utils.load_config import load_config  

parser = argparse.ArgumentParser()
parser.add_argument("-p", "--hparams", type=str, default="./configs/train_rnn.yml", help="hparams config file")
args, unknown = parser.parse_known_args()  # Игнорирует нераспознанные аргументы
cfg = load_config(args.hparams)

In [4]:
datamodule = AudioDataModule(**cfg['data']).setup(stage = 'train')

Size of training set: 420
Size of validation set: 63
Elapsed time 'setup': 00:00:02.20


In [5]:
dataloaders = {'train': datamodule.train_dataloader(), 'valid': datamodule.val_dataloader()}

In [6]:
# Получение первого батча данных из DataLoader
dataloader = dataloaders['train'] 
sample_mix, sample_refs = next(iter(dataloader))  # Используем iter и next для доступа к данным

In [7]:
print(sample_mix, '\n')
print('chunks_num', len(sample_mix), '\n')
print(sample_mix[0], '\n')
print(sample_mix[0].shape, '\n')
print('----------------------------------------------', '\n')
print('spekears num', len(sample_refs), '\n')
print('firs_speaker list:', sample_refs[0], '\n')
print('chunks_nums', len(sample_refs[0]), '\n')
print(sample_refs[0][0].shape, '\n')

[tensor([[0.0016, 0.0045, 0.0016,  ..., 0.0505, 0.1465, 0.1519]])] 

chunks_num 1 

tensor([[0.0016, 0.0045, 0.0016,  ..., 0.0505, 0.1465, 0.1519]]) 

torch.Size([1, 32000]) 

---------------------------------------------- 

spekears num 2 

firs_speaker list: [tensor([[-0.0042, -0.0083, -0.0139,  ..., -0.0009, -0.0040, -0.0038]])] 

chunks_nums 1 

torch.Size([1, 32000]) 



#### Testing dataloaders LAST UPDATE. 

In [1]:
import argparse
import sys
from utils.load_config import load_config  

parser = argparse.ArgumentParser()
parser.add_argument("-p", "--hparams", type=str, default="./configs/train_rnn.yml", help="hparams config file")
args, unknown = parser.parse_known_args()  # Игнорирует нераспознанные аргументы
cfg = load_config(args.hparams)

In [2]:
from data.DiarizationDataset import DiarizationDataset
datamodule = DiarizationDataset(**cfg['datasets']).setup(stage = 'train')
dataloaders = {'train': datamodule.train_dataloader(), 'valid': datamodule.val_dataloader()}

Size of training set: 3140
Size of validation set: 641
Elapsed time 'setup': 00:00:01.99


In [3]:
# # Получение первого батча данных из DataLoader
# dataloader = dataloaders['train'] 
# sample_mix, sample_refs = next(iter(dataloader))  
# print(sample_mix)
# print('chunks_num', len(sample_mix))
# print(sample_mix[0])
# print(sample_mix[0].shape)
# print('----------------------------------------------')
# print('spekears num', len(sample_refs))
# print('firs_speaker list:', sample_refs[0])
# print('chunks_nums', len(sample_refs[0]))
# print(sample_refs[0][0].shape)

tensor([[0.0209, 0.0117, 0.0137,  ..., 0.0167, 0.0109, 0.0182]])
chunks_num 1
tensor([0.0209, 0.0117, 0.0137,  ..., 0.0167, 0.0109, 0.0182])
torch.Size([32000])
----------------------------------------------
spekears num 2
firs_speaker list: tensor([[ 0.0179,  0.0152,  0.0104,  ...,  0.0005, -0.0008, -0.0029]])
chunks_nums 1
torch.Size([32000])


#### Training NEW

In [37]:
import os
import torch
import torchmetrics
import argparse
from pathlib import Path
from torch.utils.tensorboard import SummaryWriter as TensorBoard
from tqdm.notebook import tqdm
from losses import Loss

from utils.load_config import load_config 
from utils.training import metadata_info, configure_optimizer
from utils.measure_time import measure_time
from utils.training import p_output_log 
from models.model_rnn import Dual_RNN_model

In [21]:
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True
torch.set_float32_matmul_precision('medium')

model = Dual_RNN_model(**cfg['model'])

metadata_info(model)
writer = TensorBoard(f'tb_logs/{Path(args.hparams).stem}', comment = f"{cfg['trainer']['ckpt_folder']}")
optimizer = configure_optimizer (cfg, model)

Trainable parametrs: 2633729
Size of model: 10.05 MB, in float32 



In [47]:
class Trainer:
    def __init__(self, num_epochs = 100, device='cuda', best_weights = False, checkpointing = False, 
                 checkpoint_interval = 10, model_name = '', path_to_weights= './weights', ckpt_folder = '',
                 speaker_num = 2, resume = False) -> None:
        self.num_epochs = num_epochs
        self.device = device
        self.best_weights = best_weights
        self.checkpointing = checkpointing
        self.checkpoint_interval = checkpoint_interval
        self.model_name = model_name
        os.makedirs(path_to_weights, exist_ok=True)
        self.path_to_weights = path_to_weights
        self.ckpt_folder = ckpt_folder
        self.speaker_num = speaker_num
        self.resume = resume

    @measure_time
    def fit(self, model, dataloaders, criterion, optimizer, writer) -> None:
        model.to(self.device)
        min_val_loss = float('inf')
        for epoch in range(self.num_epochs):
            for phase in ['train', 'valid']:
                model.train() if phase == 'train' else model.eval()
                dataloader = dataloaders[phase] 
                running_loss = 0.0
                for inputs, labels in tqdm(dataloader):
                    inputs = inputs.to(self.device)
                    labels = [l.to(self.device) for l in labels]
                    with torch.set_grad_enabled(phase == 'train'):
                        outputs = model(inputs)
                        loss = criterion(outputs, labels)
                        if phase == 'train':
                            optimizer.zero_grad()
                            loss.backward()
                            optimizer.step()
                    running_loss += loss.item()
                epoch_loss = running_loss / len(dataloader.dataset)
                print('epoch_loss', epoch_loss)
                break
                # p_output_log(self.num_epochs, epoch, epoch_loss)

In [48]:
Trainer(**cfg['trainer']).fit(model, dataloaders, Loss, optimizer, writer)

  0%|          | 0/3140 [00:00<?, ?it/s]

epoch_loss 17.45282705118702


  0%|          | 0/3140 [00:00<?, ?it/s]

epoch_loss 17.38201308736376


  0%|          | 0/3140 [00:00<?, ?it/s]

epoch_loss 17.80931663908017


  0%|          | 0/3140 [00:00<?, ?it/s]

epoch_loss 20.055409371321367


  0%|          | 0/3140 [00:00<?, ?it/s]

epoch_loss 17.739234594478727


  0%|          | 0/3140 [00:00<?, ?it/s]

KeyboardInterrupt: 

#### Training 

In [8]:
import os
import torch
import torchmetrics
import argparse
from pathlib import Path
from torch.utils.tensorboard import SummaryWriter as TensorBoard

from utils.load_config import load_config 
from utils.training import metadata_info, configure_optimizer
from utils.measure_time import measure_time
from utils.training import p_output_log 
from models.model_rnn import Dual_RNN_model

In [9]:
torch.backends.cuda.matmul.allow_tf32 = True
torch.backends.cudnn.allow_tf32 = True
torch.set_float32_matmul_precision('medium')

model = Dual_RNN_model(**cfg['model'])

metadata_info(model)
writer = TensorBoard(f'tb_logs/{Path(args.hparams).stem}', comment = f"{cfg['trainer']['ckpt_folder']}")
optimizer = configure_optimizer (cfg, model)

Trainable parametrs: 2633729
Size of model: 10.05 MB, in float32 



In [10]:
import torch
from itertools import permutations

def sisnr(x, s, eps=1e-8):
    """
    calculate training loss
    input:
          x: separated signal, N x S tensor
          s: reference signal, N x S tensor
    Return:
          sisnr: N tensor
    """
    # print(x.shape)
    def l2norm(mat, keepdim=False):
        return torch.norm(mat, dim=-1, keepdim=keepdim)

    if x.shape != s.shape:
        raise RuntimeError(
            "Dimention mismatch when calculate si-snr, {} vs {}".format(
                x.shape, s.shape))
    x_zm = x - torch.mean(x, dim=-1, keepdim=True)
    s_zm = s - torch.mean(s, dim=-1, keepdim=True)
    t = torch.sum(
        x_zm * s_zm, dim=-1,
        keepdim=True) * s_zm / (l2norm(s_zm, keepdim=True)**2 + eps)
    return 20 * torch.log10(eps + l2norm(t) / (l2norm(x_zm - t) + eps))


def CustomLoss(ests, egs):
    # print('ests', ests)
    # print('len ests', len(ests))
    # print('egs', egs)
    # print('len egs', len(egs))
    # spks x n x S
    refs = egs
    num_spks = len(refs)

    def sisnr_loss(permute):
        # print(f"Length of ests: {len(ests)}, Length of refs: {len(refs)}")
        # print(f"Permute: {permute}")
        # for one permute
        return sum([sisnr(ests[s], refs[t]) for s, t in enumerate(permute)]) / len(permute)  # average the value

    # P x N
    N = egs[0].size(0)
    sisnr_mat = torch.stack(
        [sisnr_loss(p) for p in permutations(range(num_spks))])
    max_perutt, _ = torch.max(sisnr_mat, dim=0)
    # si-snr
    return -torch.sum(max_perutt) / N
