# Imports

In [1]:
import os
import pywt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchaudio
import pytorch_lightning as pl
from torch.utils.data import DataLoader, Dataset
from torchaudio.pipelines import HDEMUCS_HIGH_MUSDB
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchaudio
import numpy as np
import random
from pytorch_lightning.loggers import WandbLogger
import wandb
import auraloss
import collections
from tqdm import tqdm
import pretty_midi
import matplotlib.pyplot as plt
from torchaudio.pipelines import HDEMUCS_HIGH_MUSDB
print(torch.cuda.is_available())
import plotly.graph_objects as go
from torch.optim import lr_scheduler
from IPython.display import Audio
from torchaudio.transforms import Fade
import musdb
import museval
import gc
from demucs import pretrained, htdemucs

True


# Set Seeds

In [2]:
# Seed every random number generator used in this notebook with the same value:
# PyTorch (CPU), Python's `random`, NumPy, and PyTorch CUDA (current device, then all devices).
seed_value = 3407
for _seed_rng in (
    torch.manual_seed,
    random.seed,
    np.random.seed,
    torch.cuda.manual_seed,
    torch.cuda.manual_seed_all,
):
    _seed_rng(seed_value)

# Select the 'high' float32 matmul precision mode.
torch.set_float32_matmul_precision('high')

# Construct the Data

In [3]:
# Root folder of the MUSDB18-HQ dataset; it must contain the 'train' and 'test' split folders.
# Defaults to the original local path; set the MUSDB18HQ_ROOT environment variable to run elsewhere.
# A single trailing slash is enforced because later cells build paths by string concatenation.
path = os.environ.get("MUSDB18HQ_ROOT", "D:/Github/phd-drum-sep/Data/musdb18hq/").rstrip("/\\") + "/"

In [4]:
# Sanity check: list the dataset root; the 'train' and 'test' split folders are expected here.
os.listdir(path)

['test', 'train']

In [5]:
# Track folder names of each split, in the order os.listdir returns them.
train, test = (os.listdir(path + split_name) for split_name in ('train', 'test'))

In [6]:
# Stem names. NOTE(review): the stem file loaded later in this notebook is 'drums.wav',
# so 'drum' (singular) may not match file names — confirm before using this list to build paths.
sources = ['drum', 'bass', 'other', 'vocals']

In [7]:
# Index the test split as consecutive, non-overlapping one-second windows.
# all_scenes maps a running integer id to: the track folder ('music_path'), the window's
# first frame ('start_point'), the window length in frames ('length') and the total number
# of frames in the track's mixture ('frames').
all_scenes = {}
counter = 0
sample_rate = 44100
segment_length = sample_rate * 1  # window length in frames (one second)

for val in tqdm(test):
    p = path + 'test/' + val + "/"
    info = torchaudio.info(f"{p}mixture.wav")
    # NOTE(review): assumes the file is sampled at `sample_rate`; info.sample_rate is not checked.
    seconds = info.num_frames // sample_rate
    # The range stops at seconds - 1, so the final whole second of each track is not indexed.
    for i in range(seconds - 1):
        # `start_point` stays a module-level name: a later cell passes it to the loaders.
        start_point = i * segment_length
        if start_point + segment_length < info.num_frames:
            all_scenes[counter] = {'music_path': p, 'start_point': start_point, 'length': segment_length, 'frames' : info.num_frames}
            counter += 1

50it [00:00, 5000.96it/s]


In [8]:
def turn_transcription_into_roll(transcription, frames, fs=44100):
    """Convert a drum transcription into a 5-row, sample-resolution activation roll.

    Only notes of the first instrument are read. Notes with MIDI pitch 35, 38, 42, 47
    or 49 are written to rows 0..4 (in that order); presumably kick, snare, closed
    hi-hat, tom and crash in the General MIDI percussion map — confirm against the
    transcriber that produced the MIDI files. Every other pitch is ignored. (The
    previous implementation filled a 64-row scratch roll first and therefore raised
    IndexError for any note with pitch >= 64.)

    Args:
        transcription: object exposing ``instruments[0].notes``, each note having
            ``pitch``, ``start`` and ``end`` (times in seconds), e.g. a
            ``pretty_midi.PrettyMIDI`` instance.
        frames: number of columns of the roll (length of the audio in samples).
        fs: columns per second used to convert note times to column indices.

    Returns:
        ``numpy.ndarray`` of shape ``(5, int(frames))`` and dtype float64, holding 1
        where a note is active and 0 elsewhere. All zeros when the transcription has
        no instruments.
    """
    drum_pitches = (35, 38, 42, 47, 49)
    row_of_pitch = {pitch: row for row, pitch in enumerate(drum_pitches)}

    roll = np.zeros((len(drum_pitches), int(frames)))

    # A transcription without any instrument track yields an empty (all-zero) roll.
    if not transcription.instruments:
        return roll

    for note in transcription.instruments[0].notes:
        row = row_of_pitch.get(note.pitch)
        if row is None:
            continue  # pitch is not one of the five tracked drums

        # Round outwards so that even very short notes cover at least one column.
        start_frame = int(np.floor(note.start * fs))
        end_frame = int(np.ceil(note.end * fs))

        # Slices running past the end of the roll are clipped by NumPy.
        roll[row, start_frame:end_frame] = 1

    return roll

# model

In [9]:


# Loss function and model definitions
class NewSDRLoss(nn.Module):
    """
    New Signal-to-Distortion Ratio (SDR) loss module based on the MDX challenge definition.

    Args:
        reduction (str, optional): Specifies the reduction to apply to the output:
            'none': no reduction will be applied,
            'mean': the mean of the output will be calculated,
            'sum': the sum of the output will be calculated. Default: 'mean'.
    """
    def __init__(self, reduction='mean'):
        super(NewSDRLoss, self).__init__()
        self.reduction = reduction

    def forward(self, estimates, references):
        """
        Computes the SDR loss between the estimated and reference signals.

        Args:
            estimates (Tensor): Estimated signals, shape (batch, channels, height, width).
            references (Tensor): Reference signals, shape (batch, channels, height, width).

        Returns:
            Tensor: The calculated SDR loss.
        """
        assert references.dim() == 4 and estimates.dim() == 4, "Inputs must be 4D tensors."

        delta = 1e-7  # Avoid numerical errors
        num = torch.sum(torch.square(references), dim=(2, 3))
        den = torch.sum(torch.square(references - estimates), dim=(2, 3))
        num += delta
        den += delta
        scores = 10 * torch.log10(num / den)

        if self.reduction == 'mean':
            return -scores.mean()
        elif self.reduction == 'sum':
            return -scores.sum()
        else:  # 'none'
            return -scores
        
class DrumHybridDemucs(pl.LightningModule):
    """Lightning module wrapping an HTDemucs network that takes audio plus a drum roll.

    The audio and the drum roll are concatenated along the channel axis, passed through
    HTDemucs (configured for 7 input channels and the sources 'drum' and 'noise'), and
    the first source of the output is mapped down to 2 channels by two 1x1 convolutions.
    Training minimises NewSDRLoss between that output and the reference drum signal.
    """
    def __init__(self):
        """Build the loss modules, the HTDemucs backbone and the output convolutions."""
        super(DrumHybridDemucs, self).__init__()

        # Only loss_fn_4 is used by compute_loss; the other three appear solely in the
        # commented-out alternative there.
        # NOTE(review): they are still sub-modules of this class, so removing them may
        # change what a saved checkpoint is expected to contain — verify before deleting.
        self.loss_fn = auraloss.freq.MultiResolutionSTFTLoss(
                    fft_sizes=[1024, 2048, 4096],
                    hop_sizes=[256, 512, 1024],
                    win_lengths=[1024, 2048, 4096],
                    scale="mel", 
                    n_bins=150,
                    sample_rate=44100,
                    # NOTE(review): hard-coded device; likely to fail on a machine without CUDA.
                    device="cuda"
                )

        self.loss_fn_2 = auraloss.time.SISDRLoss()

        self.loss_fn_3 = torch.nn.L1Loss()

        self.loss_fn_4 = NewSDRLoss()

        # Not read anywhere in this class.
        self.loss_used = 0

        sources = ['drum',
                   'noise',
                   ]
        
        # 7 channels: presumably 2 audio channels + 5 drum-roll rows — confirm against the data loader.
        self.demucs_mixer = htdemucs.HTDemucs(sources=sources, audio_channels=7)

        # 1x1 convolutions: 7 -> 2 channels, followed by a 2 -> 2 channel mix.
        self.out_conv = nn.Conv1d(in_channels=7, out_channels=2, kernel_size=1)
        self.out = nn.Conv1d(in_channels=2, out_channels=2, kernel_size=1)      


    def compute_loss(self, outputs, ref_signals):
        """Return NewSDRLoss between `outputs` and `ref_signals`.

        Both tensors get a singleton dimension inserted at index 2 because NewSDRLoss
        only accepts 4D inputs.
        """
        # loss = self.loss_fn(outputs, ref_signals) + self.loss_fn_2(outputs, ref_signals) +  self.loss_fn_3(outputs, ref_signals)
        loss = self.loss_fn_4(outputs.unsqueeze(2), ref_signals.unsqueeze(2))
        return loss
    
    def forward(self, audio, drumroll):
        """Estimate the drum signal from `audio` conditioned on `drumroll`.

        Args:
            audio: mixture tensor; concatenated with `drumroll` along axis 1.
            drumroll: drum-roll tensor; must match `audio` on every axis except axis 1.

        Returns:
            Tensor with 2 channels (the output of the final 1x1 convolution).
        """
        to_mix = torch.cat([audio, drumroll], axis=1)
        out = self.demucs_mixer(to_mix)
        # Keep index 0 along dim 1 of the HTDemucs output — presumably the 'drum' source,
        # the first entry of `sources`; the other source is discarded.
        out_2 = self.out_conv(out[:, 0, :, :])
        out_2 = self.out(out_2)
        # out_2 = torch.tanh(out_2)

        return out_2
    
    def training_step(self, batch, batch_idx):
        """Run one training step and return the loss.

        Args:
            batch: tuple `(audio, drum, drumroll)`.
            batch_idx: index of the batch; every 256th batch also logs audio to wandb.

        Returns:
            The NewSDRLoss between the model output and `drum`.
        """
        # training_step defines the train loop. It is independent of forward
        audio, drum, drumroll = batch
        
        outputs = self.forward(audio, drumroll)
        # print(outputs.size())

        # Periodically log the first item of the batch: input mixture, reference drums,
        # model output and the first five drum-roll rows (each rendered as audio).
        if batch_idx % 256 == 0:
            input_signal = audio[0].cpu().detach().numpy().T
            generated_signal = outputs[0].cpu().detach().numpy().T
            drum_signal = drum[0].cpu().detach().numpy().T 
            wandb.log({'audio_input': [wandb.Audio(input_signal, caption="Input", sample_rate=44100)]})
            wandb.log({'audio_reference': [wandb.Audio(drum_signal, caption="Reference", sample_rate=44100)]})
            wandb.log({'audio_output': [wandb.Audio(generated_signal, caption="Output", sample_rate=44100)]})
             
            for i in range(5):
                wandb.log({f'drum_{i + 1}': [wandb.Audio(drumroll[0].cpu().detach().numpy()[i, :], caption="Output", sample_rate=44100)]})


        loss = self.compute_loss(outputs, drum)         

        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        
        return loss
    

    def configure_optimizers(self):
        """Return Adam (lr=0.001) with a StepLR scheduler (step_size=1, gamma=0.99)."""
        # Define your optimizer and optionally learning rate scheduler here
        optimizer = optim.Adam(self.parameters(), lr=0.001)
        scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.99)
        return [optimizer], [scheduler]
        


In [10]:
def load_audio(path, start_point, filename):
    """Load an entire audio file as a waveform tensor.

    Args:
        path: directory that contains the file.
        start_point: unused; kept so existing callers keep working. The whole file
            is always returned, regardless of this value.
        filename: name of the audio file inside `path`.

    Returns:
        The waveform tensor returned by `torchaudio.load`; the sample rate it also
        returns is discarded.
    """
    waveform, _ = torchaudio.load(f"{path}/{filename}")
    return waveform

def load_roll(path, start_point, frames):
    """Load the drum transcription stored next to a track and return it as a roll tensor.

    Args:
        path: track directory; the MIDI file is read from `path + '/mixture.wav.mid'`.
        start_point: unused; the roll always covers the track from its first frame.
        frames: number of columns of the roll, forwarded to `turn_transcription_into_roll`.

    Returns:
        float32 tensor built from the array returned by `turn_transcription_into_roll`.
    """
    transcription = pretty_midi.PrettyMIDI(path + '/mixture.wav.mid')
    roll_array = turn_transcription_into_roll(transcription, frames)
    return torch.from_numpy(roll_array).float()


# Evaluation: museval BSS metrics (SDR, SIR, ISR, SAR)

In [11]:
def separate_sources(
    model,
    mix,
    drumroll,
    segment=4.0,
    overlap=0,
    device=None,
    sample_rate=44100,
    ones_roll=True,
):
    """
    Apply model to a given mixture. Use fade, and add segments together in order to add model segment by segment.

    Args:
        model: module called as `model(chunk, roll)`; it must expose a `device`
            attribute and return a tensor shaped like `chunk`.
        mix (Tensor): mixture of shape (batch, channels, length).
        drumroll (Tensor): drum roll aligned with `mix` along the last axis. With
            `ones_roll=True` only its shape is used.
        segment (float): segment length in seconds
        overlap (float): overlap between consecutive segments, in seconds. With the
            default of 0 the segments are simply placed one after another.
        device (torch.device, str, or None): device on which the separated track is
            accumulated; `mix.device` when None. Chunks are moved to `model.device`
            for the forward pass and the result is moved to `device`, so the full
            input track can stay on `mix.device`.
        sample_rate (int): sample rate of `mix` in Hz. Previously read from a
            notebook-level global with the same value.
        ones_roll (bool): when True (default, the behavior this function always had)
            the model is conditioned on an all-ones roll instead of `drumroll`.
            Pass False to condition on the actual transcription.

    Returns:
        Tensor of shape (batch, channels, length) on `device`.
    """
    if device is None:
        device = mix.device
    else:
        device = torch.device(device)

    batch, channels, length = mix.shape

    chunk_len = int(sample_rate * segment * (1 + overlap))
    start = 0
    end = chunk_len
    overlap_frames = overlap * sample_rate
    fade = Fade(fade_in_len=0, fade_out_len=int(overlap_frames), fade_shape="linear")

    final = torch.zeros(batch, channels, length, device=device)

    while start < length - overlap_frames:
        chunk = mix[:, :, start:end].to(model.device)
        roll = drumroll[:, :, start:end]
        if ones_roll:
            roll = torch.ones_like(roll)
        roll = roll.to(model.device)
        with torch.no_grad():
            # Call the module rather than .forward() so registered hooks still run.
            out = model(chunk, roll)
        out = fade(out).to(device)
        final[:, :, start:end] += out
        if start == 0:
            # Every chunk after the first one also fades in over the overlap region.
            fade.fade_in_len = int(overlap_frames)
            start += int(chunk_len - overlap_frames)
        else:
            start += chunk_len
        end += chunk_len
        if end >= length:
            # The last chunk reaches the end of the track, so nothing follows to cross-fade into.
            fade.fade_out_len = 0
    return final

In [12]:
class AudioData:
    """Minimal holder that exposes a value through an ``audio`` attribute.

    In this notebook it wraps NumPy audio arrays that replace entries of
    ``track.targets`` before evaluation.
    """

    def __init__(self, audio):
        # Stored as-is: no copy, no validation.
        self.audio = audio

In [13]:
# Evaluate each checkpoint on the MUSDB18 test tracks with museval and write the scores to disk.
# `results` is left at notebook level because the next cell aggregates it.
for name in ['epoch_280']:
    try:
        # Defined before the track loop so the CSV export below works even if every track fails.
        output_path = f"D:/Github/phd-drum-sep/analysis/ht_demucs_small_analysis/results_{name}_ones/"
        # exist_ok makes re-runs safe without hiding real failures (permissions, missing drive, ...).
        os.makedirs(output_path, exist_ok=True)

        model = DrumHybridDemucs.load_from_checkpoint(f'D:/Github/phd-drum-sep/analysis/ht_demucs_small_analysis/checkpoint/{name}.ckpt')
        # model.to('cpu')
        model = model.eval()

        mus = musdb.DB(root="D:/Github/phd-drum-sep/Data/musdb18_stems/", subsets="test")

        # Audio is read from the HQ wav release; `mus` only supplies track names and the track objects.
        hq_root = "D:/Github/phd-drum-sep/Data/musdb18hq/"

        results = museval.EvalStore(frames_agg='median', tracks_agg='median')
        for track in tqdm(mus):
            try:
                audio_path = hq_root + 'test/' + track.name + "/"

                # The loaders ignore their start_point argument and always read the whole
                # track, so a literal 0 replaces the variable leaked from an earlier loop.
                mixture_tensor = load_audio(audio_path, 0, 'mixture.wav').unsqueeze(0).to(model.device)
                drum_tensor = load_audio(audio_path, 0, 'drums.wav').unsqueeze(0).to(model.device)
                shape = mixture_tensor.shape[2]
                roll_tensor = load_roll(audio_path, 0, shape).unsqueeze(0).to(model.device)

                # Accumulate on the model's own device instead of a hard-coded 'cuda'.
                seperated = separate_sources(model, mixture_tensor, roll_tensor, device=model.device)

                # museval expects (samples, channels) arrays.
                audio = seperated.squeeze(0).cpu().numpy()
                audio = np.swapaxes(audio,0,1)
                # 'bass' duplicates 'drums' for both the estimate and the reference, so the
                # 'bass' rows of the results are copies of the 'drums' rows. Presumably
                # museval needs more than one target — confirm.
                estimates = {'drums': audio, 'bass': audio}

                d = drum_tensor.squeeze(0).cpu().numpy()
                d = np.swapaxes(d,0,1)

                # Score against the HQ drum stem loaded above rather than the stem stored in `track`.
                track.targets['drums'] = AudioData(d)
                track.targets['bass'] = AudioData(d)

                scores = museval.eval_mus_track(
                    track, estimates, output_dir=f"{output_path}"
                )

                print(scores)
                results.add_track(scores)
                gc.collect()
                # break
            except Exception as e:
                # Keep evaluating the remaining tracks, but report the failure instead of
                # hiding it; KeyboardInterrupt is no longer swallowed.
                print('error with track, skipping', track.name, e)

        results.df.to_csv(f"{output_path}results.csv")

        del model
        del mus

    except Exception as e:
        print('error with model, skipping', name, e)

C:\Python311\Lib\site-packages\pytorch_lightning\utilities\migration\utils.py:55: The loaded checkpoint was produced with Lightning v2.2.1, which is newer than your current Lightning version: v2.1.2
  2%|████                                                                                                                                                                                                    | 1/50 [00:36<29:35, 36.24s/it]

drums           ==> SDR:   4.122  SIR: 228.721  ISR:   9.581  SAR:   3.204  
bass            ==> SDR:   4.122  SIR: 228.721  ISR:   9.581  SAR:   3.204  



  4%|████████                                                                                                                                                                                                | 2/50 [01:09<27:44, 34.67s/it]

drums           ==> SDR:   0.881  SIR: 188.397  ISR:   8.773  SAR:  -1.998  
bass            ==> SDR:   0.881  SIR: 188.397  ISR:   8.773  SAR:  -1.998  



  6%|████████████                                                                                                                                                                                            | 3/50 [01:31<22:21, 28.53s/it]

drums           ==> SDR:   2.020  SIR: 227.089  ISR:   9.957  SAR:  -0.092  
bass            ==> SDR:   2.020  SIR: 227.089  ISR:   9.957  SAR:  -0.092  



  8%|████████████████                                                                                                                                                                                        | 4/50 [02:06<23:56, 31.22s/it]

drums           ==> SDR:  -1.216  SIR: 236.837  ISR:  12.262  SAR:  -3.290  
bass            ==> SDR:  -1.216  SIR: 236.837  ISR:  12.262  SAR:  -3.290  



 10%|████████████████████                                                                                                                                                                                    | 5/50 [02:48<26:13, 34.98s/it]

drums           ==> SDR:   6.449  SIR: 198.168  ISR:  14.501  SAR:   5.648  
bass            ==> SDR:   6.449  SIR: 198.168  ISR:  14.501  SAR:   5.648  



 12%|████████████████████████                                                                                                                                                                                | 6/50 [03:22<25:35, 34.90s/it]

drums           ==> SDR:   3.951  SIR: 241.140  ISR:   9.083  SAR:   2.052  
bass            ==> SDR:   3.951  SIR: 241.140  ISR:   9.083  SAR:   2.052  



 14%|████████████████████████████                                                                                                                                                                            | 7/50 [03:56<24:47, 34.60s/it]

drums           ==> SDR:   4.179  SIR: 192.911  ISR:  11.826  SAR:   2.751  
bass            ==> SDR:   4.179  SIR: 192.911  ISR:  11.826  SAR:   2.751  



 16%|████████████████████████████████                                                                                                                                                                        | 8/50 [04:32<24:24, 34.86s/it]

drums           ==> SDR:   2.346  SIR: 246.221  ISR:   9.468  SAR:   0.151  
bass            ==> SDR:   2.346  SIR: 246.221  ISR:   9.468  SAR:   0.151  



 18%|████████████████████████████████████                                                                                                                                                                    | 9/50 [05:10<24:28, 35.83s/it]

drums           ==> SDR:   3.420  SIR: 175.517  ISR:   5.866  SAR:   1.306  
bass            ==> SDR:   3.420  SIR: 175.517  ISR:   5.866  SAR:   1.306  



 20%|███████████████████████████████████████▊                                                                                                                                                               | 10/50 [05:46<23:56, 35.90s/it]

drums           ==> SDR:   2.704  SIR: 222.817  ISR:   9.252  SAR:   0.582  
bass            ==> SDR:   2.704  SIR: 222.817  ISR:   9.252  SAR:   0.582  



 22%|███████████████████████████████████████████▊                                                                                                                                                           | 11/50 [06:23<23:37, 36.33s/it]

drums           ==> SDR:   6.483  SIR: 214.925  ISR:  12.468  SAR:   5.723  
bass            ==> SDR:   6.483  SIR: 214.925  ISR:  12.468  SAR:   5.723  



 24%|███████████████████████████████████████████████▊                                                                                                                                                       | 12/50 [06:57<22:37, 35.73s/it]

drums           ==> SDR:   5.375  SIR: 209.608  ISR:   6.984  SAR:   4.778  
bass            ==> SDR:   5.375  SIR: 209.608  ISR:   6.984  SAR:   4.778  



 26%|███████████████████████████████████████████████████▋                                                                                                                                                   | 13/50 [07:18<19:11, 31.12s/it]

drums           ==> SDR:   1.399  SIR: 167.040  ISR:   3.072  SAR:  -1.772  
bass            ==> SDR:   1.399  SIR: 167.040  ISR:   3.072  SAR:  -1.772  



 28%|███████████████████████████████████████████████████████▋                                                                                                                                               | 14/50 [07:55<19:44, 32.89s/it]

drums           ==> SDR:   6.839  SIR: 211.464  ISR:  12.510  SAR:   6.461  
bass            ==> SDR:   6.839  SIR: 211.464  ISR:  12.510  SAR:   6.461  



 30%|███████████████████████████████████████████████████████████▋                                                                                                                                           | 15/50 [09:04<25:38, 43.97s/it]

drums           ==> SDR:   4.494  SIR: 243.031  ISR:  11.296  SAR:   3.294  
bass            ==> SDR:   4.494  SIR: 243.031  ISR:  11.296  SAR:   3.294  



 32%|███████████████████████████████████████████████████████████████▋                                                                                                                                       | 16/50 [09:43<23:59, 42.34s/it]

drums           ==> SDR:   3.911  SIR: 221.709  ISR:  13.419  SAR:   2.483  
bass            ==> SDR:   3.911  SIR: 221.709  ISR:  13.419  SAR:   2.483  



 34%|███████████████████████████████████████████████████████████████████▋                                                                                                                                   | 17/50 [10:02<19:21, 35.18s/it]

drums           ==> SDR:   1.124  SIR: 226.004  ISR:   3.404  SAR:  -2.942  
bass            ==> SDR:   1.124  SIR: 226.004  ISR:   3.404  SAR:  -2.942  



 36%|███████████████████████████████████████████████████████████████████████▋                                                                                                                               | 18/50 [10:35<18:30, 34.70s/it]

drums           ==> SDR:   4.974  SIR: 227.974  ISR:  12.863  SAR:   4.159  
bass            ==> SDR:   4.974  SIR: 227.974  ISR:  12.863  SAR:   4.159  



 38%|███████████████████████████████████████████████████████████████████████████▌                                                                                                                           | 19/50 [11:11<18:09, 35.13s/it]

drums           ==> SDR:   6.465  SIR: 224.982  ISR:  18.462  SAR:   5.869  
bass            ==> SDR:   6.465  SIR: 224.982  ISR:  18.462  SAR:   5.869  



 40%|███████████████████████████████████████████████████████████████████████████████▌                                                                                                                       | 20/50 [11:48<17:49, 35.65s/it]

drums           ==> SDR:   1.116  SIR: 221.078  ISR:   1.680  SAR:   3.676  
bass            ==> SDR:   1.116  SIR: 221.078  ISR:   1.680  SAR:   3.676  



 42%|███████████████████████████████████████████████████████████████████████████████████▌                                                                                                                   | 21/50 [12:26<17:31, 36.27s/it]

drums           ==> SDR:   4.687  SIR: 155.201  ISR:  12.497  SAR:   3.759  
bass            ==> SDR:   4.687  SIR: 155.201  ISR:  12.497  SAR:   3.759  



 44%|███████████████████████████████████████████████████████████████████████████████████████▌                                                                                                               | 22/50 [13:02<16:54, 36.23s/it]

drums           ==> SDR:   0.804  SIR: 180.506  ISR:  13.071  SAR:  -0.828  
bass            ==> SDR:   0.804  SIR: 180.506  ISR:  13.071  SAR:  -0.828  



 46%|███████████████████████████████████████████████████████████████████████████████████████████▌                                                                                                           | 23/50 [13:40<16:32, 36.75s/it]

drums           ==> SDR:   3.158  SIR: 169.554  ISR:  11.499  SAR:   1.389  
bass            ==> SDR:   3.158  SIR: 169.554  ISR:  11.499  SAR:   1.389  



 48%|███████████████████████████████████████████████████████████████████████████████████████████████▌                                                                                                       | 24/50 [14:15<15:40, 36.18s/it]

drums           ==> SDR:   6.543  SIR: 144.954  ISR:  12.737  SAR:   6.446  
bass            ==> SDR:   6.543  SIR: 144.954  ISR:  12.737  SAR:   6.446  



 50%|███████████████████████████████████████████████████████████████████████████████████████████████████▌                                                                                                   | 25/50 [14:50<14:57, 35.92s/it]

drums           ==> SDR:   0.439  SIR: 235.785  ISR:   6.367  SAR:  -3.627  
bass            ==> SDR:   0.439  SIR: 235.785  ISR:   6.367  SAR:  -3.627  



 52%|███████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                                               | 26/50 [15:30<14:51, 37.17s/it]

drums           ==> SDR:   8.624  SIR: 164.437  ISR:  16.580  SAR:   8.558  
bass            ==> SDR:   8.624  SIR: 164.437  ISR:  16.580  SAR:   8.558  



 54%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                                           | 27/50 [16:10<14:35, 38.07s/it]

drums           ==> SDR:   5.405  SIR: 173.510  ISR:  13.164  SAR:   4.872  
bass            ==> SDR:   5.405  SIR: 173.510  ISR:  13.164  SAR:   4.872  



 56%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                                       | 28/50 [16:31<12:02, 32.83s/it]

drums           ==> SDR:  10.348  SIR: 200.239  ISR:  14.999  SAR:  10.838  
bass            ==> SDR:  10.348  SIR: 200.239  ISR:  14.999  SAR:  10.838  



 58%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                                   | 29/50 [16:42<09:13, 26.35s/it]

drums           ==> SDR:   5.296  SIR: 211.475  ISR:   7.913  SAR:   5.989  
bass            ==> SDR:   5.296  SIR: 211.475  ISR:   7.913  SAR:   5.989  



 60%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                               | 30/50 [17:17<09:39, 28.96s/it]

drums           ==> SDR:   2.659  SIR: 205.685  ISR:  12.849  SAR:   1.701  
bass            ==> SDR:   2.659  SIR: 205.685  ISR:  12.849  SAR:   1.701  



 62%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                           | 31/50 [17:52<09:40, 30.57s/it]

drums           ==> SDR:   2.552  SIR: 190.578  ISR:  10.215  SAR:   0.485  
bass            ==> SDR:   2.552  SIR: 190.578  ISR:  10.215  SAR:   0.485  



 64%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                                       | 32/50 [18:29<09:45, 32.53s/it]

drums           ==> SDR:   0.404  SIR: 239.469  ISR:   8.773  SAR:  -1.850  
bass            ==> SDR:   0.404  SIR: 239.469  ISR:   8.773  SAR:  -1.850  



 66%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                                   | 33/50 [19:08<09:45, 34.45s/it]

drums           ==> SDR:   1.303  SIR: 243.549  ISR:   6.420  SAR:  -2.497  
bass            ==> SDR:   1.303  SIR: 243.549  ISR:   6.420  SAR:  -2.497  



 68%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                               | 34/50 [19:44<09:19, 34.97s/it]

drums           ==> SDR:   2.696  SIR: 236.434  ISR:   5.588  SAR:   0.775  
bass            ==> SDR:   2.696  SIR: 236.434  ISR:   5.588  SAR:   0.775  



 70%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                           | 35/50 [20:20<08:49, 35.31s/it]

drums           ==> SDR:   8.090  SIR: 219.289  ISR:  14.927  SAR:   7.746  
bass            ==> SDR:   8.090  SIR: 219.289  ISR:  14.927  SAR:   7.746  



 72%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                       | 36/50 [20:44<07:26, 31.86s/it]

drums           ==> SDR:   1.140  SIR: 199.363  ISR:   7.618  SAR:  -1.616  
bass            ==> SDR:   1.140  SIR: 199.363  ISR:   7.618  SAR:  -1.616  



 74%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                   | 37/50 [21:54<09:22, 43.30s/it]

drums           ==> SDR:   3.730  SIR: 237.210  ISR:  14.385  SAR:   2.676  
bass            ==> SDR:   3.730  SIR: 237.210  ISR:  14.385  SAR:   2.676  



 76%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                               | 38/50 [22:30<08:14, 41.22s/it]

drums           ==> SDR:   3.475  SIR: 222.483  ISR:   9.093  SAR:   2.495  
bass            ==> SDR:   3.475  SIR: 222.483  ISR:   9.093  SAR:   2.495  



 78%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                           | 39/50 [23:09<07:25, 40.52s/it]

drums           ==> SDR:   2.655  SIR: 230.693  ISR:   5.786  SAR:   1.654  
bass            ==> SDR:   2.655  SIR: 230.693  ISR:   5.786  SAR:   1.654  



 80%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                       | 40/50 [23:49<06:44, 40.42s/it]

drums           ==> SDR:   4.443  SIR: 192.135  ISR:  10.116  SAR:   3.161  
bass            ==> SDR:   4.443  SIR: 192.135  ISR:  10.116  SAR:   3.161  



 82%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                   | 41/50 [24:24<05:49, 38.87s/it]

drums           ==> SDR:   4.936  SIR: 176.725  ISR:  13.778  SAR:   4.119  
bass            ==> SDR:   4.936  SIR: 176.725  ISR:  13.778  SAR:   4.119  



 84%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                               | 42/50 [25:00<05:03, 37.89s/it]

drums           ==> SDR:   6.763  SIR: 145.094  ISR:  12.979  SAR:   6.032  
bass            ==> SDR:   6.763  SIR: 145.094  ISR:  12.979  SAR:   6.032  



 86%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                           | 43/50 [25:38<04:25, 37.91s/it]

drums           ==> SDR:   0.996  SIR: 176.435  ISR:   8.712  SAR:  -1.633  
bass            ==> SDR:   0.996  SIR: 176.435  ISR:   8.712  SAR:  -1.633  



 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                        | 44/50 [26:12<03:40, 36.75s/it]

drums           ==> SDR:   2.579  SIR: 195.689  ISR:   5.324  SAR:   0.358  
bass            ==> SDR:   2.579  SIR: 195.689  ISR:   5.324  SAR:   0.358  



 90%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                    | 45/50 [26:52<03:08, 37.70s/it]

drums           ==> SDR:   2.627  SIR: 199.998  ISR:   5.466  SAR:   0.131  
bass            ==> SDR:   2.627  SIR: 199.998  ISR:   5.466  SAR:   0.131  



 92%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                | 46/50 [27:29<02:29, 37.38s/it]

drums           ==> SDR:   0.500  SIR: 219.794  ISR:   3.024  SAR:  -3.838  
bass            ==> SDR:   0.500  SIR: 219.794  ISR:   3.024  SAR:  -3.838  



 94%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████            | 47/50 [27:49<01:36, 32.29s/it]

drums           ==> SDR:   7.877  SIR: 153.319  ISR:  14.088  SAR:   7.308  
bass            ==> SDR:   7.877  SIR: 153.319  ISR:  14.088  SAR:   7.308  



 96%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████        | 48/50 [28:25<01:06, 33.31s/it]

drums           ==> SDR:   1.320  SIR: 200.368  ISR:   9.415  SAR:  -1.059  
bass            ==> SDR:   1.320  SIR: 200.368  ISR:   9.415  SAR:  -1.059  



 98%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████    | 49/50 [29:00<00:33, 33.78s/it]

drums           ==> SDR:   5.113  SIR: 210.366  ISR:  17.481  SAR:   4.456  
bass            ==> SDR:   5.113  SIR: 210.366  ISR:  17.481  SAR:   4.456  



100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [29:35<00:00, 35.51s/it]

drums           ==> SDR:   5.301  SIR: 212.308  ISR:  12.645  SAR:   4.194  
bass            ==> SDR:   5.301  SIR: 212.308  ISR:  12.645  SAR:   4.194  






In [14]:
# Aggregate the per-track scores using the median frame/track aggregation configured on the EvalStore.
# The 'bass' rows equal the 'drums' rows because both were scored from the same estimate and reference.
results.agg_frames_tracks_scores()

target  metric
bass    ISR        10.165520
        SAR         2.489147
        SDR         3.602670
        SIR       210.914925
drums   ISR        10.165520
        SAR         2.489147
        SDR         3.602670
        SIR       210.914925
Name: score, dtype: float64