# Imports

In [1]:
import os
import pywt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchaudio
import pytorch_lightning as pl
from torch.utils.data import DataLoader, Dataset
from torchaudio.pipelines import HDEMUCS_HIGH_MUSDB
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchaudio
import numpy as np
import random
from pytorch_lightning.loggers import WandbLogger
import wandb
import auraloss
import collections
from tqdm import tqdm
import pretty_midi
import matplotlib.pyplot as plt
from torchaudio.pipelines import HDEMUCS_HIGH_MUSDB
print(torch.cuda.is_available())
import plotly.graph_objects as go
from torch.optim import lr_scheduler
from IPython.display import Audio
from torchaudio.transforms import Fade
import musdb
import museval
import gc

True


# Set Seeds

In [2]:
seed_value = 3407
torch.manual_seed(seed_value)
random.seed(seed_value)
np.random.seed(seed_value)
torch.cuda.manual_seed(seed_value)
torch.cuda.manual_seed_all(seed_value)
torch.set_float32_matmul_precision('high')

# Construct Teh Datas

In [3]:
path = "D:/Github/phd-drum-sep/Data/musdb18hq/"

In [4]:
os.listdir(path)

['test', 'train']

In [5]:
train = list(os.listdir(path+'train'))
test = list(os.listdir(path+'test'))

In [6]:
sources = ['drum', 'bass', 'other', 'vocals']

In [7]:
all_scenes = {}
counter = 0
sample_rate = 44100
segment_length = sample_rate * 1

for idx, val in tqdm(enumerate(test)):
    p = path + 'test/' + val + "/"
    info = torchaudio.info(f"{p}mixture.wav")
    seconds = info.num_frames // 44100
    for i in range(0, seconds - 1, 1):
        start_point = i * 44100
        if start_point + 44100 < info.num_frames:
            all_scenes[counter] = {'music_path': p, 'start_point': start_point, 'length': 44100, 'frames' : info.num_frames}
            counter += 1

50it [00:00, 4545.59it/s]


In [8]:
def turn_transcription_into_roll(transcription, frames):
    # Determine your sampling frequency (frames per second)
    fs = 44100
    
    piano_roll_length = int(frames)
    
    # Initialize the piano roll array
    piano_roll = np.zeros((64, piano_roll_length))
    
    # Fill in the piano roll array
    for note in transcription.instruments[0].notes:
        # Convert start and end times to frame indices
        start_frame = int(np.floor(note.start * fs))
        end_frame = int(np.ceil(note.end * fs))
        
        # Set the corresponding frames to 1 (or note.velocity for a velocity-sensitive representation)
        piano_roll[note.pitch, start_frame:end_frame] = 1  # Or use note.velocity
        
    roll = np.vstack([piano_roll[35:36, :], piano_roll[38:39, :], piano_roll[42:43, :], piano_roll[47:48, :], piano_roll[49:50, :]])
    return roll

# model

In [9]:
class DrumDemucs(pl.LightningModule):
    def __init__(self):
        super(DrumDemucs, self).__init__()

        self.loss_fn = auraloss.freq.MultiResolutionSTFTLoss(
                    fft_sizes=[1024, 2048, 4096],
                    hop_sizes=[256, 512, 1024],
                    win_lengths=[1024, 2048, 4096],
                    scale="mel", 
                    n_bins=150,
                    sample_rate=44100,
                    device="cuda"
                )

        self.loss_fn_2 = auraloss.time.SISDRLoss()

        self.loss_fn_3 = torch.nn.L1Loss()

        self.loss_used = 0

        sources = ['drum',
                   'noise',
                   ]
        
        self.demucs_mixer =  torchaudio.models.HDemucs(
            sources=sources,
            audio_channels=7,
            depth=6,
        )

        self.out_conv = nn.Conv1d(in_channels=7, out_channels=2, kernel_size=1)
        self.out = nn.Conv1d(in_channels=2, out_channels=2, kernel_size=1)      


    def compute_loss(self, outputs, ref_signals):
        loss = self.loss_fn(outputs, ref_signals) + self.loss_fn_2(outputs, ref_signals) +  self.loss_fn_3(outputs, ref_signals)
        return loss

    def forward(self, audio, drumroll):
        to_mix = torch.cat([audio, drumroll], axis=1)
        out = self.demucs_mixer(to_mix)
        out_2 = self.out_conv(out[:, 0, :, :])
        out_2 = self.out(out_2)
        # out_2 = torch.tanh(out_2)

        return out_2
    
    def training_step(self, batch, batch_idx):
        # training_step defines the train loop. It is independent of forward
        audio, drum, drumroll = batch
        
        outputs = self.forward(audio, drumroll)
        # print(outputs.size())

        if batch_idx % 64 == 0:
            input_signal = audio[0].cpu().detach().numpy().T
            generated_signal = outputs[0].cpu().detach().numpy().T
            drum_signal = drum[0].cpu().detach().numpy().T 
            wandb.log({'audio_input': [wandb.Audio(input_signal, caption="Input", sample_rate=44100)]})
            wandb.log({'audio_reference': [wandb.Audio(drum_signal, caption="Reference", sample_rate=44100)]})
            wandb.log({'audio_output': [wandb.Audio(generated_signal, caption="Output", sample_rate=44100)]})
             
            for i in range(5):
                wandb.log({f'drum_{i + 1}': [wandb.Audio(drumroll[0].cpu().detach().numpy()[i, :], caption="Output", sample_rate=44100)]})


        loss = self.compute_loss(outputs, drum)         

        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)
        
        return loss
    

    def configure_optimizers(self):
        # Define your optimizer and optionally learning rate scheduler here
        optimizer = optim.Adam(self.parameters(), lr=0.001)
        scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.99)
        return [optimizer], [scheduler]
        

In [10]:
def load_audio(path, start_point, filename):
    audio_tensors = []
    waveform, _ = torchaudio.load(f"{path}/{filename}")
    return waveform

def load_roll(path, start_point, frames):
    midi = path + '/mixture.wav.mid'
    transcription = pretty_midi.PrettyMIDI(midi)
    roll = turn_transcription_into_roll(transcription, frames)

    return torch.from_numpy(roll).float()


# SISNR

In [11]:
def separate_sources(
    model,
    mix,
    drumroll,
    segment=4.0,
    overlap=0,
    device=None,
):
    """
    Apply model to a given mixture. Use fade, and add segments together in order to add model segment by segment.

    Args:
        segment (int): segment length in seconds
        device (torch.device, str, or None): if provided, device on which to
            execute the computation, otherwise `mix.device` is assumed.
            When `device` is different from `mix.device`, only local computations will
            be on `device`, while the entire tracks will be stored on `mix.device`.
    """
    if device is None:
        device = mix.device
    else:
        device = torch.device(device)

    batch, channels, length = mix.shape

    chunk_len = int(sample_rate * segment * (1 + overlap))
    start = 0
    end = chunk_len
    overlap_frames = overlap * sample_rate
    fade = Fade(fade_in_len=0, fade_out_len=int(overlap_frames), fade_shape="linear")

    final = torch.zeros(batch, channels, length, device=device)

    while start < length - overlap_frames:
        chunk = mix[:, :, start:end]
        roll = drumroll[:, :, start:end]
        roll = torch.ones_like(roll).to(model.device)
        with torch.no_grad():
            out = model.forward(chunk, roll)
        out = fade(out)
        final[:, :, start:end] += out
        if start == 0:
            fade.fade_in_len = int(overlap_frames)
            start += int(chunk_len - overlap_frames)
        else:
            start += chunk_len
        end += chunk_len
        if end >= length:
            fade.fade_out_len = 0
    return final

In [12]:
class AudioData:
    def __init__(self, audio):
        self.audio = audio

In [13]:
for name in ['epoch_280']:
    try:
        try:
            os.mkdir(f"D:/Github/phd-drum-sep/analysis/demucs_small_model_analysis/results_{name}_ones/")
        except:
            pass
    
        model = DrumDemucs.load_from_checkpoint(f'D:/Github/phd-drum-sep/analysis/demucs_small_model_analysis/checkpoint/{name}.ckpt')
        # model.to('cpu')
        model = model.eval()
        
        mus = musdb.DB(root="D:/Github/phd-drum-sep/Data/musdb18_stems/", subsets="test") 
        
        results = museval.EvalStore(frames_agg='median', tracks_agg='median')
        for track in tqdm(mus):
            try:
                path = "D:/Github/phd-drum-sep/Data/musdb18hq/"
                audio_path = path + 'test/' + track.name + "/"
            
            
                mixture_tensor = load_audio(audio_path, start_point,'mixture.wav').unsqueeze(0).to(model.device)
                drum_tensor = load_audio(audio_path, start_point,'drums.wav').unsqueeze(0).to(model.device)
                shape = mixture_tensor.shape[2]
                roll_tensor = load_roll(audio_path, start_point, shape).unsqueeze(0).to(model.device)
            
                seperated = separate_sources(model, mixture_tensor, roll_tensor, device='cuda')
            
                output_path = f"D:/Github/phd-drum-sep/analysis/demucs_small_model_analysis/results_{name}_ones/"
                
                audio = seperated.squeeze(0).cpu().numpy()
                audio = np.swapaxes(audio,0,1)
                estimates = {'drums': audio, 'bass': audio}
            
                d = drum_tensor.squeeze(0).cpu().numpy()
                d = np.swapaxes(d,0,1)
                
                track.targets['drums'] = AudioData(d)
                track.targets['bass'] = AudioData(d)
                
                scores = museval.eval_mus_track(
                    track, estimates, output_dir=f"{output_path}"
                )
            
                print(scores)
                results.add_track(scores)
                gc.collect()
                # break
            except:
                pass
        
        results.df.to_csv(f"{output_path}results.csv")
    
        del model
        del mus
    except Exception as e:
        print('error with model, skipping', name, e)

  2%|████                                                                                                                                                                                                    | 1/50 [00:33<27:37, 33.82s/it]

drums           ==> SDR:  -6.213  SIR: 228.320  ISR:   2.748  SAR:  -6.355  
bass            ==> SDR:  -6.213  SIR: 228.320  ISR:   2.748  SAR:  -6.355  



  4%|████████                                                                                                                                                                                                | 2/50 [01:06<26:40, 33.35s/it]

drums           ==> SDR:  -4.126  SIR: 182.823  ISR:   5.253  SAR:  -8.449  
bass            ==> SDR:  -4.126  SIR: 182.823  ISR:   5.253  SAR:  -8.449  



  6%|████████████                                                                                                                                                                                            | 3/50 [01:26<21:15, 27.14s/it]

drums           ==> SDR:  -5.441  SIR: 218.257  ISR:   3.923  SAR:  -9.451  
bass            ==> SDR:  -5.441  SIR: 218.257  ISR:   3.923  SAR:  -9.451  



  8%|████████████████                                                                                                                                                                                        | 4/50 [02:00<22:43, 29.63s/it]

drums           ==> SDR:  -1.320  SIR: 238.022  ISR:   9.380  SAR:  -4.226  
bass            ==> SDR:  -1.320  SIR: 238.022  ISR:   9.380  SAR:  -4.226  



 10%|████████████████████                                                                                                                                                                                    | 5/50 [02:38<24:31, 32.70s/it]

drums           ==> SDR:  -0.845  SIR: 191.059  ISR:  11.986  SAR:  -2.523  
bass            ==> SDR:  -0.845  SIR: 191.059  ISR:  11.986  SAR:  -2.523  



 12%|████████████████████████                                                                                                                                                                                | 6/50 [03:14<24:49, 33.86s/it]

drums           ==> SDR:  -1.460  SIR: 234.500  ISR:   7.171  SAR:  -4.763  
bass            ==> SDR:  -1.460  SIR: 234.500  ISR:   7.171  SAR:  -4.763  



 14%|████████████████████████████                                                                                                                                                                            | 7/50 [03:49<24:37, 34.35s/it]

drums           ==> SDR:   0.272  SIR: 199.864  ISR:   5.695  SAR:  -3.516  
bass            ==> SDR:   0.272  SIR: 199.864  ISR:   5.695  SAR:  -3.516  



 16%|████████████████████████████████                                                                                                                                                                        | 8/50 [04:23<23:56, 34.19s/it]

drums           ==> SDR:  -1.395  SIR: 242.448  ISR:   9.244  SAR:  -4.175  
bass            ==> SDR:  -1.395  SIR: 242.448  ISR:   9.244  SAR:  -4.175  



 18%|████████████████████████████████████                                                                                                                                                                    | 9/50 [04:58<23:38, 34.59s/it]

drums           ==> SDR:  -1.495  SIR: 171.625  ISR:   7.577  SAR:  -3.943  
bass            ==> SDR:  -1.495  SIR: 171.625  ISR:   7.577  SAR:  -3.943  



 20%|███████████████████████████████████████▊                                                                                                                                                               | 10/50 [05:33<22:56, 34.42s/it]

drums           ==> SDR:  -1.708  SIR: 214.061  ISR:   5.988  SAR:  -5.827  
bass            ==> SDR:  -1.708  SIR: 214.061  ISR:   5.988  SAR:  -5.827  



 22%|███████████████████████████████████████████▊                                                                                                                                                           | 11/50 [06:07<22:19, 34.34s/it]

drums           ==> SDR:   2.974  SIR: 221.552  ISR:   9.314  SAR:   0.850  
bass            ==> SDR:   2.974  SIR: 221.552  ISR:   9.314  SAR:   0.850  



 24%|███████████████████████████████████████████████▊                                                                                                                                                       | 12/50 [06:39<21:20, 33.69s/it]

drums           ==> SDR:  -1.001  SIR: 215.100  ISR:   4.125  SAR:  -4.017  
bass            ==> SDR:  -1.001  SIR: 215.100  ISR:   4.125  SAR:  -4.017  



 26%|███████████████████████████████████████████████████▋                                                                                                                                                   | 13/50 [06:59<18:11, 29.51s/it]

drums           ==> SDR:  -0.206  SIR: 171.096  ISR:   4.243  SAR:  -5.477  
bass            ==> SDR:  -0.206  SIR: 171.096  ISR:   4.243  SAR:  -5.477  



 28%|███████████████████████████████████████████████████████▋                                                                                                                                               | 14/50 [07:34<18:41, 31.14s/it]

drums           ==> SDR:   1.055  SIR: 209.035  ISR:  11.167  SAR:  -0.497  
bass            ==> SDR:   1.055  SIR: 209.035  ISR:  11.167  SAR:  -0.497  



 30%|███████████████████████████████████████████████████████████▋                                                                                                                                           | 15/50 [08:44<25:00, 42.88s/it]

drums           ==> SDR:  -4.548  SIR: 239.136  ISR:   6.725  SAR:  -6.253  
bass            ==> SDR:  -4.548  SIR: 239.136  ISR:   6.725  SAR:  -6.253  



 32%|███████████████████████████████████████████████████████████████▋                                                                                                                                       | 16/50 [09:21<23:17, 41.10s/it]

drums           ==> SDR:   0.887  SIR: 219.559  ISR:   9.101  SAR:  -1.914  
bass            ==> SDR:   0.887  SIR: 219.559  ISR:   9.101  SAR:  -1.914  



 34%|███████████████████████████████████████████████████████████████████▋                                                                                                                                   | 17/50 [09:39<18:47, 34.16s/it]

drums           ==> SDR:  -0.651  SIR: 223.761  ISR:   3.704  SAR:  -7.196  
bass            ==> SDR:  -0.651  SIR: 223.761  ISR:   3.704  SAR:  -7.196  



 36%|███████████████████████████████████████████████████████████████████████▋                                                                                                                               | 18/50 [10:12<18:00, 33.77s/it]

drums           ==> SDR:   0.343  SIR: 220.390  ISR:   9.756  SAR:  -1.801  
bass            ==> SDR:   0.343  SIR: 220.390  ISR:   9.756  SAR:  -1.801  



 38%|███████████████████████████████████████████████████████████████████████████▌                                                                                                                           | 19/50 [10:47<17:38, 34.14s/it]

drums           ==> SDR:   4.303  SIR: 220.580  ISR:  15.767  SAR:   3.323  
bass            ==> SDR:   4.303  SIR: 220.580  ISR:  15.767  SAR:   3.323  



 40%|███████████████████████████████████████████████████████████████████████████████▌                                                                                                                       | 20/50 [11:22<17:12, 34.42s/it]

drums           ==> SDR:   3.404  SIR: 217.594  ISR:  11.240  SAR:   2.051  
bass            ==> SDR:   3.404  SIR: 217.594  ISR:  11.240  SAR:   2.051  



 42%|███████████████████████████████████████████████████████████████████████████████████▌                                                                                                                   | 21/50 [11:58<16:54, 34.98s/it]

drums           ==> SDR:  -0.491  SIR: 163.819  ISR:   8.802  SAR:  -1.541  
bass            ==> SDR:  -0.491  SIR: 163.819  ISR:   8.802  SAR:  -1.541  



 44%|███████████████████████████████████████████████████████████████████████████████████████▌                                                                                                               | 22/50 [12:31<15:59, 34.26s/it]

drums           ==> SDR:  -4.067  SIR: 186.429  ISR:  10.997  SAR:  -6.095  
bass            ==> SDR:  -4.067  SIR: 186.429  ISR:  10.997  SAR:  -6.095  



 46%|███████████████████████████████████████████████████████████████████████████████████████████▌                                                                                                           | 23/50 [13:06<15:30, 34.47s/it]

drums           ==> SDR:  -2.610  SIR: 163.483  ISR:   8.196  SAR:  -4.758  
bass            ==> SDR:  -2.610  SIR: 163.483  ISR:   8.196  SAR:  -4.758  



 48%|███████████████████████████████████████████████████████████████████████████████████████████████▌                                                                                                       | 24/50 [13:38<14:37, 33.76s/it]

drums           ==> SDR:   1.897  SIR: 129.153  ISR:   9.152  SAR:   0.354  
bass            ==> SDR:   1.897  SIR: 129.153  ISR:   9.152  SAR:   0.354  



 50%|███████████████████████████████████████████████████████████████████████████████████████████████████▌                                                                                                   | 25/50 [14:12<14:06, 33.87s/it]

drums           ==> SDR:  -4.081  SIR: 231.561  ISR:   3.556  SAR: -11.216  
bass            ==> SDR:  -4.081  SIR: 231.561  ISR:   3.556  SAR: -11.216  



 52%|███████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                                               | 26/50 [14:47<13:41, 34.24s/it]

drums           ==> SDR:   1.204  SIR: 152.407  ISR:  11.621  SAR:   0.307  
bass            ==> SDR:   1.204  SIR: 152.407  ISR:  11.621  SAR:   0.307  



 54%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                                           | 27/50 [15:25<13:31, 35.29s/it]

drums           ==> SDR:   0.647  SIR: 172.032  ISR:  10.680  SAR:  -1.037  
bass            ==> SDR:   0.647  SIR: 172.032  ISR:  10.680  SAR:  -1.037  



 56%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                                       | 28/50 [15:44<11:10, 30.47s/it]

drums           ==> SDR:   8.017  SIR: 191.383  ISR:  14.522  SAR:   8.449  
bass            ==> SDR:   8.017  SIR: 191.383  ISR:  14.522  SAR:   8.449  



 58%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                                   | 29/50 [15:54<08:29, 24.27s/it]

drums           ==> SDR:   3.258  SIR: 211.663  ISR:   6.676  SAR:   2.641  
bass            ==> SDR:   3.258  SIR: 211.663  ISR:   6.676  SAR:   2.641  



 60%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                               | 30/50 [16:27<08:57, 26.86s/it]

drums           ==> SDR:   1.440  SIR: 208.478  ISR:  13.299  SAR:   0.878  
bass            ==> SDR:   1.440  SIR: 208.478  ISR:  13.299  SAR:   0.878  



 62%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                           | 31/50 [17:00<09:06, 28.77s/it]

drums           ==> SDR:  -6.677  SIR: 207.745  ISR:   5.183  SAR:  -9.369  
bass            ==> SDR:  -6.677  SIR: 207.745  ISR:   5.183  SAR:  -9.369  



 64%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                                       | 32/50 [17:35<09:12, 30.70s/it]

drums           ==> SDR:  -3.112  SIR: 239.982  ISR:   8.875  SAR:  -6.646  
bass            ==> SDR:  -3.112  SIR: 239.982  ISR:   8.875  SAR:  -6.646  



 66%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                                   | 33/50 [18:11<09:10, 32.39s/it]

drums           ==> SDR:  -5.240  SIR: 245.641  ISR:   5.146  SAR: -10.299  
bass            ==> SDR:  -5.240  SIR: 245.641  ISR:   5.146  SAR: -10.299  



 68%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                               | 34/50 [18:46<08:49, 33.07s/it]

drums           ==> SDR:  -2.184  SIR: 235.387  ISR:   5.723  SAR:  -6.102  
bass            ==> SDR:  -2.184  SIR: 235.387  ISR:   5.723  SAR:  -6.102  



 70%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                           | 35/50 [19:21<08:24, 33.66s/it]

drums           ==> SDR:   3.491  SIR: 216.592  ISR:  13.988  SAR:   2.622  
bass            ==> SDR:   3.491  SIR: 216.592  ISR:  13.988  SAR:   2.622  



 72%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                       | 36/50 [19:41<06:52, 29.47s/it]

drums           ==> SDR: -10.483  SIR: 196.296  ISR:   5.268  SAR: -14.782  
bass            ==> SDR: -10.483  SIR: 196.296  ISR:   5.268  SAR: -14.782  



 76%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                               | 38/50 [20:17<05:03, 25.25s/it]

drums           ==> SDR:   0.103  SIR: 223.404  ISR:   8.698  SAR:  -2.517  
bass            ==> SDR:   0.103  SIR: 223.404  ISR:   8.698  SAR:  -2.517  



 78%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                           | 39/50 [20:55<05:21, 29.26s/it]

drums           ==> SDR:  -1.112  SIR: 232.680  ISR:   4.952  SAR:  -5.457  
bass            ==> SDR:  -1.112  SIR: 232.680  ISR:   4.952  SAR:  -5.457  



 82%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                   | 41/50 [21:32<03:46, 25.13s/it]

drums           ==> SDR: -11.359  SIR: 171.906  ISR:   9.585  SAR: -13.288  
bass            ==> SDR: -11.359  SIR: 171.906  ISR:   9.585  SAR: -13.288  



 84%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                               | 42/50 [22:07<03:45, 28.20s/it]

drums           ==> SDR:   3.920  SIR: 130.739  ISR:  10.847  SAR:   2.329  
bass            ==> SDR:   3.920  SIR: 130.739  ISR:  10.847  SAR:   2.329  



 86%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                           | 43/50 [22:45<03:37, 31.06s/it]

drums           ==> SDR:  -1.507  SIR: 170.645  ISR:   6.639  SAR:  -5.488  
bass            ==> SDR:  -1.507  SIR: 170.645  ISR:   6.639  SAR:  -5.488  



 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                        | 44/50 [23:18<03:10, 31.79s/it]

drums           ==> SDR:  -0.689  SIR: 195.174  ISR:   6.027  SAR:  -4.286  
bass            ==> SDR:  -0.689  SIR: 195.174  ISR:   6.027  SAR:  -4.286  



 92%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                | 46/50 [23:55<01:46, 26.66s/it]

drums           ==> SDR: -11.660  SIR: 217.548  ISR:   1.979  SAR: -14.037  
bass            ==> SDR: -11.660  SIR: 217.548  ISR:   1.979  SAR: -14.037  



 94%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████            | 47/50 [24:15<01:13, 24.59s/it]

drums           ==> SDR:   4.285  SIR: 151.533  ISR:  10.989  SAR:   2.647  
bass            ==> SDR:   4.285  SIR: 151.533  ISR:  10.989  SAR:   2.647  



 96%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████        | 48/50 [24:51<00:55, 27.91s/it]

drums           ==> SDR:  -1.726  SIR: 207.468  ISR:   8.994  SAR:  -4.886  
bass            ==> SDR:  -1.726  SIR: 207.468  ISR:   8.994  SAR:  -4.886  



 98%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████    | 49/50 [25:25<00:29, 29.80s/it]

drums           ==> SDR:   3.144  SIR: 218.727  ISR:  16.287  SAR:   2.480  
bass            ==> SDR:   3.144  SIR: 218.727  ISR:  16.287  SAR:   2.480  



100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [26:01<00:00, 31.23s/it]

drums           ==> SDR:   1.351  SIR: 205.683  ISR:   8.693  SAR:  -1.006  
bass            ==> SDR:   1.351  SIR: 205.683  ISR:   8.693  SAR:  -1.006  






In [14]:
results.agg_frames_tracks_scores()

target  metric
bass    ISR         8.698010
        SAR        -4.175500
        SDR        -0.844900
        SIR       211.662845
drums   ISR         8.698010
        SAR        -4.175500
        SDR        -0.844900
        SIR       211.662845
Name: score, dtype: float64