# Imports

In [1]:
import os
import pywt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchaudio
import pytorch_lightning as pl
from torch.utils.data import DataLoader, Dataset
from torchaudio.pipelines import HDEMUCS_HIGH_MUSDB
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchaudio
import numpy as np
import random
from pytorch_lightning.loggers import WandbLogger
import wandb
import auraloss
import collections
from tqdm import tqdm
import pretty_midi
import matplotlib.pyplot as plt
from torchaudio.pipelines import HDEMUCS_HIGH_MUSDB
print(torch.cuda.is_available())
import plotly.graph_objects as go
from torch.optim import lr_scheduler
from IPython.display import Audio
from torchaudio.transforms import Fade
import musdb
import museval
import gc

True


# Set Seeds

In [2]:
# One shared seed for every random number generator this notebook touches:
# PyTorch (CPU), Python's `random`, NumPy, and PyTorch CUDA (current device
# and all devices), seeded in that order.
seed_value = 3407
for seed_rng in (
    torch.manual_seed,
    random.seed,
    np.random.seed,
    torch.cuda.manual_seed,
    torch.cuda.manual_seed_all,
):
    seed_rng(seed_value)

# Select the 'high' float32 matrix-multiplication precision mode of PyTorch.
torch.set_float32_matmul_precision('high')

# Construct the Data

In [3]:
# Root folder of the MUSDB18-HQ dataset; the following cells expect it to
# contain the 'train' and 'test' sub-folders.
# The location can be overridden per machine through the MUSDB18HQ_ROOT
# environment variable; without it the original hard-coded location is used.
# os.path.join(..., "") guarantees a trailing separator, which matters because
# file paths are built from `path` by plain string concatenation.
path = os.path.join(
    os.environ.get("MUSDB18HQ_ROOT", "D:/Github/phd-drum-sep/Data/musdb18hq/"),
    "",
)

In [4]:
# Sanity check: display the entries directly under the dataset root.
os.listdir(path)

['test', 'train']

In [5]:
# Names of the entries (one folder per track) in each dataset split.
# os.listdir() returns entries in arbitrary order, so they are sorted to keep
# the track order - and every index derived from it - identical between runs.
train = sorted(os.listdir(path + 'train'))
test = sorted(os.listdir(path + 'test'))

In [6]:
# Stem names. NOTE(review): the drum stem is loaded from 'drums.wav' further
# down while this list says 'drum' - confirm the spelling before using it to
# build file names.
sources = ['drum', 'bass', 'other', 'vocals']

In [7]:
# Index one-second excerpts of every track in the test split.
# all_scenes maps a running integer id to a dict holding the track folder
# ('music_path'), the excerpt start in frames ('start_point'), the excerpt
# length in frames ('length') and the total frame count of the track ('frames').
all_scenes = {}
counter = 0
sample_rate = 44100
segment_length = sample_rate * 1  # one second of audio, in frames

# tqdm wraps the list itself (not the enumerate object) so that it knows the
# total and can draw a real progress bar instead of a bare iteration counter.
for idx, val in enumerate(tqdm(test)):
    p = path + 'test/' + val + "/"
    info = torchaudio.info(f"{p}mixture.wav")
    # NOTE(review): assumes every mixture is sampled at `sample_rate`;
    # info.sample_rate is not checked.
    seconds = info.num_frames // sample_rate
    # The range stops at seconds - 2, so the last whole second of a track is
    # never indexed. NOTE(review): confirm this is intended.
    for i in range(0, seconds - 1, 1):
        start_point = i * segment_length
        # Defensive bound check; with the range above it always holds.
        if start_point + segment_length < info.num_frames:
            all_scenes[counter] = {
                'music_path': p,
                'start_point': start_point,
                'length': segment_length,
                'frames': info.num_frames,
            }
            counter += 1

50it [00:00, 4834.82it/s]


In [8]:
def turn_transcription_into_roll(transcription, frames, fs=44100, pitches=(35, 38, 42, 47, 49)):
    """Convert a drum transcription into a sample-resolution activation roll.

    Only the rows that are actually returned are allocated. A full
    (n_pitches x frames) matrix at audio rate would need several gigabytes
    for a whole track, and would raise IndexError for any pitch outside it.

    Args:
        transcription: Object exposing ``instruments[0].notes``, where each
            note has ``pitch``, ``start`` and ``end`` (times in seconds),
            e.g. a ``pretty_midi.PrettyMIDI`` instance.
        frames: Length of the roll in audio frames.
        fs: Frames per second used to convert note times to frame indices.
        pitches: MIDI pitches to keep, one output row per pitch in this
            order. In the General MIDI percussion map the defaults are
            bass drum (35), snare (38), closed hi-hat (42), low-mid tom (47)
            and crash cymbal (49).

    Returns:
        ``numpy.ndarray`` of shape ``(len(pitches), frames)`` and dtype
        float64, holding 1 where a note of that pitch is sounding and 0
        elsewhere. Notes with any other pitch are ignored, and a
        transcription without instruments yields an all-zero roll.
    """
    n_frames = int(frames)
    row_of_pitch = {pitch: row for row, pitch in enumerate(pitches)}
    roll = np.zeros((len(pitches), n_frames))

    # A transcription with no instrument track contains no drum hits at all.
    if not transcription.instruments:
        return roll

    for note in transcription.instruments[0].notes:
        row = row_of_pitch.get(note.pitch)
        if row is None:
            # Pitch is not one of the requested drum classes.
            continue

        # Convert start and end times to frame indices; slicing clips any
        # end index that runs past the end of the roll.
        start_frame = int(np.floor(note.start * fs))
        end_frame = int(np.ceil(note.end * fs))
        roll[row, start_frame:end_frame] = 1  # Or use note.velocity

    return roll

# Model

In [9]:
class DrumDemucs(pl.LightningModule):
    """Drum separation model: Hybrid Demucs conditioned on a drum roll.

    The mixture audio and a drum-activation roll are concatenated along the
    channel axis and passed through ``torchaudio.models.HDemucs``. The first
    source of the HDemucs output is then mapped to two channels by two
    consecutive 1x1 convolutions.

    The training loss is the sum of a multi-resolution mel STFT loss, an
    SI-SDR loss and an L1 loss between the prediction and the reference drums.
    """

    def __init__(self):
        super().__init__()

        # Use CUDA for the loss when it is available and fall back to the CPU
        # otherwise, so the module can also be constructed (for example when
        # loading a checkpoint) on a machine without a GPU.
        loss_device = "cuda" if torch.cuda.is_available() else "cpu"

        self.loss_fn = auraloss.freq.MultiResolutionSTFTLoss(
            fft_sizes=[1024, 2048, 4096],
            hop_sizes=[256, 512, 1024],
            win_lengths=[1024, 2048, 4096],
            scale="mel",
            n_bins=150,
            sample_rate=44100,
            device=loss_device,
        )

        self.loss_fn_2 = auraloss.time.SISDRLoss()

        self.loss_fn_3 = torch.nn.L1Loss()

        # Not read anywhere in this class.
        self.loss_used = 0

        sources = ['drum',
                   'noise',
                   ]

        # audio_channels must equal the channel count of the tensor built in
        # forward(): channels of `audio` plus rows of `drumroll`
        # (presumably 2 + 5 = 7 - TODO confirm against the training data).
        self.demucs_mixer = torchaudio.models.HDemucs(
            sources=sources,
            audio_channels=7,
            depth=6,
        )

        # 1x1 convolutions: 7 channels -> 2 channels, then 2 -> 2.
        self.out_conv = nn.Conv1d(in_channels=7, out_channels=2, kernel_size=1)
        self.out = nn.Conv1d(in_channels=2, out_channels=2, kernel_size=1)

    def compute_loss(self, outputs, ref_signals):
        """Return the sum of the STFT, SI-SDR and L1 losses.

        Args:
            outputs: Model prediction.
            ref_signals: Reference signal the prediction is compared with.
        """
        stft_term = self.loss_fn(outputs, ref_signals)
        sisdr_term = self.loss_fn_2(outputs, ref_signals)
        l1_term = self.loss_fn_3(outputs, ref_signals)
        return stft_term + sisdr_term + l1_term

    def forward(self, audio, drumroll):
        """Predict the drum signal for a mixture and its drum roll.

        Args:
            audio: Mixture tensor; concatenated with `drumroll` along dim 1.
            drumroll: Drum-activation tensor matching `audio` in every
                dimension except dim 1.

        Returns:
            Two-channel tensor computed from the first source of the HDemucs
            output.
        """
        to_mix = torch.cat([audio, drumroll], dim=1)
        out = self.demucs_mixer(to_mix)
        # Keep only source index 0 ('drum') before the channel-mixing convs.
        out_2 = self.out_conv(out[:, 0, :, :])
        out_2 = self.out(out_2)
        # out_2 = torch.tanh(out_2)

        return out_2

    def training_step(self, batch, batch_idx):
        """Run one optimisation step and return its loss.

        Every 64th batch the first example of the batch (input, reference,
        prediction and the five drum-roll rows) is logged to Weights & Biases
        as audio.

        Args:
            batch: Tuple ``(audio, drum, drumroll)``.
            batch_idx: Index of the batch within the epoch.
        """
        audio, drum, drumroll = batch

        # Call the module rather than .forward() so registered hooks run.
        outputs = self(audio, drumroll)

        if batch_idx % 64 == 0:
            input_signal = audio[0].cpu().detach().numpy().T
            generated_signal = outputs[0].cpu().detach().numpy().T
            drum_signal = drum[0].cpu().detach().numpy().T
            wandb.log({'audio_input': [wandb.Audio(input_signal, caption="Input", sample_rate=44100)]})
            wandb.log({'audio_reference': [wandb.Audio(drum_signal, caption="Reference", sample_rate=44100)]})
            wandb.log({'audio_output': [wandb.Audio(generated_signal, caption="Output", sample_rate=44100)]})

            # Convert the roll once instead of once per logged row.
            first_roll = drumroll[0].cpu().detach().numpy()
            for i in range(5):
                wandb.log({f'drum_{i + 1}': [wandb.Audio(first_roll[i, :], caption="Output", sample_rate=44100)]})

        loss = self.compute_loss(outputs, drum)

        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)

        return loss

    def configure_optimizers(self):
        """Return Adam (lr=0.001) with a StepLR schedule (step 1, gamma 0.99)."""
        optimizer = optim.Adam(self.parameters(), lr=0.001)
        scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.99)
        return [optimizer], [scheduler]
        

In [10]:
def load_audio(path, start_point, filename):
    """Load a complete audio file from a track folder.

    Args:
        path: Folder that contains the file.
        start_point: Unused - the whole file is always loaded. The parameter
            is kept so existing callers keep working.
        filename: Name of the audio file inside `path`.

    Returns:
        The waveform returned by ``torchaudio.load``; the sample rate that
        comes with it is discarded.
    """
    waveform, _ = torchaudio.load(f"{path}/{filename}")
    return waveform

def load_roll(path, start_point, frames):
    """Build the drum roll tensor for the track stored in `path`.

    Reads ``mixture.wav.mid`` from the folder, converts it with
    ``turn_transcription_into_roll`` and returns the result as a float32
    tensor. `start_point` is accepted but not used; `frames` is the length
    of the roll.
    """
    midi_file = path + '/mixture.wav.mid'
    drum_roll = turn_transcription_into_roll(pretty_midi.PrettyMIDI(midi_file), frames)
    return torch.from_numpy(drum_roll).to(torch.float32)


# SISNR

In [11]:
def separate_sources(
    model,
    mix,
    drumroll,
    segment=4.0,
    overlap=0,
    device=None,
):
    """
    Apply the model to a mixture chunk by chunk and add the faded chunk
    outputs together into one full-length result.

    Reads the notebook-level `sample_rate` to convert seconds to frames.

    Args:
        model: Module called as `model.forward(chunk, roll)`; its output is
            added into a tensor shaped like `mix`.
        mix (torch.Tensor): Mixture of shape (batch, channels, length).
        drumroll (torch.Tensor): Drum roll, sliced along its last dimension
            with the same frame range as `mix`.
        segment (float): segment length in seconds
        overlap (float): overlap between consecutive chunks in seconds
            (also scales the chunk length by `1 + overlap`).
        device (torch.device, str, or None): if provided, device on which to
            execute the computation, otherwise `mix.device` is assumed.
            Each chunk is moved to `device` before the model is called, so
            `mix` and `drumroll` may live on a different device. The model
            itself is expected to be on `device` already.

    Returns:
        torch.Tensor of shape (batch, channels, length) on `device`.
    """
    if device is None:
        device = mix.device
    else:
        device = torch.device(device)

    batch, channels, length = mix.shape

    chunk_len = int(sample_rate * segment * (1 + overlap))
    start = 0
    end = chunk_len
    overlap_frames = overlap * sample_rate
    fade = Fade(fade_in_len=0, fade_out_len=int(overlap_frames), fade_shape="linear")

    final = torch.zeros(batch, channels, length, device=device)

    while start < length - overlap_frames:
        # Move only the current chunk to the compute device; this is a no-op
        # when the inputs are already there.
        chunk = mix[:, :, start:end].to(device)
        roll = drumroll[:, :, start:end].to(device)
        with torch.no_grad():
            out = model.forward(chunk, roll)
        out = fade(out)
        final[:, :, start:end] += out
        if start == 0:
            # From the second chunk on, fade in over the overlapping region.
            fade.fade_in_len = int(overlap_frames)
            start += int(chunk_len - overlap_frames)
        else:
            start += chunk_len
        end += chunk_len
        if end >= length:
            # The next chunk reaches the end of the signal: no fade-out.
            fade.fade_out_len = 0
    return final

In [12]:
class AudioData:
    """Minimal container that exposes an array through an `audio` attribute.

    In this notebook instances wrap the reference drum array and are stored
    in `track.targets[...]` before `museval.eval_mus_track` is called.
    NOTE(review): presumably museval reads `.audio` from each target and
    expects a (samples, channels) array - confirm against museval.
    """
    def __init__(self, audio):
        self.audio = audio

In [None]:
# Evaluate each listed checkpoint on the MUSDB test split with museval and
# write the per-track results plus a results.csv into results_<name>/.
# Further checkpoints can be added to the list, e.g. 'epoch_230', 'epoch_200',
# 'epoch_100'.
analysis_dir = "D:/Github/phd-drum-sep/analysis/demucs_small_model_analysis/"

for name in ['epoch_285']:
    try:
        # Defined before the track loop because it is also needed after it;
        # exist_ok makes re-running the cell safe without swallowing errors.
        output_path = f"{analysis_dir}results_{name}/"
        os.makedirs(output_path, exist_ok=True)

        model = DrumDemucs.load_from_checkpoint(f'{analysis_dir}checkpoint/{name}.ckpt')
        # model.to('cpu')
        model = model.eval()

        mus = musdb.DB(root="D:/Github/phd-drum-sep/Data/musdb18_stems/", subsets="test")

        results = museval.EvalStore(frames_agg='median', tracks_agg='median')
        for track in tqdm(mus):
            # `path` is the dataset root defined near the top of the notebook.
            audio_path = path + 'test/' + track.name + "/"

            # load_audio / load_roll ignore their start_point argument (whole
            # files are loaded), so pass 0 instead of relying on a variable
            # left over from another cell.
            mixture_tensor = load_audio(audio_path, 0, 'mixture.wav').unsqueeze(0).to(model.device)
            drum_tensor = load_audio(audio_path, 0, 'drums.wav').unsqueeze(0).to(model.device)
            shape = mixture_tensor.shape[2]
            roll_tensor = load_roll(audio_path, 0, shape).unsqueeze(0).to(model.device)

            # Run on whatever device the model lives on.
            seperated = separate_sources(model, mixture_tensor, roll_tensor, device=model.device)

            # Swap the first two axes of the estimate before handing it to museval.
            audio = seperated.squeeze(0).cpu().numpy()
            audio = np.swapaxes(audio, 0, 1)
            # The same estimate and reference are registered under both
            # 'drums' and 'bass', so the 'bass' rows of the results are copies
            # of the 'drums' rows.
            # NOTE(review): presumably a workaround because museval wants more
            # than one target; SIR is likely not meaningful in this setup -
            # confirm before reporting it.
            estimates = {'drums': audio, 'bass': audio}

            d = drum_tensor.squeeze(0).cpu().numpy()
            d = np.swapaxes(d, 0, 1)

            track.targets['drums'] = AudioData(d)
            track.targets['bass'] = AudioData(d)

            scores = museval.eval_mus_track(
                track, estimates, output_dir=f"{output_path}"
            )

            print(scores)
            results.add_track(scores)
            # break

        results.df.to_csv(f"{output_path}results.csv")

        # Release the checkpoint and dataset handle before the next one.
        del model
        del mus
        gc.collect()
    except Exception as e:
        # Best effort across checkpoints: report the failure and continue.
        print('error with model, skipping', name, e)

  2%|████                                                                                                                                                                                                    | 1/50 [00:35<29:15, 35.82s/it]

drums           ==> SDR:   3.379  SIR: 229.056  ISR:   6.238  SAR:   3.270  
bass            ==> SDR:   3.379  SIR: 229.056  ISR:   6.238  SAR:   3.270  



  4%|████████                                                                                                                                                                                                | 2/50 [01:09<27:31, 34.41s/it]

drums           ==> SDR:   2.915  SIR: 183.222  ISR:   5.823  SAR:   0.504  
bass            ==> SDR:   2.915  SIR: 183.222  ISR:   5.823  SAR:   0.504  



  6%|████████████                                                                                                                                                                                            | 3/50 [01:29<21:57, 28.04s/it]

drums           ==> SDR:   4.656  SIR: 220.191  ISR:   7.617  SAR:   4.169  
bass            ==> SDR:   4.656  SIR: 220.191  ISR:   7.617  SAR:   4.169  



  8%|████████████████                                                                                                                                                                                        | 4/50 [02:04<23:35, 30.77s/it]

drums           ==> SDR:   2.966  SIR: 238.800  ISR:   8.133  SAR:   0.839  
bass            ==> SDR:   2.966  SIR: 238.800  ISR:   8.133  SAR:   0.839  



 10%|████████████████████                                                                                                                                                                                    | 5/50 [02:44<25:35, 34.13s/it]

drums           ==> SDR:   8.046  SIR: 188.389  ISR:  12.083  SAR:   8.083  
bass            ==> SDR:   8.046  SIR: 188.389  ISR:  12.083  SAR:   8.083  



 12%|████████████████████████                                                                                                                                                                                | 6/50 [03:19<25:14, 34.43s/it]

drums           ==> SDR:   4.621  SIR: 233.643  ISR:   7.825  SAR:   4.071  
bass            ==> SDR:   4.621  SIR: 233.643  ISR:   7.825  SAR:   4.071  



 14%|████████████████████████████                                                                                                                                                                            | 7/50 [03:54<24:45, 34.55s/it]

drums           ==> SDR:   5.158  SIR: 193.834  ISR:   9.243  SAR:   4.192  
bass            ==> SDR:   5.158  SIR: 193.834  ISR:   9.243  SAR:   4.192  



 16%|████████████████████████████████                                                                                                                                                                        | 8/50 [04:30<24:25, 34.89s/it]

drums           ==> SDR:   4.026  SIR: 244.064  ISR:   7.317  SAR:   2.737  
bass            ==> SDR:   4.026  SIR: 244.064  ISR:   7.317  SAR:   2.737  



 18%|████████████████████████████████████                                                                                                                                                                    | 9/50 [05:07<24:24, 35.71s/it]

drums           ==> SDR:   6.423  SIR: 179.150  ISR:   9.294  SAR:   6.922  
bass            ==> SDR:   6.423  SIR: 179.150  ISR:   9.294  SAR:   6.922  



 20%|███████████████████████████████████████▊                                                                                                                                                               | 10/50 [05:44<24:00, 36.01s/it]

drums           ==> SDR:   2.570  SIR: 230.765  ISR:   5.308  SAR:   1.157  
bass            ==> SDR:   2.570  SIR: 230.765  ISR:   5.308  SAR:   1.157  



 22%|███████████████████████████████████████████▊                                                                                                                                                           | 11/50 [06:20<23:30, 36.15s/it]

drums           ==> SDR:   7.731  SIR: 219.260  ISR:  11.503  SAR:   7.953  
bass            ==> SDR:   7.731  SIR: 219.260  ISR:  11.503  SAR:   7.953  



 24%|███████████████████████████████████████████████▊                                                                                                                                                       | 12/50 [06:54<22:23, 35.36s/it]

drums           ==> SDR:   4.077  SIR: 217.199  ISR:   4.717  SAR:   3.616  
bass            ==> SDR:   4.077  SIR: 217.199  ISR:   4.717  SAR:   3.616  



 26%|███████████████████████████████████████████████████▋                                                                                                                                                   | 13/50 [07:14<18:56, 30.70s/it]

drums           ==> SDR:   2.959  SIR: 163.850  ISR:   4.417  SAR:   1.539  
bass            ==> SDR:   2.959  SIR: 163.850  ISR:   4.417  SAR:   1.539  



 28%|███████████████████████████████████████████████████████▋                                                                                                                                               | 14/50 [07:51<19:36, 32.68s/it]

drums           ==> SDR:   7.412  SIR: 211.044  ISR:  11.488  SAR:   7.789  
bass            ==> SDR:   7.412  SIR: 211.044  ISR:  11.488  SAR:   7.789  



 30%|███████████████████████████████████████████████████████████▋                                                                                                                                           | 15/50 [09:03<25:54, 44.43s/it]

drums           ==> SDR:   4.856  SIR: 244.819  ISR:   6.790  SAR:   4.769  
bass            ==> SDR:   4.856  SIR: 244.819  ISR:   6.790  SAR:   4.769  



 32%|███████████████████████████████████████████████████████████████▋                                                                                                                                       | 16/50 [09:41<24:08, 42.61s/it]

drums           ==> SDR:   4.335  SIR: 219.054  ISR:   7.181  SAR:   2.901  
bass            ==> SDR:   4.335  SIR: 219.054  ISR:   7.181  SAR:   2.901  



 34%|███████████████████████████████████████████████████████████████████▋                                                                                                                                   | 17/50 [10:00<19:27, 35.39s/it]

drums           ==> SDR:   1.941  SIR: 219.946  ISR:   4.045  SAR:  -1.321  
bass            ==> SDR:   1.941  SIR: 219.946  ISR:   4.045  SAR:  -1.321  



 36%|███████████████████████████████████████████████████████████████████████▋                                                                                                                               | 18/50 [10:33<18:36, 34.89s/it]

drums           ==> SDR:   4.957  SIR: 228.442  ISR:   8.356  SAR:   5.964  
bass            ==> SDR:   4.957  SIR: 228.442  ISR:   8.356  SAR:   5.964  



 38%|███████████████████████████████████████████████████████████████████████████▌                                                                                                                           | 19/50 [11:10<18:12, 35.26s/it]

drums           ==> SDR:   9.987  SIR: 220.457  ISR:  15.876  SAR:  10.124  
bass            ==> SDR:   9.987  SIR: 220.457  ISR:  15.876  SAR:  10.124  



 40%|███████████████████████████████████████████████████████████████████████████████▌                                                                                                                       | 20/50 [11:46<17:52, 35.74s/it]

drums           ==> SDR:   3.581  SIR: 223.935  ISR:   4.281  SAR:   7.191  
bass            ==> SDR:   3.581  SIR: 223.935  ISR:   4.281  SAR:   7.191  



 42%|███████████████████████████████████████████████████████████████████████████████████▌                                                                                                                   | 21/50 [12:25<17:44, 36.72s/it]

drums           ==> SDR:   5.454  SIR: 157.476  ISR:   7.494  SAR:   6.021  
bass            ==> SDR:   5.454  SIR: 157.476  ISR:   7.494  SAR:   6.021  



 44%|███████████████████████████████████████████████████████████████████████████████████████▌                                                                                                               | 22/50 [13:00<16:50, 36.09s/it]

drums           ==> SDR:   3.603  SIR: 177.935  ISR:   8.179  SAR:   2.634  
bass            ==> SDR:   3.603  SIR: 177.935  ISR:   8.179  SAR:   2.634  



 46%|███████████████████████████████████████████████████████████████████████████████████████████▌                                                                                                           | 23/50 [13:39<16:34, 36.85s/it]

drums           ==> SDR:   3.498  SIR: 156.884  ISR:   5.790  SAR:   2.410  
bass            ==> SDR:   3.498  SIR: 156.884  ISR:   5.790  SAR:   2.410  



 48%|███████████████████████████████████████████████████████████████████████████████████████████████▌                                                                                                       | 24/50 [14:15<15:50, 36.54s/it]

drums           ==> SDR:   6.413  SIR: 139.460  ISR:   9.600  SAR:   6.862  
bass            ==> SDR:   6.413  SIR: 139.460  ISR:   9.600  SAR:   6.862  



 50%|███████████████████████████████████████████████████████████████████████████████████████████████████▌                                                                                                   | 25/50 [14:51<15:14, 36.58s/it]

drums           ==> SDR:   3.405  SIR: 231.479  ISR:   5.535  SAR:   2.020  
bass            ==> SDR:   3.405  SIR: 231.479  ISR:   5.535  SAR:   2.020  



 52%|███████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                                               | 26/50 [15:30<14:57, 37.38s/it]

drums           ==> SDR:   7.984  SIR: 154.991  ISR:  11.960  SAR:   8.895  
bass            ==> SDR:   7.984  SIR: 154.991  ISR:  11.960  SAR:   8.895  



 54%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                                           | 27/50 [16:12<14:47, 38.59s/it]

drums           ==> SDR:   6.821  SIR: 171.312  ISR:  11.042  SAR:   6.979  
bass            ==> SDR:   6.821  SIR: 171.312  ISR:  11.042  SAR:   6.979  



 56%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                                       | 28/50 [16:33<12:13, 33.34s/it]

drums           ==> SDR:   9.880  SIR: 194.003  ISR:  13.182  SAR:  10.820  
bass            ==> SDR:   9.880  SIR: 194.003  ISR:  13.182  SAR:  10.820  



 58%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                                   | 29/50 [16:43<09:13, 26.34s/it]

drums           ==> SDR:   5.186  SIR: 215.671  ISR:   6.199  SAR:   7.212  
bass            ==> SDR:   5.186  SIR: 215.671  ISR:   6.199  SAR:   7.212  



 60%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                               | 30/50 [17:16<09:28, 28.41s/it]

drums           ==> SDR:   5.597  SIR: 200.880  ISR:   8.530  SAR:   4.801  
bass            ==> SDR:   5.597  SIR: 200.880  ISR:   8.530  SAR:   4.801  



 62%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                           | 31/50 [17:51<09:33, 30.18s/it]

drums           ==> SDR:   3.210  SIR: 205.626  ISR:   7.618  SAR:   1.899  
bass            ==> SDR:   3.210  SIR: 205.626  ISR:   7.618  SAR:   1.899  



 64%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                                       | 32/50 [18:27<09:36, 32.04s/it]

drums           ==> SDR:   1.808  SIR: 239.427  ISR:   6.205  SAR:   0.321  
bass            ==> SDR:   1.808  SIR: 239.427  ISR:   6.205  SAR:   0.321  



 66%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                                   | 33/50 [19:04<09:28, 33.42s/it]

drums           ==> SDR:   2.756  SIR: 241.984  ISR:   5.394  SAR:   0.816  
bass            ==> SDR:   2.756  SIR: 241.984  ISR:   5.394  SAR:   0.816  



 68%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                               | 34/50 [19:39<09:04, 34.03s/it]

drums           ==> SDR:   4.298  SIR: 238.895  ISR:   7.099  SAR:   3.367  
bass            ==> SDR:   4.298  SIR: 238.895  ISR:   7.099  SAR:   3.367  



 70%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                           | 35/50 [20:17<08:46, 35.11s/it]

drums           ==> SDR:   8.900  SIR: 218.219  ISR:  12.491  SAR:  10.070  
bass            ==> SDR:   8.900  SIR: 218.219  ISR:  12.491  SAR:  10.070  



 72%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                       | 36/50 [20:38<07:13, 30.98s/it]

drums           ==> SDR:   2.239  SIR: 206.413  ISR:   5.591  SAR:   0.063  
bass            ==> SDR:   2.239  SIR: 206.413  ISR:   5.591  SAR:   0.063  



 74%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                   | 37/50 [21:51<09:25, 43.50s/it]

drums           ==> SDR:   5.923  SIR: 231.463  ISR:  12.030  SAR:   4.936  
bass            ==> SDR:   5.923  SIR: 231.463  ISR:  12.030  SAR:   4.936  



 76%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                               | 38/50 [22:29<08:22, 41.86s/it]

drums           ==> SDR:   3.908  SIR: 219.750  ISR:   7.396  SAR:   4.281  
bass            ==> SDR:   3.908  SIR: 219.750  ISR:   7.396  SAR:   4.281  



 78%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                           | 39/50 [23:07<07:29, 40.82s/it]

drums           ==> SDR:   2.634  SIR: 231.783  ISR:   4.036  SAR:   1.937  
bass            ==> SDR:   2.634  SIR: 231.783  ISR:   4.036  SAR:   1.937  



 80%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                       | 40/50 [23:47<06:45, 40.57s/it]

drums           ==> SDR:   5.349  SIR: 197.696  ISR:   9.244  SAR:   4.982  
bass            ==> SDR:   5.349  SIR: 197.696  ISR:   9.244  SAR:   4.982  



 82%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                   | 41/50 [24:23<05:51, 39.09s/it]

drums           ==> SDR:   4.929  SIR: 172.375  ISR:   8.846  SAR:   4.921  
bass            ==> SDR:   4.929  SIR: 172.375  ISR:   8.846  SAR:   4.921  



 84%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                               | 42/50 [24:58<05:03, 37.97s/it]

drums           ==> SDR:   7.339  SIR: 133.070  ISR:  11.432  SAR:   6.577  
bass            ==> SDR:   7.339  SIR: 133.070  ISR:  11.432  SAR:   6.577  



 86%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                           | 43/50 [25:36<04:26, 38.00s/it]

drums           ==> SDR:   3.600  SIR: 168.902  ISR:   7.312  SAR:   1.959  
bass            ==> SDR:   3.600  SIR: 168.902  ISR:   7.312  SAR:   1.959  



 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                        | 44/50 [26:11<03:41, 36.99s/it]

drums           ==> SDR:   2.194  SIR: 195.566  ISR:   3.294  SAR:   0.403  
bass            ==> SDR:   2.194  SIR: 195.566  ISR:   3.294  SAR:   0.403  



 90%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                    | 45/50 [26:51<03:09, 37.95s/it]

drums           ==> SDR:   0.759  SIR: 204.744  ISR:   1.709  SAR:  -1.261  
bass            ==> SDR:   0.759  SIR: 204.744  ISR:   1.709  SAR:  -1.261  



 92%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                | 46/50 [27:27<02:29, 37.26s/it]

drums           ==> SDR:   2.348  SIR: 215.418  ISR:   4.019  SAR:   0.296  
bass            ==> SDR:   2.348  SIR: 215.418  ISR:   4.019  SAR:   0.296  



 94%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████            | 47/50 [27:46<01:35, 31.93s/it]

drums           ==> SDR:   7.991  SIR: 151.938  ISR:  12.404  SAR:   8.165  
bass            ==> SDR:   7.991  SIR: 151.938  ISR:  12.404  SAR:   8.165  



In [None]:
# Display the scores aggregated over frames and tracks; `results` is the
# EvalStore from the evaluation cell above, created with frames_agg='median'
# and tracks_agg='median'.
results.agg_frames_tracks_scores()