# Imports

In [1]:
import os
import pywt
import torch
import torch.nn as nn
import torch.nn.functional as F
import torchaudio
import pytorch_lightning as pl
from torch.utils.data import DataLoader, Dataset
from torchaudio.pipelines import HDEMUCS_HIGH_MUSDB
import torch.optim as optim
from torch.utils.data import Dataset, DataLoader
import torchaudio
import numpy as np
import random
from pytorch_lightning.loggers import WandbLogger
import wandb
import auraloss
import collections
from tqdm import tqdm
import pretty_midi
import matplotlib.pyplot as plt
from torchaudio.pipelines import HDEMUCS_HIGH_MUSDB
# Report whether PyTorch can see a CUDA device (printed once when the import cell runs).
print(torch.cuda.is_available())
import plotly.graph_objects as go
from torch.optim import lr_scheduler
from IPython.display import Audio
from torchaudio.transforms import Fade
import musdb
import museval
import gc

True


# Set Seeds

In [2]:
# One seed shared by every random number generator used in this notebook.
seed_value = 3407

# Seed PyTorch (CPU), Python's `random`, NumPy, then the CUDA generators, in that order.
for _seed_fn in (
    torch.manual_seed,
    random.seed,
    np.random.seed,
    torch.cuda.manual_seed,
    torch.cuda.manual_seed_all,
):
    _seed_fn(seed_value)

# Select the 'high' float32 matmul precision mode.
torch.set_float32_matmul_precision('high')

# Construct the Data

In [3]:
# Root folder of the MUSDB18-HQ dataset; other cells append 'train' / 'test/' and a track folder to it.
path = "D:/Github/phd-drum-sep/Data/musdb18hq/"

In [4]:
# Show the entries directly under the dataset root (the split folders).
os.listdir(path)

['test', 'train']

In [5]:
# Entry names found in each split folder of the dataset root.
train_dir = path + 'train'
test_dir = path + 'test'
train = list(os.listdir(train_dir))
test = list(os.listdir(test_dir))

In [6]:
# Stem names. NOTE(review): no other cell visible here reads this list (DrumDemucs builds its
# own local `sources`) — confirm whether it is still needed.
sources = ['drum', 'bass', 'other', 'vocals']

In [7]:
# Build an index of one-second windows over every track in the test split.
# all_scenes maps a running integer id -> where the window lives and how long the track is.
all_scenes = {}
counter = 0
sample_rate = 44100
segment_length = sample_rate * 1  # window length in samples (one second)

# Iterate over the list itself so tqdm knows the total and can show a real progress bar.
for val in tqdm(test):
    p = path + 'test/' + val + "/"
    info = torchaudio.info(f"{p}mixture.wav")
    seconds = info.num_frames // sample_rate
    # The range stops at seconds - 1, so the last whole second of each track is never indexed
    # (kept exactly as before).
    for i in range(seconds - 1):
        # Keep the name `start_point`: it stays defined after the loop and other cells may read it.
        start_point = i * segment_length
        if start_point + segment_length < info.num_frames:
            all_scenes[counter] = {
                'music_path': p,
                'start_point': start_point,
                'length': segment_length,
                'frames': info.num_frames,
            }
            counter += 1

50it [00:00, 4545.59it/s]


In [8]:
def turn_transcription_into_roll(transcription, frames, fs=44100):
    """Render a transcription as a binary activation roll at sample resolution.

    Only notes whose pitch is one of 35, 38, 42, 47, 49 are kept; every other pitch is
    ignored. (Previously any note with pitch >= 64 raised IndexError because the scratch
    roll only had 64 rows, and a transcription without instruments raised IndexError too.)
    NOTE(review): in the General MIDI percussion map these pitches are bass drum, snare,
    closed hi-hat, low-mid tom and crash cymbal — confirm the transcriber follows that map.

    Args:
        transcription: object exposing ``instruments[0].notes``; each note must provide
            ``pitch``, ``start`` and ``end`` (times in seconds), e.g. ``pretty_midi.PrettyMIDI``.
        frames: number of columns of the returned roll (converted with ``int``).
        fs: frames per second used to convert note times to column indices.

    Returns:
        ``np.ndarray`` of shape ``(5, int(frames))`` and dtype float64. Row ``k`` is 1 on
        ``[floor(start * fs), ceil(end * fs))`` for every note of the k-th kept pitch,
        0 elsewhere. Notes running past ``frames`` are truncated by the slice.
    """
    drum_pitches = (35, 38, 42, 47, 49)
    row_of_pitch = {pitch: row for row, pitch in enumerate(drum_pitches)}

    roll = np.zeros((len(drum_pitches), int(frames)))

    # Nothing transcribed: return an all-zero roll instead of failing on instruments[0].
    if not transcription.instruments:
        return roll

    for note in transcription.instruments[0].notes:
        row = row_of_pitch.get(note.pitch)
        if row is None:
            # Pitch outside the kept set; it never reached the output before either.
            continue

        # Round outwards so a note always covers every frame it touches.
        start_frame = int(np.floor(note.start * fs))
        end_frame = int(np.ceil(note.end * fs))
        roll[row, start_frame:end_frame] = 1

    return roll

# Model

In [9]:
class DrumDemucs(pl.LightningModule):
    """HDemucs-based separator that takes audio plus a drum roll and predicts a 2-channel signal.

    The audio and the drum roll are concatenated along the channel dimension and fed to a
    7-channel ``torchaudio.models.HDemucs``. Index 0 of the network's source dimension
    (the first entry of ``sources``, ``'drum'``) is reduced from 7 to 2 channels by two
    1x1 convolutions.
    NOTE(review): 7 input channels presumably means 2 audio + 5 drum-roll rows — confirm
    against the training data pipeline.
    """

    SAMPLE_RATE = 44100       # Hz; passed to the mel STFT loss and to the wandb audio clips
    AUDIO_LOG_INTERVAL = 64   # log example audio on every batch whose index is a multiple of this

    def __init__(self):
        super().__init__()

        # Previously hard-coded to "cuda", which made constructing the module (and therefore
        # loading a checkpoint) fail on machines without a GPU.
        loss_device = "cuda" if torch.cuda.is_available() else "cpu"

        self.loss_fn = auraloss.freq.MultiResolutionSTFTLoss(
            fft_sizes=[1024, 2048, 4096],
            hop_sizes=[256, 512, 1024],
            win_lengths=[1024, 2048, 4096],
            scale="mel",
            n_bins=150,
            sample_rate=self.SAMPLE_RATE,
            device=loss_device,
        )

        self.loss_fn_2 = auraloss.time.SISDRLoss()

        self.loss_fn_3 = torch.nn.L1Loss()

        # Not read anywhere in this class; kept so existing code that touches it keeps working.
        self.loss_used = 0

        sources = ['drum',
                   'noise',
                   ]

        self.demucs_mixer = torchaudio.models.HDemucs(
            sources=sources,
            audio_channels=7,
            depth=6,
        )

        # 1x1 convolutions: 7 -> 2 channels, then a 2 -> 2 channel mix.
        self.out_conv = nn.Conv1d(in_channels=7, out_channels=2, kernel_size=1)
        self.out = nn.Conv1d(in_channels=2, out_channels=2, kernel_size=1)

    def compute_loss(self, outputs, ref_signals):
        """Return the unweighted sum of the multi-resolution STFT, SI-SDR and L1 losses."""
        return (
            self.loss_fn(outputs, ref_signals)
            + self.loss_fn_2(outputs, ref_signals)
            + self.loss_fn_3(outputs, ref_signals)
        )

    def forward(self, audio, drumroll):
        """Predict the 2-channel output for ``audio`` conditioned on ``drumroll``.

        Both inputs are concatenated on dimension 1, so they must agree on every other
        dimension and their dimension-1 sizes must add up to 7.
        """
        to_mix = torch.cat([audio, drumroll], dim=1)
        separated = self.demucs_mixer(to_mix)
        # Keep only index 0 of dimension 1 (the first configured source, 'drum').
        drum_channels = self.out_conv(separated[:, 0, :, :])
        return self.out(drum_channels)

    def _log_audio_examples(self, audio, drum, drumroll, outputs):
        """Send the first item of the batch to wandb as audio clips.

        Everything goes out in a single ``wandb.log`` call (it used to be eight separate
        calls per logging event). Does nothing when there is no active wandb run.
        """
        if wandb.run is None:
            return

        def first_item(tensor):
            return tensor[0].cpu().detach().numpy()

        rate = self.SAMPLE_RATE
        clips = {
            'audio_input': [wandb.Audio(first_item(audio).T, caption="Input", sample_rate=rate)],
            'audio_reference': [wandb.Audio(first_item(drum).T, caption="Reference", sample_rate=rate)],
            'audio_output': [wandb.Audio(first_item(outputs).T, caption="Output", sample_rate=rate)],
        }

        # One clip per drum-roll row; these used to carry the misleading caption "Output".
        roll = first_item(drumroll)
        for i in range(roll.shape[0]):
            clips[f'drum_{i + 1}'] = [
                wandb.Audio(roll[i, :], caption=f"Drum roll {i + 1}", sample_rate=rate)
            ]

        wandb.log(clips)

    def training_step(self, batch, batch_idx):
        """Run one optimisation step on a ``(audio, drum, drumroll)`` batch and return the loss."""
        audio, drum, drumroll = batch

        # Call the module instead of .forward() so that registered hooks run.
        outputs = self(audio, drumroll)

        if batch_idx % self.AUDIO_LOG_INTERVAL == 0:
            self._log_audio_examples(audio, drum, drumroll, outputs)

        loss = self.compute_loss(outputs, drum)

        self.log('train_loss', loss, on_step=True, on_epoch=True, prog_bar=True, logger=True)

        return loss

    def configure_optimizers(self):
        """Adam (lr=0.001) with a StepLR schedule multiplying the rate by 0.99 at every scheduler step."""
        optimizer = optim.Adam(self.parameters(), lr=0.001)
        scheduler = lr_scheduler.StepLR(optimizer, step_size=1, gamma=0.99)
        return [optimizer], [scheduler]
        

In [10]:
def load_audio(path, start_point, filename):
    """Load a whole audio file from a track folder.

    Args:
        path: folder that contains the file.
        start_point: ignored — the complete file is always loaded. The parameter is kept
            so existing calls keep working.
        filename: name of the audio file inside ``path``.

    Returns:
        The waveform returned by ``torchaudio.load``; the sample rate is discarded.
    """
    waveform, _ = torchaudio.load(f"{path}/{filename}")
    return waveform

def load_roll(path, start_point, frames):
    """Read ``<path>/mixture.wav.mid`` and return its drum roll as a float32 tensor.

    ``start_point`` is accepted but not used; the roll always spans ``frames`` columns
    starting from the beginning of the transcription.
    """
    transcription = pretty_midi.PrettyMIDI(path + '/mixture.wav.mid')
    roll_array = turn_transcription_into_roll(transcription, frames)
    return torch.from_numpy(roll_array).float()


# Evaluation: museval SDR / SIR / ISR / SAR

In [11]:
def separate_sources(
    model,
    mix,
    drumroll,
    segment=4.0,
    overlap=0,
    device=None,
    sample_rate=44100,
    zero_roll=True,
):
    """
    Apply ``model`` to ``mix`` chunk by chunk and add the faded chunk outputs together.

    Args:
        model: module exposing ``forward(audio, roll)`` and a ``device`` attribute.
        mix (Tensor): mixture of shape (batch, channels, length).
        drumroll (Tensor): conditioning roll, sliced on its last dimension with the same
            sample indices as ``mix``.
        segment (float): segment length in seconds.
        overlap (float): each chunk is ``sample_rate * segment * (1 + overlap)`` samples
            long and the linear fade spans ``overlap * sample_rate`` samples.
        device (torch.device, str, or None): device on which the full-length output is
            stored; ``mix.device`` when None. Each chunk is run on ``model.device`` and its
            output is moved to ``device`` before being accumulated.
        sample_rate (int): samples per second used to convert ``segment`` and ``overlap``
            to sample counts.
        zero_roll (bool): when True (the default, and the previous behaviour) the model is
            fed an all-zero roll of the right shape and the content of ``drumroll`` is
            ignored. Pass False to condition on the real roll.

    Returns:
        Tensor of shape (batch, channels, length) on ``device``. ``channels`` is taken from
        ``mix``, so the model output must have the same number of channels.
    """
    if device is None:
        device = mix.device
    else:
        device = torch.device(device)

    batch, channels, length = mix.shape

    chunk_len = int(sample_rate * segment * (1 + overlap))
    start = 0
    end = chunk_len
    overlap_frames = overlap * sample_rate
    fade = Fade(fade_in_len=0, fade_out_len=int(overlap_frames), fade_shape="linear")

    final = torch.zeros(batch, channels, length, device=device)

    while start < length - overlap_frames:
        # The .to(...) calls are no-ops when everything already lives on one device.
        chunk = mix[:, :, start:end].to(model.device)
        roll = drumroll[:, :, start:end]
        if zero_roll:
            roll = torch.zeros_like(roll)
        roll = roll.to(model.device)
        with torch.no_grad():
            out = model.forward(chunk, roll)
        out = fade(out)
        final[:, :, start:end] += out.to(device)
        if start == 0:
            # From the second chunk on, fade in as well as out.
            fade.fade_in_len = int(overlap_frames)
            start += int(chunk_len - overlap_frames)
        else:
            start += chunk_len
        end += chunk_len
        if end >= length:
            # The last chunk reaches the end of the track: no fade-out.
            fade.fade_out_len = 0
    return final

In [12]:
class AudioData:
    """Minimal container exposing a single ``audio`` attribute.

    The evaluation cell assigns instances of this class to ``track.targets[...]`` so the
    reference comes from the separately loaded drum stem.
    NOTE(review): presumably museval only reads ``.audio`` from a target — confirm.
    """
    def __init__(self, audio):
        # Stored as given, without copying or validation.
        self.audio = audio

In [13]:
# Evaluate each checkpoint on the MUSDB18 test tracks with museval and save the scores.
# separate_sources() is called with its defaults, hence the "_zeros" suffix on the results folder.
analysis_root = "D:/Github/phd-drum-sep/analysis/demucs_small_model_analysis/"
musdb_hq_root = "D:/Github/phd-drum-sep/Data/musdb18hq/"

for name in ['epoch_280']:
    try:
        # Defined before the track loop so the CSV export below works even if every track fails.
        output_path = f"{analysis_root}results_{name}_zeros/"
        os.makedirs(output_path, exist_ok=True)

        model = DrumDemucs.load_from_checkpoint(f'{analysis_root}checkpoint/{name}.ckpt')
        model = model.eval()

        mus = musdb.DB(root="D:/Github/phd-drum-sep/Data/musdb18_stems/", subsets="test")

        results = museval.EvalStore(frames_agg='median', tracks_agg='median')
        for track in tqdm(mus):
            try:
                audio_path = musdb_hq_root + 'test/' + track.name + "/"

                # load_audio / load_roll ignore their start_point argument, so pass 0 instead of
                # depending on a variable left over from an earlier cell.
                mixture_tensor = load_audio(audio_path, 0, 'mixture.wav').unsqueeze(0).to(model.device)
                drum_tensor = load_audio(audio_path, 0, 'drums.wav').unsqueeze(0).to(model.device)
                shape = mixture_tensor.shape[2]
                roll_tensor = load_roll(audio_path, 0, shape).unsqueeze(0).to(model.device)

                # Keep the output on the model's own device rather than a hard-coded 'cuda'.
                separated = separate_sources(model, mixture_tensor, roll_tensor, device=model.device)

                # Swap axes 0 and 1 of the (channels, samples) arrays before handing them to museval.
                drum_estimate = np.swapaxes(separated.squeeze(0).cpu().numpy(), 0, 1)
                drum_reference = np.swapaxes(drum_tensor.squeeze(0).cpu().numpy(), 0, 1)

                # The drum estimate and reference are registered under both 'drums' and 'bass',
                # so the reported 'bass' scores are copies of the 'drums' scores.
                # NOTE(review): presumably done because museval needs more than one target — confirm.
                estimates = {'drums': drum_estimate, 'bass': drum_estimate}
                track.targets['drums'] = AudioData(drum_reference)
                track.targets['bass'] = AudioData(drum_reference)

                scores = museval.eval_mus_track(
                    track, estimates, output_dir=f"{output_path}"
                )

                print(scores)
                results.add_track(scores)
                gc.collect()
            except Exception as e:
                # Still best-effort, but report the track instead of dropping it silently
                # (and no longer swallow KeyboardInterrupt).
                print('error evaluating track, skipping', track.name, e)

        results.df.to_csv(f"{output_path}results.csv")

        del model
        del mus

    except Exception as e:
        print('error with model, skipping', name, e)

  2%|████                                                                                                                                                                                                    | 1/50 [00:35<28:45, 35.21s/it]

drums           ==> SDR:   0.548  SIR: 227.379  ISR:   1.066  SAR:  -0.162  
bass            ==> SDR:   0.548  SIR: 227.379  ISR:   1.066  SAR:  -0.162  



  4%|████████                                                                                                                                                                                                | 2/50 [01:08<27:02, 33.80s/it]

drums           ==> SDR:   1.169  SIR: 170.780  ISR:   2.023  SAR:  -2.313  
bass            ==> SDR:   1.169  SIR: 170.780  ISR:   2.023  SAR:  -2.313  



  6%|████████████                                                                                                                                                                                            | 3/50 [01:28<21:33, 27.53s/it]

drums           ==> SDR:   0.689  SIR: 227.348  ISR:   1.443  SAR:  -3.152  
bass            ==> SDR:   0.689  SIR: 227.348  ISR:   1.443  SAR:  -3.152  



  8%|████████████████                                                                                                                                                                                        | 4/50 [02:02<23:02, 30.06s/it]

drums           ==> SDR:   1.112  SIR: 233.295  ISR:   1.990  SAR:  -0.754  
bass            ==> SDR:   1.112  SIR: 233.295  ISR:   1.990  SAR:  -0.754  



 10%|████████████████████                                                                                                                                                                                    | 5/50 [02:40<24:50, 33.12s/it]

drums           ==> SDR:   0.393  SIR: 192.635  ISR:   1.355  SAR:  -1.405  
bass            ==> SDR:   0.393  SIR: 192.635  ISR:   1.355  SAR:  -1.405  



 12%|████████████████████████                                                                                                                                                                                | 6/50 [03:14<24:22, 33.25s/it]

drums           ==> SDR:   1.255  SIR: 231.723  ISR:   2.004  SAR:  -0.992  
bass            ==> SDR:   1.255  SIR: 231.723  ISR:   2.004  SAR:  -0.992  



 14%|████████████████████████████                                                                                                                                                                            | 7/50 [03:46<23:37, 32.96s/it]

drums           ==> SDR:   0.458  SIR: 190.793  ISR:   1.139  SAR:  -3.814  
bass            ==> SDR:   0.458  SIR: 190.793  ISR:   1.139  SAR:  -3.814  



 16%|████████████████████████████████                                                                                                                                                                        | 8/50 [04:20<23:15, 33.23s/it]

drums           ==> SDR:   0.893  SIR: 243.954  ISR:   2.001  SAR:  -2.802  
bass            ==> SDR:   0.893  SIR: 243.954  ISR:   2.001  SAR:  -2.802  



 18%|████████████████████████████████████                                                                                                                                                                    | 9/50 [04:56<23:20, 34.15s/it]

drums           ==> SDR:   0.082  SIR: 154.439  ISR:   0.484  SAR:  -3.910  
bass            ==> SDR:   0.082  SIR: 154.439  ISR:   0.484  SAR:  -3.910  



 20%|███████████████████████████████████████▊                                                                                                                                                               | 10/50 [05:31<22:55, 34.40s/it]

drums           ==> SDR:   0.169  SIR: 216.552  ISR:   0.799  SAR:  -7.817  
bass            ==> SDR:   0.169  SIR: 216.552  ISR:   0.799  SAR:  -7.817  



 22%|███████████████████████████████████████████▊                                                                                                                                                           | 11/50 [06:06<22:31, 34.64s/it]

drums           ==> SDR:   0.108  SIR: 211.688  ISR:   0.423  SAR:  -1.325  
bass            ==> SDR:   0.108  SIR: 211.688  ISR:   0.423  SAR:  -1.325  



 24%|███████████████████████████████████████████████▊                                                                                                                                                       | 12/50 [06:38<21:25, 33.82s/it]

drums           ==> SDR:   1.220  SIR: 226.378  ISR:   1.582  SAR:  -1.841  
bass            ==> SDR:   1.220  SIR: 226.378  ISR:   1.582  SAR:  -1.841  



 26%|███████████████████████████████████████████████████▋                                                                                                                                                   | 13/50 [06:57<18:08, 29.42s/it]

drums           ==> SDR:   0.205  SIR: 145.148  ISR:   0.358  SAR:  -0.392  
bass            ==> SDR:   0.205  SIR: 145.148  ISR:   0.358  SAR:  -0.392  



 28%|███████████████████████████████████████████████████████▋                                                                                                                                               | 14/50 [07:33<18:46, 31.29s/it]

drums           ==> SDR:   0.075  SIR: 189.777  ISR:   0.372  SAR:  -4.081  
bass            ==> SDR:   0.075  SIR: 189.777  ISR:   0.372  SAR:  -4.081  



 30%|███████████████████████████████████████████████████████████▋                                                                                                                                           | 15/50 [08:42<24:53, 42.67s/it]

drums           ==> SDR:   1.579  SIR: 237.972  ISR:   2.243  SAR:  -0.987  
bass            ==> SDR:   1.579  SIR: 237.972  ISR:   2.243  SAR:  -0.987  



 32%|███████████████████████████████████████████████████████████████▋                                                                                                                                       | 16/50 [09:20<23:20, 41.20s/it]

drums           ==> SDR:   0.068  SIR: 202.669  ISR:   0.157  SAR:  -4.647  
bass            ==> SDR:   0.068  SIR: 202.669  ISR:   0.157  SAR:  -4.647  



 34%|███████████████████████████████████████████████████████████████████▋                                                                                                                                   | 17/50 [09:39<18:58, 34.50s/it]

drums           ==> SDR:   0.548  SIR: 221.575  ISR:   1.214  SAR:  -6.536  
bass            ==> SDR:   0.548  SIR: 221.575  ISR:   1.214  SAR:  -6.536  



 36%|███████████████████████████████████████████████████████████████████████▋                                                                                                                               | 18/50 [10:14<18:27, 34.61s/it]

drums           ==> SDR:   1.465  SIR: 223.070  ISR:   2.361  SAR:  -1.784  
bass            ==> SDR:   1.465  SIR: 223.070  ISR:   2.361  SAR:  -1.784  



 38%|███████████████████████████████████████████████████████████████████████████▌                                                                                                                           | 19/50 [10:51<18:21, 35.52s/it]

drums           ==> SDR:   0.400  SIR: 210.808  ISR:   1.149  SAR:  -0.912  
bass            ==> SDR:   0.400  SIR: 210.808  ISR:   1.149  SAR:  -0.912  



 40%|███████████████████████████████████████████████████████████████████████████████▌                                                                                                                       | 20/50 [11:30<18:18, 36.62s/it]

drums           ==> SDR:   1.027  SIR: 222.672  ISR:   1.409  SAR:  -0.643  
bass            ==> SDR:   1.027  SIR: 222.672  ISR:   1.409  SAR:  -0.643  



 42%|███████████████████████████████████████████████████████████████████████████████████▌                                                                                                                   | 21/50 [12:09<17:57, 37.15s/it]

drums           ==> SDR:   2.240  SIR: 148.696  ISR:   3.040  SAR:   0.374  
bass            ==> SDR:   2.240  SIR: 148.696  ISR:   3.040  SAR:   0.374  



 44%|███████████████████████████████████████████████████████████████████████████████████████▌                                                                                                               | 22/50 [12:44<17:02, 36.53s/it]

drums           ==> SDR:   0.003  SIR: 151.596  ISR:   0.411  SAR:  -1.545  
bass            ==> SDR:   0.003  SIR: 151.596  ISR:   0.411  SAR:  -1.545  



 46%|███████████████████████████████████████████████████████████████████████████████████████████▌                                                                                                           | 23/50 [13:21<16:28, 36.60s/it]

drums           ==> SDR:   0.164  SIR: 139.948  ISR:   0.356  SAR:  -2.009  
bass            ==> SDR:   0.164  SIR: 139.948  ISR:   0.356  SAR:  -2.009  



 48%|███████████████████████████████████████████████████████████████████████████████████████████████▌                                                                                                       | 24/50 [13:55<15:30, 35.80s/it]

drums           ==> SDR:   0.542  SIR: 112.920  ISR:   0.715  SAR:  -0.131  
bass            ==> SDR:   0.542  SIR: 112.920  ISR:   0.715  SAR:  -0.131  



 50%|███████████████████████████████████████████████████████████████████████████████████████████████████▌                                                                                                   | 25/50 [14:31<14:56, 35.86s/it]

drums           ==> SDR:   0.238  SIR: 224.730  ISR:   0.605  SAR:  -5.237  
bass            ==> SDR:   0.238  SIR: 224.730  ISR:   0.605  SAR:  -5.237  



 52%|███████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                                               | 26/50 [15:06<14:17, 35.71s/it]

drums           ==> SDR:   0.302  SIR: 129.082  ISR:   0.621  SAR:  -0.353  
bass            ==> SDR:   0.302  SIR: 129.082  ISR:   0.621  SAR:  -0.353  



 54%|███████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                                           | 27/50 [15:44<13:54, 36.30s/it]

drums           ==> SDR:   1.861  SIR: 169.935  ISR:   4.177  SAR:   0.166  
bass            ==> SDR:   1.861  SIR: 169.935  ISR:   4.177  SAR:   0.166  



 56%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                                       | 28/50 [16:03<11:25, 31.15s/it]

drums           ==> SDR:   0.633  SIR: 192.178  ISR:   0.656  SAR:  -2.672  
bass            ==> SDR:   0.633  SIR: 192.178  ISR:   0.656  SAR:  -2.672  



 58%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                                   | 29/50 [16:12<08:39, 24.72s/it]

drums           ==> SDR:   0.220  SIR: 200.631  ISR:   0.304  SAR:  -1.262  
bass            ==> SDR:   0.220  SIR: 200.631  ISR:   0.304  SAR:  -1.262  



 60%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                               | 30/50 [16:45<09:03, 27.19s/it]

drums           ==> SDR:   0.256  SIR: 196.038  ISR:   0.809  SAR:  -1.626  
bass            ==> SDR:   0.256  SIR: 196.038  ISR:   0.809  SAR:  -1.626  



 62%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▍                                                                           | 31/50 [17:18<09:09, 28.94s/it]

drums           ==> SDR:   1.083  SIR: 198.946  ISR:   2.642  SAR:  -2.964  
bass            ==> SDR:   1.083  SIR: 198.946  ISR:   2.642  SAR:  -2.964  



 64%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                                       | 32/50 [17:53<09:14, 30.78s/it]

drums           ==> SDR:   0.037  SIR: 228.971  ISR:   0.303  SAR:  -1.995  
bass            ==> SDR:   0.037  SIR: 228.971  ISR:   0.303  SAR:  -1.995  



 66%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                                   | 33/50 [18:30<09:11, 32.43s/it]

drums           ==> SDR:   0.339  SIR: 239.315  ISR:   0.878  SAR:  -2.628  
bass            ==> SDR:   0.339  SIR: 239.315  ISR:   0.878  SAR:  -2.628  



 68%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                               | 34/50 [19:04<08:48, 33.02s/it]

drums           ==> SDR:   0.791  SIR: 238.834  ISR:   1.107  SAR:  -0.240  
bass            ==> SDR:   0.791  SIR: 238.834  ISR:   1.107  SAR:  -0.240  



 70%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                           | 35/50 [19:39<08:22, 33.48s/it]

drums           ==> SDR:   0.495  SIR: 213.666  ISR:   0.771  SAR:  -1.649  
bass            ==> SDR:   0.495  SIR: 213.666  ISR:   0.771  SAR:  -1.649  



 72%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▎                                                       | 36/50 [19:58<06:50, 29.34s/it]

drums           ==> SDR:   0.139  SIR: 197.472  ISR:   0.525  SAR:  -6.786  
bass            ==> SDR:   0.139  SIR: 197.472  ISR:   0.525  SAR:  -6.786  



 76%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                               | 38/50 [20:34<05:01, 25.08s/it]

drums           ==> SDR:   0.407  SIR: 213.485  ISR:   0.909  SAR:  -1.036  
bass            ==> SDR:   0.407  SIR: 213.485  ISR:   0.909  SAR:  -1.036  



 78%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                           | 39/50 [21:12<05:16, 28.80s/it]

drums           ==> SDR:   0.453  SIR: 229.073  ISR:   0.695  SAR:   0.839  
bass            ==> SDR:   0.453  SIR: 229.073  ISR:   0.695  SAR:   0.839  



 80%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                       | 40/50 [21:51<05:18, 31.87s/it]

drums           ==> SDR:   0.129  SIR: 178.018  ISR:   0.308  SAR:  -3.488  
bass            ==> SDR:   0.129  SIR: 178.018  ISR:   0.308  SAR:  -3.488  



 82%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                                   | 41/50 [22:25<04:54, 32.70s/it]

drums           ==> SDR:   0.274  SIR: 159.015  ISR:   0.885  SAR:  -0.820  
bass            ==> SDR:   0.274  SIR: 159.015  ISR:   0.885  SAR:  -0.820  



 84%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                               | 42/50 [23:00<04:26, 33.37s/it]

drums           ==> SDR:   0.372  SIR: 116.229  ISR:   0.863  SAR:  -1.249  
bass            ==> SDR:   0.372  SIR: 116.229  ISR:   0.863  SAR:  -1.249  



 86%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████▏                           | 43/50 [23:37<04:01, 34.48s/it]

drums           ==> SDR:   0.226  SIR: 150.010  ISR:   0.711  SAR:  -2.802  
bass            ==> SDR:   0.226  SIR: 150.010  ISR:   0.711  SAR:  -2.802  



 88%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                        | 44/50 [24:11<03:24, 34.11s/it]

drums           ==> SDR:   0.174  SIR: 196.279  ISR:   0.717  SAR:  -1.803  
bass            ==> SDR:   0.174  SIR: 196.279  ISR:   0.717  SAR:  -1.803  



 90%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                    | 45/50 [24:49<02:56, 35.28s/it]

drums           ==> SDR:   0.099  SIR: 194.325  ISR:   0.156  SAR:  -0.978  
bass            ==> SDR:   0.099  SIR: 194.325  ISR:   0.156  SAR:  -0.978  



 92%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████                | 46/50 [25:25<02:22, 35.53s/it]

drums           ==> SDR:   1.031  SIR: 212.197  ISR:   1.771  SAR:  -3.414  
bass            ==> SDR:   1.031  SIR: 212.197  ISR:   1.771  SAR:  -3.414  



 94%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████            | 47/50 [25:44<01:32, 30.77s/it]

drums           ==> SDR:   0.352  SIR: 130.753  ISR:   0.387  SAR:  -0.988  
bass            ==> SDR:   0.352  SIR: 130.753  ISR:   0.387  SAR:  -0.988  



 96%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████        | 48/50 [26:21<01:04, 32.45s/it]

drums           ==> SDR:   0.254  SIR: 190.764  ISR:   0.436  SAR:  -2.634  
bass            ==> SDR:   0.254  SIR: 190.764  ISR:   0.436  SAR:  -2.634  



 98%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████    | 49/50 [26:55<00:33, 33.03s/it]

drums           ==> SDR:   0.067  SIR: 196.574  ISR:   0.351  SAR:  -4.639  
bass            ==> SDR:   0.067  SIR: 196.574  ISR:   0.351  SAR:  -4.639  



100%|███████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 50/50 [27:30<00:00, 33.01s/it]

drums           ==> SDR:   1.016  SIR: 203.819  ISR:   1.542  SAR:  -2.899  
bass            ==> SDR:   1.016  SIR: 203.819  ISR:   1.542  SAR:  -2.899  






In [14]:
# Aggregated scores per target and metric (the EvalStore was created with median aggregation
# over frames and over tracks). The 'bass' rows repeat the 'drums' rows because the evaluation
# cell registers the drum estimate and reference under both names.
results.agg_frames_tracks_scores()

target  metric
bass    ISR         0.80913
        SAR        -1.78432
        SDR         0.39301
        SIR       198.94568
drums   ISR         0.80913
        SAR        -1.78432
        SDR         0.39301
        SIR       198.94568
Name: score, dtype: float64