In [2]:
import torch
import torch.nn as nn
import numpy as np
import scipy
import librosa
import youtube_dl
import os
import soundfile as sf
from IPython.display import Audio, display

use_cuda = torch.cuda.is_available()
device = torch.device("cuda" if use_cuda else "cpu")
print(device)

cpu


# Change dir and instance model

In [3]:
%cd ../models/Conditioned-Source-Separation-LaSAFT

/Users/arron/code/PlexedLive/mixs/models/Conditioned-Source-Separation-LaSAFT


In [4]:
from lasaft.source_separation.conditioned.cunet.models.dcun_tfc_gpocm_lasaft import DCUN_TFC_GPoCM_LaSAFT_Framework

args = {}

# FFT params
args['n_fft'] = 2048
args['hop_length'] = 1024
args['num_frame'] = 128

# SVS Framework
args['spec_type'] = 'complex'
args['spec_est_mode'] = 'mapping'

# Other Hyperparams
args['optimizer'] = 'adam'
args['lr'] = 0.001
args['dev_mode'] = False
args['train_loss'] = 'spec_mse'
args['val_loss'] = 'raw_l1'

# DenseNet Hyperparams

args ['n_blocks'] = 7
args ['input_channels'] = 4
args ['internal_channels'] = 24
args ['first_conv_activation'] = 'relu'
args ['last_activation'] = 'identity'
args ['t_down_layers'] = None
args ['f_down_layers'] = None
args ['tif_init_mode'] = None

# TFC_TDF Block's Hyperparams
args['n_internal_layers'] =5
args['kernel_size_t'] = 3
args['kernel_size_f'] = 3
args['tfc_tdf_activation'] = 'relu'
args['bn_factor'] = 16
args['min_bn_units'] = 16
args['tfc_tdf_bias'] = True
args['num_tdfs'] = 6
args['dk'] = 32

args['control_vector_type'] = 'embedding'
args['control_input_dim'] = 4
args['embedding_dim'] = 32
args['condition_to'] = 'decoder'

args['control_n_layer'] = 4
args['control_type'] = 'dense'
args['pocm_type'] = 'matmul'
args['pocm_norm'] = 'batch_norm'


model = DCUN_TFC_GPoCM_LaSAFT_Framework(**args)

In [5]:
model = model.load_from_checkpoint('pretrained/gpocm_lasaft.ckpt')

# Initialising song from musb

In [70]:
track_dir = '../../raw_data/musdb18/test/'
track_name = 'Tom McKenzie - Directions.stem.mp4'


# old code below

audio, rate = librosa.load(f'{track_dir}{track_name}', mono=False, sr=None)

# audio.shape

In [8]:
import musdb

In [77]:
DB = musdb.DB(root='../../raw_data/musdb18/samples', subsets='test')

In [90]:
audio = DB.tracks[0].audio

In [91]:
track = DB.tracks[0]

In [79]:
audio.shape

(13779968, 2)

Cell below takes the audio, runs the model to split audio and outputs the numpy array of the split audio

In [80]:
def separate_all(audio):
    '''
    you can add or remove what ever you want in this loop
    using the keys, vocals, drums, bass and other
    you can also combine them 
    '''
    
    print('vocals')
    vocals = model.separate_track(audio, 'vocals') 

    print('drums')
    drums = model.separate_track(audio, 'drums') 


    print('bass')
    bass = model.separate_track(audio, 'bass') 


    print('other')
    other = model.separate_track(audio, 'other') 

    
    return vocals, drums, bass, other

In [81]:
vocals_test, drums_test, bass_test, other_test = separate_all(audio)

vocals
drums
bass
other


In [13]:
test.shape

(11142144, 2)

In [87]:
estimates = {
    'vocals': vocals_test,
    'drums': drums_test,
    'bass': bass_test,
    'accompaniment': other_test
}

In [20]:
!pwd

/Users/arron/code/PlexedLive/mixs/models/Conditioned-Source-Separation-LaSAFT


In [25]:
np.save('../../notebooks/test_files/outputs', test)

In [26]:
load = np.load('../../notebooks/test_files/outputs.npy', allow_pickle=True)

In [27]:
est_dir = '../../notebooks/test_files/'

In [37]:
import museval

In [92]:
results = museval.EvalStore(frames_agg='median', tracks_agg='median')
results.add_track(museval.eval_mus_track(track, estimates))

  ['name']


In [96]:
results

Aggrated Scores (median over frames, median over tracks)
drums           ==> SDR:   3.463  SIR:  18.229  ISR:   5.281  SAR:   3.337  
bass            ==> SDR:  13.900  SIR:  30.086  ISR:  14.640  SAR:  12.680  
vocals          ==> SDR:   7.021  SIR:  11.544  ISR:  16.015  SAR:   7.765  
accompaniment   ==> SDR:   3.253  SIR:  15.508  ISR:   4.705  SAR:   4.542  

In [101]:
results.save(path='../../notebooks/test_files/test.pandas')

In [103]:
import pandas as pd

In [106]:
pd.read_pickle('../../notebooks/test_files/test.pandas').groupby('target').mean()

Unnamed: 0_level_0,time,score
target,Unnamed: 1_level_1,Unnamed: 2_level_1
accompaniment,155.5,7.746779
bass,155.5,13.410577
drums,155.5,7.756908
vocals,155.5,8.121085
