## Exploration Notes

1. Spleeter can separate audio into 2, 4, or 5 stems.
2. Spleeter depends on *ffmpeg* and *libsndfile* being installed.
3. If you have issues with ffmpeg probe, run `pip uninstall ffmpeg-python` and then `pip install ffmpeg-python`.
4. Install Spleeter with `pip install spleeter`.
5. There is a conda install (and a conda install for running on GPU), but I was not able to get it to work.

In [1]:
from spleeter.separator import Separator
from spleeter.audio.adapter import  get_default_audio_adapter
import librosa
from IPython.display import Audio, display
import numpy as np
import youtube_dl
import os
import museval
import musdb

## 4-stem implementation with one song

In [None]:
# Build the separator from the 'spleeter:4stems' configuration.
separator = Separator('spleeter:4stems')

In [None]:
# Show the working directory; the relative data paths used below are resolved against it.
! pwd

In [None]:
# Location and file name of the single track used for this demo.
track_dir = '../raw_data/small_sample/train'
track_name = "James May - On The Line.stem.mp4"
# sr=None asks librosa to keep the file's own sample rate; mono=False keeps the channels separate.
audio, rate = librosa.load(f'{track_dir}/{track_name}', mono=False, sr=None)
# Display the sample rate; it is reused when the track is loaded through Spleeter's adapter.
rate

In [None]:
# Load the same track through Spleeter's default audio adapter, at the rate reported by librosa.
audio_loader = get_default_audio_adapter()
# The second return value of load() is discarded here.
waveform, _ = audio_loader.load(f'{track_dir}/{track_name}', sample_rate=rate)

In [None]:
%%time
# Separate the in-memory waveform; the result is later iterated with .items() by display_audio.
prediction = separator.separate(waveform)

In [None]:
# When ready to do multiple predictions and save output, uncomment line below:

# separator.separate_to_file('/path/to/audio', '/path/to/output/directory')

In [10]:
def display_audio(prediction, rate=44100):
    """Render an inline audio player for every stem in a separation result.

    Parameters
    ----------
    prediction : mapping
        Maps a stem name to an array-like object exposing ``.T``; the
        transposed value is handed to ``IPython.display.Audio``.
    rate : int, optional
        Sample rate passed to ``Audio``. Defaults to 44100, the value that was
        previously hard-coded, so existing calls behave exactly as before.
        Pass the real rate when the audio was loaded at a different one.
    """
    for stem_name, stem_audio in prediction.items():
        print(stem_name)
        # Audio receives the transpose of the array stored in `prediction`.
        display(Audio(stem_audio.T, rate=rate))

In [None]:
# Play back each stem separated from the single demo track.
display_audio(prediction)

## 5-stem implementation with one song

In [2]:
# Build the separator from the 'spleeter:5stems' configuration.
separator_5 = Separator('spleeter:5stems')

In [3]:
# Video to download. NOTE(review): despite the name this looks like a bare YouTube video ID, not a full URL.
url = "gxEPV4kolz0" #@param {type:"string"}

In [None]:
def my_hook(d):
    """Progress callback: print a notice once the reported status is 'finished'.

    `d` is the status dictionary handed to the hook; only its 'status' key is read.
    """
    finished = d['status'] == 'finished'
    if not finished:
        return
    print('Done downloading...')


# youtube_dl options: pick the best audio stream and have ffmpeg extract it to WAV.
ydl_opts = {
    'format': 'bestaudio/best',
    'postprocessors': [{
        'key': 'FFmpegExtractAudio',
        'preferredcodec': 'wav',
        # NOTE(review): this looks like it was meant as a sample rate; confirm that
        # 'preferredquality' has any effect when the target codec is WAV.
        'preferredquality': '44100',
    }],
    'outtmpl': '%(title)s.wav',
    'progress_hooks': [my_hook],
}
with youtube_dl.YoutubeDL(ydl_opts) as ydl:
    # A single call both fetches the metadata and downloads the audio; the
    # previous extract_info(download=False) + download() pair requested the
    # video page twice.
    info = ydl.extract_info(url, download=True)
    # Ask youtube_dl for the path produced by 'outtmpl' instead of rebuilding it
    # from the raw title: titles containing characters that are not valid in
    # file names are sanitized on disk, and a missing title would make
    # `None + '.wav'` raise a TypeError.
    audio_path = ydl.prepare_filename(info)

# Load the downloaded file resampled to 44.1 kHz, keeping the channels separate.
audio, rate = librosa.load(audio_path, sr=44100, mono=False)
print(audio.shape)
display(Audio(audio, rate=rate))

[youtube] gxEPV4kolz0: Downloading webpage
[youtube] gxEPV4kolz0: Downloading webpage
[download] Billy Joel - Piano Man (Video).wav has already been downloaded
[download] 100% of 5.57MiB
Done downloading...
[ffmpeg] Post-process file Billy Joel - Piano Man (Video).wav exists, skipping




(2, 15062017)


In [None]:
# Load the downloaded track through Spleeter's default audio adapter at 44.1 kHz.
audio_loader = get_default_audio_adapter()
# Resolve the file name through youtube_dl (`ydl` and `info` come from the
# download cell) rather than concatenating the raw title: the on-disk name is
# sanitized and can differ from the title, and a missing title would make
# `None + '.wav'` raise a TypeError.
waveform_5, _ = audio_loader.load(ydl.prepare_filename(info), sample_rate=44100)

In [None]:
%%time
# Run the 5-stem separator on the downloaded track's waveform.
prediction_5 = separator_5.separate(waveform_5)

In [None]:
# Play back each stem produced by the 5-stem separator.
display_audio(prediction_5)

## 4-stem implementation with multiple songs

In [2]:
# Separator built from the 'spleeter:4stems' configuration, used for the multi-track run below.
separator_4 = Separator('spleeter:4stems')

In [3]:
%%time
track_dir = '../raw_data/small_sample/test'
audio_loader = get_default_audio_adapter()
tracks = tracks = [track for track in os.listdir(track_dir) if track.endswith(".mp4")]
predictions_4 = []
for track in tracks:
    waveform, rate = audio_loader.load(f'{track_dir}/{track}', sample_rate=None)
    predictions_4.append(separator_4.separate(waveform))

len(predictions_4)



INFO:tensorflow:Apply unet for vocals_spectrogram
INFO:tensorflow:Apply unet for drums_spectrogram
INFO:tensorflow:Apply unet for bass_spectrogram
INFO:tensorflow:Apply unet for other_spectrogram


INFO:spleeter:Downloading model archive https://github.com/deezer/spleeter/releases/download/v1.4.0/4stems.tar.gz
INFO:spleeter:Validating archive checksum
INFO:spleeter:Extracting downloaded 4stems archive
INFO:spleeter:4stems model file(s) extracted


INFO:tensorflow:Restoring parameters from pretrained_models\4stems\model
Wall time: 14min 7s


20

In [4]:
# Spot-check one arbitrary track's result (index 17): a dict with one array per stem name.
predictions_4[17]

{'vocals': array([[ 8.2174216e-07,  2.3738392e-06],
        [-5.7736829e-06, -4.0965188e-06],
        [-7.4836057e-06, -7.2264124e-06],
        ...,
        [-4.9810780e-07, -7.4118924e-07],
        [-6.9197563e-07, -9.0814711e-07],
        [ 2.6827118e-08, -2.2358851e-07]], dtype=float32),
 'drums': array([[ 2.0232576e-06,  3.0989161e-06],
        [-5.2794276e-06, -4.0503533e-06],
        [-5.8765031e-06, -5.1982811e-06],
        ...,
        [-2.2698906e-08, -2.1638142e-08],
        [ 2.4460123e-08,  2.3717826e-08],
        [ 1.8263480e-08,  1.4560887e-08]], dtype=float32),
 'bass': array([[ 1.9819734e-06,  3.2598357e-06],
        [-4.9282285e-06, -3.7297330e-06],
        [-5.2122541e-06, -4.6598102e-06],
        ...,
        [ 4.7658379e-08,  4.2528288e-08],
        [ 1.8116408e-07,  1.8178979e-07],
        [ 2.7007419e-08,  2.2057860e-08]], dtype=float32),
 'other': array([[ 2.1506859e-05,  2.2276516e-05],
        [-2.6900949e-05, -2.7346205e-05],
        [-2.9021370e-05, -3.268405

### Evaluation of 4-stem model

In [5]:
# Show the working directory; the relative 'test_files/...' paths below are resolved against it.
! pwd

/c/Users/njeri/code/ngachago/mixs/notebooks


In [7]:
# Persist the 4-stem predictions so evaluation can run without repeating the
# separation. The original referenced `predictions`, a name no cell defines;
# the list built by the 4-stem run is `predictions_4`.
np.save('test_files/spleeter_4', predictions_4)

In [2]:
# Reload the saved 4-stem predictions.
# NOTE: allow_pickle=True unpickles arbitrary Python objects — only load files this notebook wrote itself.
predictions_4 = np.load('test_files/spleeter_4.npy', allow_pickle=True)

In [4]:
# Open the dataset rooted at the small-sample directory, restricted to the 'test' subset.
DB = musdb.DB(root='../raw_data/small_sample', subsets='test')

20

In [5]:
# Collect per-track scores; frames and tracks are both aggregated with the mean.
results_4 = museval.EvalStore(frames_agg='mean', tracks_agg='mean')
# enumerate yields a plain integer position. np.ndenumerate yields an index
# *tuple* such as (0,), which is not a valid index into a list of tracks.
# NOTE(review): this pairs predictions with DB.tracks purely by position —
# confirm both were produced in the same track order.
for idx, estimate in enumerate(predictions_4):
    results_4.add_track(museval.eval_mus_track(DB.tracks[idx], estimate))

results_4

KeyboardInterrupt: 

In [None]:
# Save the 4-stem evaluation results. The original called `results.save`, but no
# cell defines `results`; the store built by the 4-stem evaluation is `results_4`.
results_4.save(path='test_files/spleeter_4.pandas')

## 5-stem implementation with multiple songs

In [11]:
# Separator built from the 'spleeter:5stems' configuration, used for the multi-track run below.
separator_5 = Separator('spleeter:5stems')

In [12]:
%%time
# Relies on `tracks`, `track_dir` and `audio_loader` defined by the 4-stem multi-track cell.
predictions_5 = []
for track in tracks:
    # sample_rate=None is passed through to the adapter unchanged; `rate` is not used afterwards in this cell.
    waveform, rate = audio_loader.load(f'{track_dir}/{track}', sample_rate=None)
    predictions_5.append(separator_5.separate(waveform))

# Number of separated tracks.
len(predictions_5)



INFO:tensorflow:Apply unet for vocals_spectrogram
INFO:tensorflow:Apply unet for piano_spectrogram
INFO:tensorflow:Apply unet for drums_spectrogram
INFO:tensorflow:Apply unet for bass_spectrogram
INFO:tensorflow:Apply unet for other_spectrogram
INFO:tensorflow:Restoring parameters from pretrained_models\5stems\model
Wall time: 8min 11s


20

In [13]:
# Spot-check one arbitrary track's result (index 14): a dict with one array per stem name.
predictions_5[14]

{'vocals': array([[-5.9437498e-06,  2.7136530e-06],
        [-1.0363790e-05,  3.4015993e-06],
        [-9.4779962e-06,  1.7994013e-06],
        ...,
        [ 0.0000000e+00,  0.0000000e+00],
        [ 0.0000000e+00,  0.0000000e+00],
        [ 0.0000000e+00,  0.0000000e+00]], dtype=float32),
 'piano': array([[-7.79070746e-08,  9.77548495e-08],
        [-1.61176175e-07,  1.11829479e-07],
        [-1.13123804e-07,  5.66841578e-08],
        ...,
        [ 0.00000000e+00,  0.00000000e+00],
        [ 0.00000000e+00,  0.00000000e+00],
        [ 0.00000000e+00,  0.00000000e+00]], dtype=float32),
 'drums': array([[-6.1507666e-07,  5.6881100e-07],
        [-7.6188149e-07,  4.4469303e-07],
        [-7.5826102e-07,  3.8342640e-07],
        ...,
        [ 0.0000000e+00,  0.0000000e+00],
        [ 0.0000000e+00,  0.0000000e+00],
        [ 0.0000000e+00,  0.0000000e+00]], dtype=float32),
 'bass': array([[-8.1884895e-07,  8.6586368e-07],
        [-9.2851548e-07,  8.7294569e-07],
        [-8.9386748e-0

### Evaluation of 5-stem model

In [None]:
# Persist the 5-stem predictions so evaluation can run without repeating the separation.
np.save('test_files/spleeter_5', predictions_5)

In [None]:
# Reload the saved 5-stem predictions.
# NOTE: allow_pickle=True unpickles arbitrary Python objects — only load files this notebook wrote itself.
predictions_5 = np.load('test_files/spleeter_5.npy', allow_pickle=True)

In [None]:
%%time
results_5 = museval.EvalStore(frames_agg='mean', tracks_agg='mean')
for idx, estimate in np.ndenumerate(predictions_5):
    results_5.add_track(museval.eval_mus_track(DB.tracks[idx], estimate))
                
results_5

In [None]:
# Save the 5-stem evaluation results next to the saved predictions.
results_5.save(path='test_files/spleeter_5.pandas')