# Lab41 model BSS Eval example notebook

This notebook contains an example of computing SDR, SIR, and SAR improvements on signals separated using Lab41's model.  

In [None]:
# Generic imports
import json
import numpy as np
import pandas as pd

# Imports to play audio
# Plotting imports
import IPython
from IPython.display import Audio, display
from matplotlib import pyplot as plt
# Default figure size as [width, height], applied to every matplotlib figure
fig_size = [8, 4]
plt.rcParams["figure.figsize"] = fig_size

# Import Lab41's separation model
from magnolia.dnnseparate.L41model import L41Model

# Import utilities for using the model
from magnolia.utils.postprocessing import convert_preprocessing_parameters
from magnolia.features.preprocessing import undo_preprocessing
from magnolia.iterate.mix_iterator import MixIterator
from magnolia.utils.clustering_utils import l41_clustering_separate, get_cluster_masks
from magnolia.utils.bss_eval import bss_eval_sources

### Hyperparameters

* **model_location** : Device on which to place the model (CPU or GPU), e.g. `/cpu:0`
* **model_settings** : Path to model configuration settings
* **mixes**          : List of mix configuration settings to source-separate
* **from_disk**      : Whether or not to read mixes from disk

In [None]:
# Network hyperparameters, taken from the model settings
model_params = {
    'nonlinearity': 'tanh',
    'layer_size': 600,
    'embedding_size': 40,
    # A real boolean, not the string 'False': any non-empty string is truthy,
    # so `if model_params['normalize']:` would have enabled normalization.
    # This also matches the normalize=False used when the model is constructed.
    'normalize': False
}

# Settings file assigning unique IDs to the LibriSpeech / UrbanSound8K data
uid_settings = '/local_data/magnolia/pipeline_data/date_2017_09_27_time_13_25/settings/assign_uids_LibriSpeech_UrbanSound8K.json'
# Base path of the saved L41 model checkpoints
model_save_base = '/local_data/magnolia/experiment_data/date_2017_09_28_time_13_14/aux/model_saves/l41'

# Device on which the model is placed
model_location = '/cpu:0'
# Path to the model configuration settings (left empty here)
model_settings = ''
# Mix configuration files to source-separate
mixes = ['/local_data/magnolia/pipeline_data/date_2017_09_27_time_13_25/settings/mixing_LibriSpeech_UrbanSound8K_test_out_of_sample.json']
# Read the mixes from disk
from_disk = True

### Data iterator

Create a mix iterator that loops through the mixes one at a time.

In [None]:
# One mix per batch, drawn from the configured mix settings files
mixer = MixIterator(
    mixes_settings_filenames=mixes,
    from_disk=from_disk,
    batch_size=1,
)

### Initialize and load an instance of Lab41's source separation model

In [None]:
# Build the network with a tanh nonlinearity and without normalization,
# then restore its weights from the saved model file
model = L41Model(
    normalize=False,
    nonlinearity='tanh',
)
model.load("Path to model file")

### Define some helper functions for evaluating BSS metrics

In [None]:
def invert_spectrogram(magnitude,phase):
    """
    Turn a magnitude/phase pair back into a signal with istft.

    The magnitude is squared element-wise and multiplied by exp(j*phase)
    to form the complex spectrogram handed to istft.

    NOTE(review): istft, sample_rate, overlap and fft_size are not defined
    in this function; they must already exist in the notebook namespace.
    """
    complex_spectrogram = np.square(magnitude) * np.exp(1.0j * phase)
    return istft(complex_spectrogram, sample_rate, None, overlap,
                 two_sided=False, fft_size=fft_size)

def bss_eval_sample(mixer, num_sources):
    """
    Draw one sample from mixer and measure the BSS metric improvement
    obtained by separating it.

    Parameters:
        mixer: iterator; next(mixer) returns a sample whose element 0 is
            used as the mixture spectrogram and whose elements
            1..num_sources each hold a source spectrogram at index 1.
        num_sources: number of true sources in the sample.

    Returns:
        dict with keys 'SDR', 'SIR' and 'SAR'. Each value is the metric of
        the separated sources minus the metric of the unseparated mixture,
        taken from the first three outputs of bss_eval_sources.

    NOTE(review): undo_preemphasis and clustering_separate are not among
    the names imported at the top of the notebook (which imports
    undo_preprocessing and l41_clustering_separate); confirm they are
    defined before this function runs.
    """

    # Generate a sample
    data = next(mixer)

    # Invert the mixture once and undo its preemphasis. Every row of the
    # "do nothing" baseline is this same waveform, so there is no need to
    # repeat the inversion for each source.
    mix_spectrogram = data[0]
    mix_waveform = undo_preemphasis(
        invert_spectrogram(np.abs(mix_spectrogram),
                           np.unwrap(np.angle(mix_spectrogram))))

    # Invert each true source and undo the preemphasis applied to it
    sources = []
    for i in range(1, num_sources + 1):
        source_spectrogram = data[i][1]
        source_waveform = invert_spectrogram(np.abs(source_spectrogram),
                                             np.unwrap(np.angle(source_spectrogram)))
        sources.append(undo_preemphasis(source_waveform))

    # Stack the input mix (repeated once per source) and the true sources
    input_mix = np.stack([mix_waveform] * num_sources)
    reference_sources = np.stack(sources)

    # Use the model to separate the signal into the desired number of sources
    estimated_sources = clustering_separate(mix_waveform,1e4,model,num_sources)

    # SDR, SIR, SAR of the unseparated mixture (baseline)
    do_nothing = bss_eval_sources(reference_sources, input_mix)

    # SDR, SIR, SAR of the separated sources
    do_something = bss_eval_sources(reference_sources, estimated_sources)

    # Improvement due to separation
    sdr = do_something[0] - do_nothing[0]
    sir = do_something[1] - do_nothing[1]
    sar = do_something[2] - do_nothing[2]

    return {'SDR': sdr, 'SIR': sir, 'SAR': sar}

## Evaluation of in set BSS metrics

This section shows the evaluation of SDR, SIR, and SAR on mixtures of speakers that are in the training set.

### Get the speaker keys corresponding to F and M speakers in the training set

In [None]:
# Location of the training dataset
libritrain = "Path to training dataset"

# Female speakers of the training set, one key per line
with open('Magnolia/data/librispeech/authors/train-clean-100-F.txt','r') as speakers:
    keys = speakers.read().splitlines()
in_set_F = list(keys)
speaker_keys = list(keys)

# Male speakers of the training set, appended to the combined key list
with open('Magnolia/data/librispeech/authors/train-clean-100-M.txt','r') as speakers:
    keys = speakers.read().splitlines()
in_set_M = list(keys)
speaker_keys.extend(keys)

### Create mixers for in set FF, FM, MM, and all speaker mixes.

The splits used in creating each SplitsIterator should be the same as the ones used in training the model.

In [None]:
# Iterator over the in-set male speakers, with split index 2 (the test split) active
maleiter = SplitsIterator(
    [0.8,0.1,0.1], libritrain,
    speaker_keys=in_set_M, shape=(150,fft_size//2+1), return_key=True,
)
maleiter.set_split(2)

# Iterator over the in-set female speakers, with split index 2 (the test split) active
femaleiter = SplitsIterator(
    [0.8,0.1,0.1], libritrain,
    speaker_keys=in_set_F, shape=(150,fft_size//2+1), return_key=True,
)
femaleiter.set_split(2)

# One mixer per speaker pairing, built in the order MM, FF, MF, FM
mixers = [
    SupervisedMixer(list(pair), shape=(150,fft_size//2+1),
                    mix_method='add', diffseed=True)
    for pair in ((maleiter, maleiter),
                 (femaleiter, femaleiter),
                 (maleiter, femaleiter),
                 (femaleiter, maleiter))
]
MMmixer, FFmixer, MFmixer, FMmixer = mixers

# Book keeping: a description and one empty result list per mixer and metric
mixerdesc = ['MM','FF','MF','FM']
mixersSDR = [[] for desc in mixerdesc]
mixersSIR = [[] for desc in mixerdesc]
mixersSAR = [[] for desc in mixerdesc]

# Sample counter read by the evaluation cell
i=0

### Evaluate BSS metrics on 500 samples from each mixer

In [None]:
# Number of samples to evaluate
num_samples = 500

# Resume from the current sample counter if one exists. Only a missing
# counter (NameError) falls back to 0; any other error is allowed to surface.
try:
    starti = i
except NameError:
    starti = 0

# Iterate over samples, computing BSS metrics for samples from each mixer
for i in range(starti, num_samples):
    # The loop variable is deliberately not called `mixer`, so the data
    # iterator created earlier under that name is not overwritten.
    for j, sample_mixer in enumerate(mixers):

        # Compute SDR, SIR, SAR improvements for this mixer
        evals = bss_eval_sample(sample_mixer, 2)

        # Store the average over the two sources
        mixersSDR[j].append(0.5 * (evals['SDR'][0] + evals['SDR'][1]))
        mixersSIR[j].append(0.5 * (evals['SIR'][0] + evals['SIR'][1]))
        mixersSAR[j].append(0.5 * (evals['SAR'][0] + evals['SAR'][1]))

    # Compute the running mean SDR once every mixer has a result for this
    # sample; doing it inside the inner loop averaged empty lists on the
    # first pass (nan plus a RuntimeWarning) and repeated the work four times.
    MMSDR = np.mean(mixersSDR[0])
    FFSDR = np.mean(mixersSDR[1])
    MFSDR = np.mean(mixersSDR[2])
    FMSDR = np.mean(mixersSDR[3])

    # Clear the display and show the progress so far
    IPython.display.clear_output(wait=True)
    print(str(i)+':' +
                 '  MM: ' + str(MMSDR) +
                 ', FF: ' + str(FFSDR) +
                 ', MF: ' + str((MFSDR+FMSDR)/2) +
                 ', All: '+ str((MMSDR+FMSDR+MFSDR+FFSDR)/4))

### Create a mixer for in set mixtures of 3 speakers

In [None]:
# Create an iterator over all the speakers with the same splits used in training and set the active split
# to the test split
siteratorT = SplitsIterator([0.8,0.1,0.1], libritrain, speaker_keys=speaker_keys, shape=datashape, return_key=True)
siteratorT.set_split(2)

# Make a mixer for 3-speaker mixes. It uses the same shape as the iterator it
# draws from, like every other mixer in this notebook; the previous hard-coded
# (150, fft_size//2) disagreed with the fft_size//2+1 used by the other mixers.
# NOTE(review): confirm datashape is the intended (frames, frequency bins) shape.
TImixer = SupervisedMixer([siteratorT,siteratorT,siteratorT], shape=datashape,
                        mix_method='add', diffseed=True)

# Book keeping to store the results of the evaluation
# (only one mixer is listed, so only the first result list of each metric is needed)
iallmixers = [TImixer]
iallmixerdesc = ['all']
iallmixersSDR = [[],[],[],[]]
iallmixersSIR = [[],[],[],[]]
iallmixersSAR = [[],[],[],[]]
iallTSDR = 0

# Sample counter read by the evaluation cell
i=0

### Evaluate BSS metrics on 500 samples of in set mixtures of 3 speakers

In [None]:
# Number of samples to evaluate
num_samples = 500

# Resume from the current sample counter if one exists, otherwise start at 0.
# The loop below reads `i`, so the fallback has to initialise `i` itself;
# setting only `starti` left `i` undefined and the loop raised NameError.
try:
    starti = i
except NameError:
    starti = 0
i = starti

# Iterate over samples from the 3 speaker mixer
while i < num_samples:
    # The loop variable is deliberately not called `mixer`, so the data
    # iterator created earlier under that name is not overwritten.
    for j, sample_mixer in enumerate(iallmixers):
        # Compute BSS evaluation metrics
        evals = bss_eval_sample(sample_mixer, 3)

        # Store the average over the three sources
        iallmixersSDR[j].append((evals['SDR'][0] + evals['SDR'][1] + evals['SDR'][2]) / 3.0)
        iallmixersSIR[j].append((evals['SIR'][0] + evals['SIR'][1] + evals['SIR'][2]) / 3.0)
        iallmixersSAR[j].append((evals['SAR'][0] + evals['SAR'][1] + evals['SAR'][2]) / 3.0)

    # Get the mean SDR
    iallTSDR = np.mean(iallmixersSDR[0])

    # Count one sample per pass over the mixers, not one per mixer
    i += 1

    # Clear display and show current results
    IPython.display.clear_output(wait=True)
    print(str(i)+':' +
                 '  T: ' + str(iallTSDR))

## Evaluation of out of set BSS metrics

This section shows the evaluation of SDR, SIR, and SAR on mixtures of speakers that were not in the training set.

### Get the speaker keys for F and M speakers from the test set

In [None]:
# Female speaker keys of the test set, one per line
with open('Magnolia/data/librispeech/authors/test-clean-F.txt','r') as speakers:
    female_lines = speakers.read()
out_set_F = female_lines.splitlines()

# Male speaker keys of the test set, one per line
with open('Magnolia/data/librispeech/authors/test-clean-M.txt','r') as speakers:
    male_lines = speakers.read()
out_set_M = male_lines.splitlines()

# Combined list: female keys first, then male keys
all_speakers = list(out_set_F)
all_speakers.extend(out_set_M)

### Create mixers for out of set FF, FM, MM, and all speaker mixes

In [None]:
# Make an iterator over female speakers
Fiterator = SplitsIterator([1], libritest, speaker_keys=out_set_F, shape=datashape, return_key=True)
Fiterator.set_split(0)

# Make an iterator over male speakers
Miterator = SplitsIterator([1], libritest, speaker_keys=out_set_M, shape=datashape, return_key=True)
Miterator.set_split(0)

# Make an iterator over all speakers
Aiterator = SplitsIterator([1], libritest, speaker_keys=all_speakers, shape=datashape, return_key=True)


# Create mixers for each combination of speakers
outsetFFmixer = SupervisedMixer([Fiterator,Fiterator], shape=datashape, 
                        mix_method='add', diffseed=True)
outsetFMmixer = SupervisedMixer([Fiterator,Miterator], shape=datashape, 
                        mix_method='add', diffseed=True)
outsetMMmixer = SupervisedMixer([Miterator,Miterator], shape=datashape, 
                        mix_method='add', diffseed=True)
outsetAAmixer = SupervisedMixer([Aiterator,Aiterator], shape=datashape, 
                        mix_method='add', diffseed=True)

Evaluation of the BSS metrics for out of set speakers works as above for in set speakers using these mixers.