# PIT-S-CNN BSS Eval example notebook

This notebook contains an example of computing SDR, SIR, and SAR improvements on signals separated using Lab41's model.  

In [None]:
# Generic imports
import sys
import time

import numpy as np
import tensorflow as tf

# Plotting imports
import IPython
from IPython.display import Audio
from matplotlib import pyplot as plt
# Default figure size as [width, height] (matplotlib interprets these as inches)
fig_size = [8, 4]
plt.rcParams["figure.figsize"] = fig_size

# Import Lab41's separation model
from magnolia.dnnseparate.pit import PITModel

# Import utilities for using the model
from magnolia.features.hdf5_iterator import SplitsIterator
from magnolia.features.supervised_iterator import SupervisedMixer
from magnolia.utils.clustering_utils import clustering_separate, preprocess_signal
from magnolia.features.mixer import FeatureMixer
from magnolia.features.spectral_features import istft, scale_spectrogram
from magnolia.utils.postprocessing import reconstruct
from magnolia.features.data_preprocessing import undo_preemphasis
from magnolia.utils.bss_eval import bss_eval_sources


## Paths

In [None]:
# Filesystem locations used by the rest of the notebook. Values of the form
# "** ... **" are placeholders and must be replaced with real paths before running.
libritest = "** Path to librispeech test hdf5 **"
model_path = "** Path to model checkpoint **"
libritrain = "** path to LibriSpeech train hdf5 **"
female_speakers = '** path to list of female speakers in train set (available in repo) **'
male_speakers = '** path to list of male speakers in train set (in repo) **'

# Speaker lists for the test set. The female list must point at the "-F" file;
# pointing both variables at the "-M" file would make the out-of-set female and
# male speaker sets identical.
female_speakers_test = 'data/librispeech/authors/test-clean-F.txt'
male_speakers_test = 'data/librispeech/authors/test-clean-M.txt'

### Hyperparameters

    fft_size    : Number of samples in the fft window
    overlap     : Amount of overlap in the fft windows
    sample_rate : Number of samples per second in the input signals
    numsources  : Number of sources
    datashape   : (Number of time steps, number of frequency bins)
    preemp_coef : Preemphasis coefficient

In [None]:
# Signal / STFT settings
sample_rate = 10000   # samples per second in the input signals
fft_size = 512        # samples in each fft window
overlap = 0.0256      # overlap between fft windows

# Separation problem settings
numsources = 2        # number of sources to separate
preemp_coef = 0.95    # preemphasis coefficient

# (time steps, frequency bins); a real-input fft of size N gives N//2 + 1 bins
datashape = (51, fft_size // 2 + 1)

### Initialize and load an instance of Lab41's source separation model

In [None]:
# Clear the default graph so that re-running this cell starts from a clean slate
tf.reset_default_graph()

# Build the PIT network sized to the (time steps, frequency bins) window and
# the number of sources configured above
model = PITModel(
    method='pit-s-cnn',
    num_steps=datashape[0],
    num_freq_bins=datashape[1],
    num_srcs=numsources,
)

# Session options: allow ops to fall back to another device when the requested
# one is unavailable, and let GPU memory grow on demand rather than being
# reserved all at once
config = tf.ConfigProto(
    allow_soft_placement=True,
    gpu_options=tf.GPUOptions(allow_growth=True),
)

# Open the session and load the checkpoint at model_path through the model's
# own loader
sess = tf.Session(config=config)
model.load(model_path, sess)

### Define some helper functions for evaluating BSS metrics

In [None]:
def bss_eval_sample(mixer, num_sources):
    """
    Draw one sample from `mixer` and measure the BSS metric gains from separation.

    The mixture is separated with the notebook-level `model` and `sess`. SDR,
    SIR and SAR are computed against the true sources twice: once using the
    unseparated mixture as the "estimate" for every source, and once using the
    model's separated outputs. The difference between the two is returned.

    Relies on the notebook-level `sample_rate`, `overlap` and `preemp_coef`
    settings when converting spectrograms back to waveforms.

    Args:
        mixer: iterator; `next(mixer)` must give a sequence whose first element
            is the mixture spectrogram and whose remaining elements are
            (metadata, source spectrogram) pairs
        num_sources: how many copies of the mixture waveform are stacked to
            form the unseparated baseline

    Returns:
        dict with keys 'SDR', 'SIR' and 'SAR'; each value is the metric for
        the separated signals minus the metric for the unseparated mixture
    """

    # Pull one sample; element 0 is the mixture spectrogram
    sample = next(mixer)
    mix_spec = sample[0]

    # Waveform of the mixture, and of each true source (metadata is not needed)
    mix_wave = reconstruct(mix_spec, mix_spec, sample_rate, None, overlap,
                           preemphasis=preemp_coef)
    source_waves = [
        reconstruct(src_spec, src_spec, sample_rate, None, overlap,
                    preemphasis=preemp_coef)
        for _, src_spec in sample[1:]
    ]

    # Baseline "estimate": the same mixture waveform repeated once per source
    input_mix = np.stack([mix_wave] * num_sources)
    reference_sources = np.stack(source_waves)

    # Separate the scaled mixture; only the first value returned by
    # scale_spectrogram is fed to the model
    mix_mag, _ = scale_spectrogram(mix_spec)
    separated_specs = model.separate(mix_mag, sess)

    # Turn every separated spectrogram back into a waveform, passing the
    # original mixture spectrogram alongside it
    estimated_sources = np.stack([
        reconstruct(est_spec, mix_spec, sample_rate, None, overlap,
                    square=True, preemphasis=preemp_coef)
        for est_spec in separated_specs
    ])

    # Metrics without separation, then with separation
    baseline = bss_eval_sources(reference_sources, input_mix)
    separated = bss_eval_sources(reference_sources, estimated_sources)

    # Positions 0, 1, 2 of the results are taken as SDR, SIR, SAR respectively
    metric_names = ('SDR', 'SIR', 'SAR')
    return {name: separated[pos] - baseline[pos]
            for pos, name in enumerate(metric_names)}

## Evaluation of in set BSS metrics

This section shows the evaluation of SDR, SIR, and SAR on mixtures of speakers that are in the training set.

### Get the speaker keys corresponding to F and M speakers in the training set

In [None]:
# One speaker key per line in each list file
with open(female_speakers, 'r') as speakers:
    in_set_F = speakers.read().splitlines()

with open(male_speakers, 'r') as speakers:
    in_set_M = speakers.read().splitlines()

# All in-set speakers: females first, then males (a new list, independent of
# in_set_F and in_set_M)
speaker_keys = in_set_F + in_set_M

### Create mixers for in set FF, FM, MM, and all speaker mixes.

The splits used in creating each SplitsIterator should be the same as the ones used in training the model.

In [None]:
# Spectrogram window shape requested from the in-set iterators and mixers.
# NOTE(review): this is 150 time steps, whereas the model was built with
# datashape[0] = 51 steps and the out-of-set mixers use datashape -- confirm
# that model.separate accepts windows of this length.
in_set_shape = (150, fft_size//2 + 1)

# Male in-set speakers; split index 2 (the last of the 0.8/0.1/0.1 splits) is
# used as the test split
maleiter = SplitsIterator([0.8, 0.1, 0.1], libritrain, speaker_keys=in_set_M,
                          shape=in_set_shape, return_key=True)
maleiter.set_split(2)

# Female in-set speakers, same splits and same active split
femaleiter = SplitsIterator([0.8, 0.1, 0.1], libritrain, speaker_keys=in_set_F,
                            shape=in_set_shape, return_key=True)
femaleiter.set_split(2)

# Options shared by every mixer below
mixer_options = dict(shape=in_set_shape, mix_method='add', diffseed=True)

# One mixer per ordered pairing of speaker genders
MMmixer = SupervisedMixer([maleiter, maleiter], **mixer_options)
FFmixer = SupervisedMixer([femaleiter, femaleiter], **mixer_options)
MFmixer = SupervisedMixer([maleiter, femaleiter], **mixer_options)
FMmixer = SupervisedMixer([femaleiter, maleiter], **mixer_options)
mixers = [MMmixer, FFmixer, MFmixer, FMmixer]

# Book keeping for the evaluation loop: labels for the mixers, in the same
# order as the `mixers` list
mixerdesc = ['MM', 'FF', 'MF', 'FM']

# For each metric, one independent list of per-sample results per mixer
mixersSDR = [[] for _ in mixerdesc]
mixersSIR = [[] for _ in mixerdesc]
mixersSAR = [[] for _ in mixerdesc]

# Sample counter that the evaluation cell starts (or resumes) from
i = 0

### Evaluate BSS metrics on 500 samples from each mixer

In [None]:
# Number of samples to evaluate
num_samples = 500

# Resume from the current value of `i` when this cell is interrupted and re-run.
# Only a missing `i` (setup cell not executed) falls back to 0; any other error
# is allowed to surface instead of being silently swallowed.
# Note: resuming repeats sample `i`, so mixers that had already been evaluated
# for that sample before the interruption receive one extra entry.
try:
    starti = i
except NameError:
    starti = 0

# Iterate over samples, computing BSS metrics for samples from each mixer
for i in range(starti, num_samples):
    for j, mixer in enumerate(mixers):

        # SDR, SIR, SAR improvements for one two-speaker sample from this mixer
        evals = bss_eval_sample(mixer, 2)

        # Store the improvement averaged over the sources. np.mean is used
        # rather than 1/(2)*(a + b), which evaluates to 0 under integer division.
        mixersSDR[j].append(np.mean(evals['SDR']))
        mixersSIR[j].append(np.mean(evals['SIR']))
        mixersSAR[j].append(np.mean(evals['SAR']))

    # Running mean SDR per mixer. Computed once per sample, after every mixer
    # has contributed, so np.mean is never applied to an empty list.
    MMSDR, FFSDR, MFSDR, FMSDR = (np.mean(values) for values in mixersSDR)

    # Clear the display and show the progress so far
    IPython.display.clear_output(wait=True)
    print(str(i)+':' +
                 '  MM: ' + str(MMSDR) +
                 ', FF: ' + str(FFSDR) +
                 ', MF: ' + str((MFSDR+FMSDR)/2) +
                 ', All: '+ str((MMSDR+FMSDR+MFSDR+FFSDR)/4))

## Evaluation of out of set BSS metrics

This section shows the evaluation of SDR, SIR, and SAR on mixtures of speakers that were not in the training set.

### Get the speaker keys for F and M speakers from the test set

In [None]:
# Read both test-set speaker lists (one key per line)
with open(female_speakers_test, 'r') as female_file, \
     open(male_speakers_test, 'r') as male_file:
    out_set_F = female_file.read().splitlines()
    out_set_M = male_file.read().splitlines()

# Every out-of-set speaker: females first, then males
all_speakers = out_set_F + out_set_M

### Create mixers for out of set FF, FM, MM, and all speaker mixes

In [None]:
# Each out-of-set iterator is built with a single split ([1]) over the test
# file, and that split (index 0) is made active on every iterator.

# Make an iterator over female speakers
Fiterator = SplitsIterator([1], libritest, speaker_keys=out_set_F, shape=datashape, return_key=True)
Fiterator.set_split(0)

# Make an iterator over male speakers
Miterator = SplitsIterator([1], libritest, speaker_keys=out_set_M, shape=datashape, return_key=True)
Miterator.set_split(0)

# Make an iterator over all speakers. The active split is selected explicitly
# here as well, so that this iterator is configured like the two above.
Aiterator = SplitsIterator([1], libritest, speaker_keys=all_speakers, shape=datashape, return_key=True)
Aiterator.set_split(0)


# Create mixers for each combination of speakers
outsetFFmixer = SupervisedMixer([Fiterator,Fiterator], shape=datashape,
                        mix_method='add', diffseed=True)
outsetFMmixer = SupervisedMixer([Fiterator,Miterator], shape=datashape,
                        mix_method='add', diffseed=True)
outsetMMmixer = SupervisedMixer([Miterator,Miterator], shape=datashape,
                        mix_method='add', diffseed=True)
outsetAAmixer = SupervisedMixer([Aiterator,Aiterator], shape=datashape,
                        mix_method='add', diffseed=True)

Evaluation of the BSS metrics for out of set speakers works as above for in set speakers using these mixers.