# Tutorial 3: How simulations define your predictions
The inverse problem is ill-posed and therefore has no unique solution. To solve it, we need to constrain the space of possible solutions. While inverse solutions such as minimum-norm estimates impose an explicit minimum-energy constraint, the constraints in esinet are implicit and are mostly shaped by the simulations.

This tutorial explores the relation between simulation parameters and predictions.

In [1]:
%matplotlib inline
%load_ext autoreload
%autoreload 2

# import mne
# import numpy as np
# from copy import deepcopy
# import matplotlib.pyplot as plt
import sys; sys.path.insert(0, '../')
from esinet import util
from esinet import Simulation
from esinet import Net
from esinet.forward import create_forward_model, get_info
from scipy.stats import pearsonr
from matplotlib import pyplot as plt
plot_params = dict(surface='white', hemi='both', verbose=0)

## Create Forward model
First, we create a template forward model, which comes with the esinet package.

In [2]:
# Build the channel-info object (sfreq=100; presumably the sampling frequency
# in Hz — confirm against esinet.forward.get_info) and, from it, the template
# forward model that every simulation and network below is based on.
# "ico3" is passed through as the source-space sampling.
info = get_info(sfreq=100)
fwd = create_forward_model(sampling="ico3", info=info)

[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:    2.2s remaining:    2.2s
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:    2.3s remaining:    0.0s
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:    2.3s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:    0.2s remaining:    0.2s
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:    0.2s remaining:    0.0s
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:    0.2s finished
[Parallel(n_jobs=4)]: Using backend LokyBackend with 4 concurrent workers.
[Parallel(n_jobs=4)]: Done   2 out of   4 | elapsed:    0.3s remaining:    0.3s
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:    0.3s remaining:    0.0s
[Parallel(n_jobs=4)]: Done   4 out of   4 | elapsed:    0.3s finished


# Simulate

## defs

In [3]:
import numpy as np
import tensorflow as tf
import mne
def prep_data(sim):
    """Turn a simulation into normalised network inputs and targets.

    Parameters
    ----------
    sim : object
        Must expose ``eeg_data`` (iterable of objects whose
        ``.average().data`` is a 2-D array) and ``source_data`` (iterable of
        objects whose ``.data`` is a 2-D array). All samples must share the
        same shapes.

    Returns
    -------
    X : numpy.ndarray
        EEG data of shape (n_samples, n_time, n_channels). At every time
        point the channel vector is z-scored (zero mean, unit standard
        deviation).
    y : numpy.ndarray
        Source data of shape (n_samples, n_time, n_dipoles). At every time
        point the dipole vector is divided by its maximum absolute value.

    Notes
    -----
    A time point whose channel vector has zero variance, or whose source
    vector is entirely zero, is returned as zeros instead of NaN.
    """
    # Deliberately no np.squeeze: it removed the sample axis for a
    # single-sample simulation (and the time axis for a single time point),
    # after which the axis swap below could not work.
    X = np.stack([eeg.average().data for eeg in sim.eeg_data])
    y = np.stack([src.data for src in sim.source_data])

    # Normalise along axis 1 (channels / dipoles) independently for every
    # sample and time point. A zero scale is replaced by 1 so that the
    # corresponding slice becomes 0 rather than 0/0 = NaN.
    x_std = X.std(axis=1, keepdims=True)
    X = (X - X.mean(axis=1, keepdims=True)) / np.where(x_std == 0, 1.0, x_std)

    y_peak = np.abs(y).max(axis=1, keepdims=True)
    y = y / np.where(y_peak == 0, 1.0, y_peak)

    # Networks expect (samples, time, features).
    X = np.swapaxes(X, 1, 2)
    y = np.swapaxes(y, 1, 2)

    return X, y

def sparsity(y_true, y_pred):
    """Ratio of the mean squared prediction to the peak squared prediction.

    Parameters
    ----------
    y_true : any
        Unused; present so the function has the (y_true, y_pred) signature
        of a Keras loss/metric.
    y_pred : tensor
        Predicted values.

    Returns
    -------
    tensor
        Scalar ``mean(y_pred**2) / max(y_pred**2)``, or 0 when the
        prediction is entirely zero.
    """
    # Uses `tf` (imported in this cell) rather than the Keras backend alias
    # `K`, which is only imported in a much later cell and therefore raised a
    # NameError when this helper was called on a fresh kernel.
    squared = tf.square(y_pred)
    # divide_no_nan returns 0 instead of NaN for an all-zero prediction.
    return tf.math.divide_no_nan(tf.reduce_mean(squared), tf.reduce_max(squared))
def custom_loss():
    """Build a loss: cosine similarity plus a small sparsity term.

    Returns
    -------
    callable
        ``loss(y_true, y_pred)`` returning the Keras ``CosineSimilarity``
        loss of the two tensors plus ``1e-3`` times ``sparsity(None, y_pred)``.
    """
    def loss(y_true, y_pred):
        cosine_term = tf.keras.losses.CosineSimilarity()(y_true, y_pred)
        sparsity_term = sparsity(None, y_pred)
        weighted_sparsity = sparsity_term * 1e-3
        return cosine_term + weighted_sparsity

    return loss

from esinet.evaluate import auc_metric, eval_auc, eval_nmse, eval_mean_localization_error

def eval(y_true, y_hat):
    """Score predictions against ground truth for every sample and time point.

    Note that this name shadows the built-in ``eval`` and that ``pos`` is not
    a parameter: it is read from the enclosing (notebook-global) scope and
    must be defined before this function is called.

    Parameters
    ----------
    y_true, y_hat : numpy.ndarray
        Indexed as ``[sample, time]``; each ``[i, j]`` slice is handed to the
        esinet metric functions.

    Returns
    -------
    aucs, nmses, mles : numpy.ndarray
        Three (n_samples, n_time) arrays, in that order: mean of ``eval_auc``,
        ``eval_nmse`` and ``eval_mean_localization_error``.
    """
    n_samples, n_time = y_true.shape[0], y_true.shape[1]
    aucs, mles, nmses = (np.zeros((n_samples, n_time)) for _ in range(3))

    # np.ndindex walks the (sample, time) grid in the same row-major order as
    # two nested range loops.
    for i, j in np.ndindex(n_samples, n_time):
        truth, estimate = y_true[i, j], y_hat[i, j]
        aucs[i, j] = np.mean(eval_auc(truth, estimate, pos))
        nmses[i, j] = eval_nmse(truth, estimate)
        mles[i, j] = eval_mean_localization_error(truth, estimate, pos)

    return aucs, nmses, mles

def threshold_activation(x):
    """Binarise ``x``: 1.0 where it is strictly above 0.5, else 0.0 (float32)."""
    above_threshold = tf.greater(x, 0.5)
    return tf.cast(above_threshold, dtype=tf.float32)

class Compressor:
    ''' Compression using Graph Fourier Transform

    ``fit`` decomposes the graph Laplacian of the source space and keeps the
    ``k`` singular vectors with the smallest singular values; ``encode``
    projects a signal onto them and ``decode`` maps it back.
    '''
    def __init__(self):
        pass

    def fit(self, fwd, k=600):
        ''' Computes the basis used for compression

        Parameters
        ----------
        fwd : dict-like
            Forward model; only ``fwd["src"]`` is read and passed to
            ``mne.spatial_src_adjacency``.
        k : int
            Number of basis vectors to keep.

        Return
        ------
        self : Compressor
            The fitted instance with attributes ``U`` (n, k), ``s`` (k,) and
            ``V`` (n, k).
        '''
        A = mne.spatial_src_adjacency(fwd["src"], verbose=0).toarray()
        D = np.diag(A.sum(axis=0))
        L = D-A
        # np.linalg.svd returns the right singular vectors transposed (Vh)
        # and the singular values in descending order.
        U, s, Vh = np.linalg.svd(L)

        # Keep the k components with the smallest singular values.
        self.U = U[:, -k:]
        self.s = s[-k:]
        # Rows of Vh are the right singular vectors, so select the last k
        # rows and transpose to get them as columns, matching the layout of
        # self.U. (Slicing columns of Vh, as before, picked the wrong entries.)
        self.V = Vh[-k:, :].T
        return self

    def encode(self, X):
        ''' Encodes a true signal X
        Parameters
        ----------
        X : numpy.ndarray
            True signal; its first axis must match the rows of ``self.U``.

        Return
        ------
        X_comp : numpy.ndarray
            Compressed signal
        '''
        X_comp = self.U.T @ X

        return X_comp

    def decode(self, X_comp):
        ''' Decodes a compressed signal X_comp

        Parameters
        ----------
        X_comp : numpy.ndarray
            Compressed signal, as returned by ``encode``.

        Return
        ------
        X_unfold : numpy.ndarray
            Decoded signal
        '''
        X_unfold = self.U @ X_comp
        return X_unfold

## sim

In [4]:
# Training set: simulate 5000 samples with the forward model and convert them
# into normalised (samples, time, features) arrays via prep_data.
# NOTE(review): target_snr=1e99 presumably makes the data effectively
# noise-free, and extents / number_of_sources given as tuples are presumably
# (min, max) ranges — confirm against esinet's Simulation settings.
# NOTE(review): no random seed is set, so the simulated data differ per run.
n_samples = 5000
settings = dict(duration_of_trial=0.20, extents=(1,40), number_of_sources=(1,15), target_snr=1e99)
sim = Simulation(fwd, info, settings=settings).simulate(n_samples=n_samples)
X, y = prep_data(sim)

Simulating data based on sparse patches.


100%|██████████| 5000/5000 [00:21<00:00, 235.06it/s]
100%|██████████| 5000/5000 [00:00<00:00, 15966.18it/s]
100%|██████████| 5000/5000 [01:09<00:00, 71.58it/s]


# GFT

In [5]:
# Graph-Fourier compression of the targets is currently disabled: the three
# lines below would fit a Compressor on the forward model and encode every
# sample of y.
# comp = Compressor()
# comp.fit(fwd)
# y_comp = np.stack([comp.encode(yy.T).T for yy in y], axis=0)

# With compression off, y_comp is simply another name for y (same array, not
# a copy). Later cells read n_dipoles from y_comp.shape[-1].
y_comp = y

# Optional visual comparison of an uncompressed and a compressed sample.
# %matplotlib qt
# plt.figure()
# plt.imshow(y[0], aspect=y[0].shape[1] / y[0].shape[0])
# plt.figure()
# plt.imshow(y_comp[0], aspect=y_comp[0].shape[1] / y_comp[0].shape[0])

# LSTM SINGLE-PATH

In [None]:
# Single-path model: a per-time-step dense layer feeds (a) a linear read-out
# over all dipoles and (b) a bidirectional GRU that produces a sigmoid mask;
# the network output is read-out * mask.
import tensorflow as tf
from tensorflow.keras.layers import Dense, TimeDistributed, Bidirectional, LSTM, GRU, multiply, add, Activation, Dropout, concatenate
from tensorflow.keras.regularizers import l1
from esinet.losses import nmse_loss, nmae_loss

# `pos` becomes a notebook-level global here; the eval() helper reads it.
leadfield, pos = util.unpack_fwd(fwd)[1:3]
n_channels, _ = leadfield.shape
# Output width follows y_comp, which is currently the same array as y.
n_dipoles = y_comp.shape[-1]
# NOTE(review): input_shape is assigned but not used anywhere in this cell.
input_shape = (None, None, n_channels)
tf.keras.backend.set_image_data_format('channels_last')

# Hyper-parameters. NOTE(review): input_dropout is not used in this cell.
n_dense_units = 61
n_lstm_units = 32
activation_function = "tanh"
batch_size = 32
epochs = 30
dropout = 0.2
input_dropout = 0.1

# Time dimension is left as None, so sequences of any length are accepted.
inputs = tf.keras.Input(shape=(None, n_channels), name='Input')


fc1 = TimeDistributed(Dense(n_dense_units, 
            activation=activation_function), 
            name='FC1')(inputs)
fc1 = Dropout(dropout)(fc1)

direct_out = TimeDistributed(Dense(n_dipoles, 
            activation="linear"),
            name='FC2')(fc1)

# Masking
# The recurrent layer is a GRU even though it is named 'LSTM1'.
lstm1 = Bidirectional(GRU(n_lstm_units, return_sequences=True, 
            input_shape=(None, n_dense_units), dropout=dropout), 
            name='LSTM1')(fc1)
mask = TimeDistributed(Dense(n_dipoles, 
            activation="sigmoid"), 
            name='Mask')(lstm1)

multi = multiply([direct_out, mask], name="multiply")
# Binds the global name `model`, which later cells rebind and read.
model = tf.keras.Model(inputs=inputs, outputs=multi, name='Contextualizer')


# Train with cosine-similarity loss, holding out 15 % of the data for validation.
model.compile(loss=tf.keras.losses.CosineSimilarity(), optimizer="adam")
model.summary()
model.fit(X, y, epochs=epochs, batch_size=batch_size, validation_split=0.15)

# LSTM DOUBLE-PATH

In [None]:
# Double-path model: like the single-path model, but a first dense layer plus
# bidirectional GRU ("context") is concatenated with the dense features
# before the read-out and mask branches.
import tensorflow as tf
from tensorflow.keras.layers import Dense, TimeDistributed, Bidirectional, LSTM, GRU, multiply, add, Activation, Dropout, concatenate
from tensorflow.keras.regularizers import l1
from esinet.losses import nmse_loss, nmae_loss

# `pos` becomes a notebook-level global here; the eval() helper reads it.
leadfield, pos = util.unpack_fwd(fwd)[1:3]
n_channels, _ = leadfield.shape
# Output width follows y_comp, which is currently the same array as y.
n_dipoles = y_comp.shape[-1]
# NOTE(review): input_shape is assigned but not used anywhere in this cell.
input_shape = (None, None, n_channels)
tf.keras.backend.set_image_data_format('channels_last')

# Hyper-parameters. NOTE(review): input_dropout is not used in this cell.
n_dense_units = 61
n_lstm_units = 32
activation_function = "tanh"
batch_size = 32
epochs = 30
dropout = 0.2
input_dropout = 0.1

# Time dimension is left as None, so sequences of any length are accepted.
inputs = tf.keras.Input(shape=(None, n_channels), name='Input')

fc0 = TimeDistributed(Dense(n_channels, 
            activation=activation_function), 
            name='FC0')(inputs)
fc0 = Dropout(dropout)(fc0)

# Context
# The recurrent layers in this cell are GRUs even though they are named 'LSTM*'.
lstm0 = Bidirectional(GRU(n_lstm_units, return_sequences=True, 
            input_shape=(None, n_dense_units), dropout=dropout), 
            name='LSTM0')(fc0)
cat = concatenate([lstm0, fc0])

fc1 = TimeDistributed(Dense(n_dense_units, 
            activation=activation_function), 
            name='FC1')(cat)
fc1 = Dropout(dropout)(fc1)

direct_out = TimeDistributed(Dense(n_dipoles, 
            activation="linear"),
            name='FC2')(fc1)




# Masking
lstm1 = Bidirectional(GRU(n_lstm_units, return_sequences=True, 
            input_shape=(None, n_dense_units), dropout=dropout), 
            name='LSTM1')(fc1)
mask = TimeDistributed(Dense(n_dipoles, 
            activation="sigmoid"), 
            name='Mask')(lstm1)

multi = multiply([direct_out, mask], name="multiply")
# Rebinds the global `model`; the single-path model is no longer reachable
# under that name. Both models carry the Keras name 'Contextualizer'.
model = tf.keras.Model(inputs=inputs, outputs=multi, name='Contextualizer')


# Train with cosine-similarity loss, holding out 15 % of the data for validation.
model.compile(loss=tf.keras.losses.CosineSimilarity(), optimizer="adam")
model.summary()
model.fit(X, y, epochs=epochs, batch_size=batch_size, validation_split=0.15)


# LSTM Encoder-Decoder

In [None]:
# Encoder-decoder model: one LSTM summarises the whole EEG sequence into its
# final states, and a second LSTM re-reads the same sequence starting from
# those states; a per-time-step dense layer maps its outputs to dipoles.
import tensorflow as tf
from tensorflow.keras.layers import Dense, TimeDistributed, Bidirectional, LSTM, GRU, multiply, add, Activation, Dropout, concatenate
from tensorflow.keras.regularizers import l1
from esinet.losses import nmse_loss, nmae_loss

# `pos` becomes a notebook-level global here; the eval() helper reads it.
leadfield, pos = util.unpack_fwd(fwd)[1:3]
n_channels, _ = leadfield.shape
# Output width follows y_comp, which is currently the same array as y.
n_dipoles = y_comp.shape[-1]
# NOTE(review): input_shape is assigned but not used anywhere in this cell.
input_shape = (None, None, n_channels)
tf.keras.backend.set_image_data_format('channels_last')


# Hyper-parameters. NOTE(review): n_dense_units, activation_function, dropout
# and input_dropout are not used by the layers built in this cell.
n_dense_units = 61
n_lstm_units = 128
activation_function = "tanh"
batch_size = 32
epochs = 30
dropout = 0.2
input_dropout = 0.1

inputs = tf.keras.Input(shape=(None, n_channels), name='Input')
# Encoder: only the final hidden and cell states are used downstream.
encoder_outputs, state_h, state_c = LSTM(n_lstm_units, return_state=True)(inputs)

# Decoder: consumes the same `inputs` sequence, initialised with the encoder states.
decoder_LSTM = LSTM(n_lstm_units, return_state=True, return_sequences=True)   
decoder_outputs, _, _ = decoder_LSTM(inputs, initial_state=[state_h, state_c])
outputs = TimeDistributed(Dense(n_dipoles, activation='linear'))(decoder_outputs)

# Rebinds the global `model`; this is the model the next cell predicts with.
model = tf.keras.Model(inputs=inputs, outputs=outputs, name='Encoder-Decoder')
model.compile(loss=tf.keras.losses.CosineSimilarity(), optimizer="adam")
model.summary()
model.fit(X, y, epochs=epochs, batch_size=batch_size, validation_split=0.15)

In [None]:
# Qualitative check: simulate two fresh samples with different settings than
# the training set, predict with `model`, and plot truth next to prediction.
# `model` is whichever network the name was bound to most recently.
# Note: this rebinds the globals n_samples and settings.
n_samples = 2
settings = dict(duration_of_trial=0.25, extents=(1,2), number_of_sources=6, target_snr=(2, 15))

sim_test = Simulation(fwd, info, settings=settings).simulate(n_samples=n_samples)
print(sim_test.simulation_info.number_of_sources.values[0])
X_test, y_test = prep_data(sim_test)

# Keep only the prediction for the first test sample.
y_hat = model.predict(X_test)[0]

# Ground truth of the first sample.
stc = sim_test.source_data[0]
stc.plot(**plot_params)

# Reuse a copy of the ground-truth source estimate as a container for the
# prediction; y_hat is transposed before being assigned to stc_hat.data.
stc_hat = stc.copy()
# stc_hat.data = comp.decode(y_hat.T)
stc_hat.data = y_hat.T

stc_hat.plot(**plot_params)


# Old LSTM

In [None]:
# "Old LSTM" baseline: per-time-step dense layer -> bidirectional GRU ->
# linear read-out over all dipoles (no mask branch).
import tensorflow as tf
# Dropout is used below, so it is imported here as well; previously this cell
# only worked because an earlier cell had already imported it.
from tensorflow.keras.layers import Dense, TimeDistributed, Bidirectional, LSTM, GRU, multiply, Activation, Dropout
from tensorflow.keras.regularizers import l1
from esinet.losses import nmae_loss
# `pos` becomes a notebook-level global here; the eval() helper reads it.
leadfield, pos = util.unpack_fwd(fwd)[1:3]
n_channels, n_dipoles = leadfield.shape
# NOTE(review): input_shape is assigned but not used anywhere in this cell.
input_shape = (None, None, n_channels)
tf.keras.backend.set_image_data_format('channels_last')

# Hyper-parameters.
n_dense_units = 300
n_lstm_units = 128
activation_function = "tanh"
batch_size = 32
epochs = 30
dropout = 0.2

# Time dimension is left as None, so sequences of any length are accepted.
inputs = tf.keras.Input(shape=(None, n_channels), name='Input')
fc1 = TimeDistributed(Dense(n_dense_units, 
            activation=activation_function), 
            name='FC1')(inputs)
fc1 = Dropout(dropout)(fc1)
# The recurrent layer is a GRU even though it is named 'LSTM1'; here the name
# is given to the inner GRU, not to the Bidirectional wrapper.
lstm1 = Bidirectional(GRU(n_lstm_units, return_sequences=True, name='LSTM1'))(fc1)

direct_out = TimeDistributed(Dense(n_dipoles, 
            activation="linear"),
            name='FC2')(lstm1)


model2 = tf.keras.Model(inputs=inputs, outputs=direct_out, name='LSTM_Old')


# Train with cosine-similarity loss, holding out 15 % of the data for validation.
model2.compile(loss=tf.keras.losses.CosineSimilarity(), optimizer="adam")
model2.summary()
model2.fit(X, y, epochs=epochs, batch_size=batch_size, validation_split=0.15)

# FC

In [None]:
# Fully connected baseline: two per-time-step dense layers, no recurrence, so
# every time point is mapped to sources independently of the others.
import tensorflow as tf
from tensorflow.keras.layers import Dense, TimeDistributed, Bidirectional, LSTM, GRU, multiply, Activation
from tensorflow.keras.regularizers import l1
from esinet.losses import nmae_loss
# `pos` becomes a notebook-level global here; the eval() helper reads it.
leadfield, pos = util.unpack_fwd(fwd)[1:3]
n_channels, n_dipoles = leadfield.shape
# NOTE(review): input_shape is assigned but not used anywhere in this cell.
input_shape = (None, None, n_channels)
tf.keras.backend.set_image_data_format('channels_last')

# Hyper-parameters. NOTE(review): n_lstm_units and dropout are not used by
# the layers built in this cell.
n_dense_units = 300
n_lstm_units = 30
activation_function = "tanh"
batch_size = 32
epochs = 200
dropout = 0.1

# Time dimension is left as None, so sequences of any length are accepted.
inputs = tf.keras.Input(shape=(None, n_channels), name='Input')
fc1 = TimeDistributed(Dense(n_dense_units, 
            activation=activation_function), 
            name='FC1')(inputs)
direct_out = TimeDistributed(Dense(n_dipoles, 
            activation="linear"),
            name='FC2')(fc1)


model3 = tf.keras.Model(inputs=inputs, outputs=direct_out, name='FC')


# Train with cosine-similarity loss, holding out 15 % of the data for validation.
model3.compile(loss=tf.keras.losses.CosineSimilarity(), optimizer="adam")

model3.summary()
model3.fit(X, y, epochs=epochs, batch_size=batch_size, validation_split=0.15)

# CNN

In [None]:
# FC-CNN model: per-time-step dense features are treated as a one-channel
# (time x feature) image, convolved with a dilated 2-D kernel, flattened per
# time step and mapped to dipoles by two further dense layers.
import tensorflow as tf
from tensorflow.keras.layers import Dense, TimeDistributed, Bidirectional, Conv2D, Flatten, Reshape

from tensorflow.keras.regularizers import l1
from esinet.losses import nmae_loss
# `pos` becomes a notebook-level global here; the eval() helper reads it.
leadfield, pos = util.unpack_fwd(fwd)[1:3]
n_channels, n_dipoles = leadfield.shape
# Unlike the recurrent models, this network is built for a fixed number of
# time points taken from the training data.
n_time = X.shape[1]
# NOTE(review): input_shape is assigned but not used anywhere in this cell.
input_shape = (None, None, n_channels)
tf.keras.backend.set_image_data_format('channels_last')

# Hyper-parameters. NOTE(review): n_lstm_units, dropout, kernel_size and
# new_shape are only referenced by commented-out code or not at all.
n_dense_units = 100
n_lstm_units = 30
activation_function = "tanh"
batch_size = 32
epochs = 200
dropout = 0.1
n_filters = int(n_channels / 2)
kernel_size = (1, n_dipoles)
dilation_rate = (2, 1)
new_shape = (n_time, n_dipoles, 1)

inputs = tf.keras.Input(shape=(X.shape[1], n_channels), name='Input')
# cnn = Conv2D(n_filters, kernel_size, dilation_rate=dilation_rate, padding="valid")(inputs)
# cnn = Reshape(new_shape)(cnn)


fc1 = TimeDistributed(Dense(n_dense_units, 
            activation=activation_function), 
            name='FC1')(inputs)

# Add a trailing singleton axis so Conv2D sees a (time, feature, 1) image.
cnn = Reshape((n_time, n_dense_units, 1))(fc1)
# NOTE(review): the kernel width 100 is a literal that happens to equal
# n_dense_units; it will not follow if n_dense_units is changed.
cnn = Conv2D(n_filters, (3, 100), dilation_rate=dilation_rate, padding="same", activation=activation_function)(cnn)
# Merge the feature and filter axes back into one vector per time step.
cnn = Reshape((n_time, n_filters*n_dense_units))(cnn)

fc2 = TimeDistributed(Dense(300, 
            activation=activation_function),
            name='FC2')(cnn)

fc3 = TimeDistributed(Dense(n_dipoles, 
            activation="linear"),
            name='FC3')(fc2)

# cnn = Reshape(new_shape)(cnn)


model4 = tf.keras.Model(inputs=inputs, outputs=fc3, name='FC-CNN')


model4.compile(loss=tf.keras.losses.CosineSimilarity(), optimizer="adam")

model4.summary()
# NOTE(review): X and y are passed with an extra trailing singleton axis,
# although the Input above is declared as (n_time, n_channels) and the other
# cells call fit(X, y, ...) directly — confirm this is intended.
model4.fit(X[:, :, :, np.newaxis], y[:, :, :, np.newaxis], epochs=epochs, batch_size=batch_size, validation_split=0.15)

In [None]:
# Qualitative check of model4 (FC-CNN): simulate two fresh samples, predict,
# and plot truth next to prediction for the first one.
# duration_of_trial matches the training simulation (0.2); model4 was built
# for a fixed number of time points.
# Note: this rebinds the globals n_samples and settings.
n_samples = 2
settings = dict(duration_of_trial=0.2, extents=(1,40), number_of_sources=3, target_snr=(2, 15))

sim_test = Simulation(fwd, info, settings=settings).simulate(n_samples=n_samples)
print(sim_test.simulation_info.number_of_sources.values[0])
X_test, y_test = prep_data(sim_test)

# Keep only the prediction for the first test sample.
y_hat = model4.predict(X_test)[0]

# Ground truth of the first sample.
stc = sim_test.source_data[0]
stc.plot(**plot_params)

# Reuse a copy of the ground-truth source estimate as a container for the
# prediction; y_hat is transposed before being assigned to stc_hat.data.
stc_hat = stc.copy()
# stc_hat.data = comp.decode(y_hat.T)
stc_hat.data = y_hat.T

stc_hat.plot(**plot_params)


# FC - context

In [8]:
# FC-Context model: the whole EEG segment is flattened and squeezed into an
# n_embed-dimensional "context" value, which is repeated for every time point
# and appended to that time point's channel vector before two dense layers.
import tensorflow as tf
from tensorflow.keras.layers import Dense, TimeDistributed, Bidirectional, Conv2D, Flatten, Reshape, Dropout, concatenate, RepeatVector
# This import is what first binds the backend alias `K` in the notebook.
from tensorflow.keras import backend as K
from tensorflow.keras.regularizers import l1
from esinet.losses import nmae_loss
# `pos` becomes a notebook-level global here; the eval() helper reads it.
leadfield, pos = util.unpack_fwd(fwd)[1:3]
n_channels, n_dipoles = leadfield.shape
# The network is built for a fixed number of time points taken from the
# training data (Flatten and RepeatVector need a known length).
n_time = X.shape[1]
# NOTE(review): input_shape is assigned but not used anywhere in this cell.
input_shape = (None, None, n_channels)
tf.keras.backend.set_image_data_format('channels_last')

# Hyper-parameters. NOTE(review): n_lstm_units, dropout, n_filters,
# kernel_size, dilation_rate and new_shape are only referenced by
# commented-out code or not at all.
n_dense_units = 300
n_lstm_units = 30
n_embed = 1
activation_function = "tanh"
batch_size = 32
epochs = 50
dropout = 0.1
n_filters = int(n_channels / 2)
kernel_size = (1, n_dipoles)
dilation_rate = (2, 1)
new_shape = (n_time, n_embed)

inputs = tf.keras.Input(shape=(X.shape[1], n_channels), name='Input')

# Global context: one n_embed-sized summary of the entire segment ...
context = Flatten()(inputs)
context = Dense(n_embed, activation=activation_function, name="context")(context)
# context = Reshape(new_shape)(context)
# ... copied to every time point ...
context = RepeatVector(n_time)(context)

# ... and appended to the per-time-point channel vector.
context = concatenate([inputs, context])
# context = Dropout(0.2)(context)

fc1 = TimeDistributed(Dense(n_dense_units, 
            activation=activation_function), 
            name='FC1')(context)
# cat = concatenate([fc1, context])

# Output activation is tanh here, unlike the linear read-outs of the other models.
fc2 = TimeDistributed(Dense(n_dipoles, 
            activation="tanh"),
            name='FC2')(fc1)



model5 = tf.keras.Model(inputs=inputs, outputs=fc2, name='FC-Context')
# Extra penalty on the mean absolute output (weight 1e1), added on top of the
# compiled loss.
model5.add_loss(1e1*K.mean(K.abs(fc2)))

model5.compile(loss=tf.keras.losses.CosineSimilarity(), optimizer="adam")

model5.summary()
# NOTE(review): X and y are passed with an extra trailing singleton axis,
# although the Input above is declared as (n_time, n_channels) and the other
# cells call fit(X, y, ...) directly — confirm this is intended.
model5.fit(X[:, :, :, np.newaxis], y[:, :, :, np.newaxis], epochs=epochs, batch_size=batch_size, validation_split=0.15)

Model: "FC-Context"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 Input (InputLayer)             [(None, 20, 61)]     0           []                               
                                                                                                  
 flatten_1 (Flatten)            (None, 1220)         0           ['Input[0][0]']                  
                                                                                                  
 context (Dense)                (None, 1)            1221        ['flatten_1[0][0]']              
                                                                                                  
 repeat_vector_1 (RepeatVector)  (None, 20, 1)       0           ['context[0][0]']                
                                                                                         

<keras.callbacks.History at 0x267ccfdc490>

In [11]:
# Qualitative check of model5 (FC-Context): simulate two fresh samples with
# the same target_snr as the training set, predict, and plot truth next to
# prediction for the first one.
# duration_of_trial matches the training simulation (0.2); model5 was built
# for a fixed number of time points.
# Note: this rebinds the globals n_samples and settings.
n_samples = 2
settings = dict(duration_of_trial=0.2, extents=(1,40), number_of_sources=3, target_snr=1e99)

sim_test = Simulation(fwd, info, settings=settings).simulate(n_samples=n_samples)
print(sim_test.simulation_info.number_of_sources.values[0])
X_test, y_test = prep_data(sim_test)

# Keep only the prediction for the first test sample.
y_hat = model5.predict(X_test)[0]

# Ground truth of the first sample.
stc = sim_test.source_data[0]
stc.plot(**plot_params)

# Reuse a copy of the ground-truth source estimate as a container for the
# prediction; y_hat is transposed before being assigned to stc_hat.data.
stc_hat = stc.copy()
# stc_hat.data = comp.decode(y_hat.T)
stc_hat.data = y_hat.T

stc_hat.plot(**plot_params)


Simulating data based on sparse patches.


100%|██████████| 2/2 [00:00<00:00, 18.40it/s]
100%|██████████| 2/2 [00:00<00:00, 2003.97it/s]
100%|██████████| 2/2 [00:00<00:00, 69.15it/s]

3





<mne.viz._brain._brain.Brain at 0x267b751f310>

Using control points [0.03703328 0.04606918 0.12230216]
Using control points [0.02217556 0.02717292 0.07537197]
Using control points [0.00000000e+00 0.00000000e+00 2.87812185e-08]
Using control points [0.01065652 0.01170133 0.02240586]


# Eval

In [None]:
# Evaluation: simulate a small test set, then compare model5's prediction for
# one sample with the ground truth (brain plots and Pearson correlation).
# The quantitative benchmark over several models and the Champagne solver is
# kept below but commented out.
import numpy as np
from scipy.stats import pearsonr
from tqdm.notebook import tqdm
# The `invert` package is imported from a sibling checkout via a relative
# path, so this cell depends on the notebook's working directory.
import sys; sys.path.insert(0, '../../invert/')
from invert.solvers.empirical_bayes import SolverChampagne

# The solver is prepared here, but only the commented-out code below uses it.
solver = SolverChampagne()
solver.make_inverse_operator(fwd)
n_samples = 2
settings = dict(duration_of_trial=0.2, number_of_sources=(1,10), extents=(1,40))
# settings = dict(duration_of_trial=0.25, number_of_sources=5, extents=(1,2))

sim_test = Simulation(fwd, info, settings=settings).simulate(n_samples=n_samples)
X_test, y_test = prep_data(sim_test)

# Benchmark loop (disabled). It is the only place where `models`, `aucs`,
# `mles` and `nmses` would be defined; the plotting cell after this one
# reads those names.
# models = [model, model2, model3]
# aucs = []
# mles = []
# nmses = []
# for net in models:
#     y_hat = net.predict(X_test)
#     auc, nmse, mle = eval(y_test, y_hat)
#     aucs.append( auc )
#     nmses.append( nmse )
#     mles.append( mle )
#     print(f"{net.name}: \n\t{np.nanmedian(aucs[-1])} AUC \n\t{np.nanmedian(mles[-1])} mm \n\t{np.nanmedian(nmses[-1])} nMSE")

# y_hat = np.stack([solver.apply_inverse_operator(epochs.average()).data for epochs in tqdm(sim_test.eeg_data)], axis=0)
# y_hat = np.swapaxes(y_hat, 1, 2)
# auc, nmse, mle = eval(y_test, y_hat)
# aucs.append( auc )
# nmses.append( nmse )
# mles.append( mle )

# models.append(solver)
# print(f"{solver.name}: \n\t{np.nanmedian(aucs[-1])} AUC \n\t{np.nanmedian(mles[-1])} mm \n\t{np.nanmedian(nmses[-1])} nMSE")

# Index of the test sample to inspect.
idx = 0
n = sim_test.simulation_info["number_of_sources"].values[idx]
print(f"{n} sources")

# PLOTTING BRAINS
stc = sim_test.source_data[idx]
stc.plot(**plot_params, brain_kwargs=dict(title="Ground Truth"))

# Reuse a copy of the ground-truth source estimate as a container for the
# prediction; y_hat is transposed before being assigned to stc_hat.data.
stc_hat = stc.copy()
y_hat = model5.predict(X_test)[idx]
stc_hat.data = y_hat.T
stc_hat.plot(**plot_params, brain_kwargs=dict(title=model5.name))
# Pearson correlation between the flattened prediction and the flattened,
# normalised ground truth of the same sample.
r = pearsonr(y_hat.flatten(), y_test[idx].flatten())[0]
print(f"{model5.name}: r={r:.2f}")


# Same comparison for model2 (disabled).
# y_hat = model2.predict(X_test)[idx]
# stc_hat = stc.copy()
# stc_hat.data = y_hat.T
# stc_hat.plot(**plot_params, brain_kwargs=dict(title=model2.name))
# r = pearsonr(y_hat.flatten(), y_test[idx].flatten())[0]
# print(f"{model2.name}: r={r:.2f}")

# Same comparison for model3 (disabled).
# y_hat = model3.predict(X_test)[idx]
# stc_hat = stc.copy()
# stc_hat.data = y_hat.T
# stc_hat.plot(**plot_params, brain_kwargs=dict(title=model3.name))
# r = pearsonr(y_hat.flatten(), y_test[idx].flatten())[0]
# print(f"{model3.name}: r={r:.2f}")


# Same comparison for the Champagne solver (disabled).
# evoked = sim_test.eeg_data[idx].average()
# stc_hat = solver.apply_inverse_operator(evoked)
# stc_hat.plot(**plot_params, brain_kwargs=dict(title=solver.name))
# y_hat = stc_hat.data
# r = pearsonr(y_hat.flatten(), y_test[idx].flatten())[0]
# print(f"{solver.name}: r={r:.2f}")


In [None]:
# Box plots of the benchmark metrics, one box per model.
import seaborn as sns

# NOTE: `models`, `aucs`, `mles` and `nmses` are not defined by any active
# code in this notebook; they come from the benchmark loop that is commented
# out in the evaluation cell. Re-enable that loop before running this cell,
# otherwise the next line raises a NameError.
names = [m.name for m in models]
xticks = (np.arange(len(models)), names)

for metric_label, metric_values in (("AUC", aucs), ("MLE", mles), ("NMSE", nmses)):
    plt.figure()
    # Average each metric over its last axis, then transpose so that every
    # column (= one box) corresponds to one model.
    sns.boxplot(data=np.nanmean(metric_values, axis=-1).T)
    plt.xticks(*xticks)
    # The metric is on the value (y) axis; the x axis holds the model names,
    # so the metric name belongs on ylabel rather than xlabel.
    plt.ylabel(metric_label)
    plt.title(metric_label)
