Display the performance of the different models in each simulation.

In [None]:
import os 
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# Global plotting theme: white grid, no axes spines, enlarged fonts
custom_params = {"axes.spines.{}".format(side): False for side in ("right", "top", "left", "bottom")}
custom_params["figure.dpi"] = 100
sns.set_theme(style = "whitegrid", rc = custom_params, font_scale = 1.75)

In [None]:
# Root folder of the simulation results (one sub-folder per experiment).
# Keep the trailing slash: the paths below are built by string concatenation.
path = "results/"

In [None]:
# Display name of every model, keyed by the stem of its result file ('<key>.csv').
# The insertion order also drives the order of the models in the figures.
# All LaTeX labels are raw strings so that backslashes are never read as escapes.
models = {
    'correct': 'Correct',
    'mm': 'LMM',
    'all': 'All',
    'outcomes': r'Mis. $y$',
    'sigma': r'Mis. $\omega$',
    'nore': r'No $u^\omega$',
    'sigmanore': r'No $u^\omega$ + Mis. $\omega$',
    'melsm_notime': 'No slopes',
    'melsm_notimeomega': r'No $u^\omega_{age}$',
    'gaussian': 'Gaussian',
    'nonsinus': 'Non sinus',
    'incorrect': r'Uncorrelated ($u^\omega$, $u^y$)'
}

# Parameters that can be analysed, keyed by their row label in the result files.
# Each entry is (LaTeX label used in the figures, reference value the estimates are compared to).
covariates = {}

# Covariates for mean
covariates.update({
    'b_age': (r'$\beta^y[Age]$', 0.5),
    'b_albumin': (r'$\beta^y[Albumin]$', 0.5),
    'b_trig': (r'$\beta^y[Trig]$', 0),
    'b_platelet': (r'$\beta^y[Platelet]$', 0),
    'sd_id__Intercept': (r'$\sigma_{y}$', 1),
})

# Covariates for variance
covariates.update({
    'b_sigma_age': (r'$\beta^{\omega}[Age]$', 0.8),
    'b_sigma_albumin': (r'$\beta^{\omega}[Albumin]$', 0),
    'b_sigma_trig': (r'$\beta^{\omega}[Trig]$', 0.8),
    'b_sigma_platelet': (r'$\beta^{\omega}[Platelet]$', 0),
    'sd_id__sigma_Intercept': (r'$\sigma_{\omega}$', 0.5),
    'sd_id__sigma_age': (r'$\sigma_{\omega}[Age]$', 0.25),
})

In [None]:
# Parameter to analyse in the rest of the notebook (must be a key of `covariates`)
parameter = "b_age"

In [None]:
# Figure label and reference value of the selected parameter
selected = covariates[parameter]
naming = selected[0]
value = selected[1]

In [None]:
def evaluate(path, parameter = parameter):
    """
    Collect the results of one parameter across all replications of a simulation.

    `path` contains one sub-folder per replication; each sub-folder holds one
    '<model key>.csv' file per fitted model, where '<model key>' is a key of `models`.
    The files are indexed by parameter name and the columns 'Estimate', 'Est.Error',
    'Q2.5' and 'Q97.5' are read.

    Args:
        path: Folder of the simulation, ending with '/' (paths are concatenated).
        parameter: Row label to extract; must be a key of `covariates`. The default
            is the notebook-level `parameter` as it was when this cell was run.

    Returns:
        Four dictionaries (estimates, ci, esterror, coverage) mapping each model's
        display name to a list with one entry per replication reporting the parameter:
        the 'Estimate', the interval width 'Q97.5' - 'Q2.5', the 'Est.Error', and
        whether the reference value lies strictly inside ('Q2.5', 'Q97.5').
        A model whose files never report the parameter maps to empty lists.

    Raises:
        KeyError: If `parameter` is not in `covariates` or a CSV name is not in `models`.
    """
    # Sorted so that the result does not depend on the file-system listing order
    replications = sorted(os.listdir(path))
    print(path, ' - ', len(replications))

    # Reference value of the requested parameter (not of the notebook-level selection),
    # so that coverage stays consistent with the `parameter` argument
    reference = covariates[parameter][1]

    estimates, ci, esterror, coverage = {}, {}, {}, {}

    # Evaluate by iterating across folder structure and gathering values for each model type
    for replication in replications:
        model_path = path + replication + '/'
        if not os.path.isdir(model_path):
            continue  # Stray file next to the replication folders

        for filename in sorted(os.listdir(model_path)):
            stem, extension = os.path.splitext(filename)
            if extension != '.csv': continue

            model = models[stem]
            for results in (estimates, ci, esterror, coverage):
                results.setdefault(model, [])

            summary = pd.read_csv(model_path + filename, index_col = 0)
            if parameter not in summary.index: continue

            row = summary.loc[parameter]
            estimates[model].append(row['Estimate'])
            ci[model].append(row['Q97.5'] - row['Q2.5'])
            esterror[model].append(row['Est.Error'])
            coverage[model].append(bool(row['Q2.5'] < reference < row['Q97.5']))

    return estimates, ci, esterror, coverage

In [None]:
# Experiments whose sub-folders are numeric settings, each evaluated separately.
# Any other experiment folder directly contains the replications and is stored
# under the placeholder setting -1.
swept_experiments = ['Individuals', 'Points', 'Correlated Random Effect']

# (experiment, setting) -> (estimates, ci, esterror, coverage) as returned by `evaluate`
results = {}
for experiment in sorted(os.listdir(path)):
    experiment_path = path + experiment + '/'
    if not os.path.isdir(experiment_path):
        continue  # Stray file next to the experiment folders

    if experiment in swept_experiments:
        numbers = [number for number in os.listdir(experiment_path)
                   if os.path.isdir(experiment_path + number)]
        # Numeric order, so that the columns follow the order seaborn gives to numeric hues
        for number in sorted(numbers, key = int):
            # `parameter` is passed explicitly: the default of `evaluate` is frozen
            # at its definition and may lag behind the current selection
            results[(experiment, int(number))] = evaluate(experiment_path + number + '/', parameter = parameter)
    elif os.listdir(experiment_path):
        results[(experiment, -1)] = evaluate(experiment_path, parameter = parameter)

# One table per metric: models as rows, (experiment, setting) as columns,
# each cell holding the list of values across replications
estimates, ci, esterror, coverage = [
    pd.DataFrame({column: metrics[position] for column, metrics in results.items()}).rename_axis('Models')
    for position in range(4)
]

# Display tables

In [None]:
# Standard deviation of the point estimates across replications,
# per model (rows) and simulation setting (columns)
estimates.map(np.std)

In [None]:
# Average across replications of the reported estimation error ('Est.Error'),
# per model (rows) and simulation setting (columns)
esterror.map(np.mean)

In [None]:
# Width of the (Q2.5, Q97.5) interval across replications, formatted as "mean (std)"
ci.map(lambda x: '{:.2f} ({:.2f})'.format(np.mean(x), np.std(x)))

# Display errors

In [None]:
# Parameter shown in the figures below: (figure label, reference value)
covariates[parameter]

In [None]:
# Horizontal padding unit (in data units) used to leave room for the coverage labels
shift = 0.025
# Total height seaborn allots to one category by default (`width = 0.8`);
# with a hue, the boxes of one model share it equally
group_width = 0.8

# One figure per simulation: box plots of the estimates per model, the reference value
# as a dashed red line and the coverage of each box written at the right of the plot
for simulation in estimates.columns.get_level_values(0).unique():
    # LMM is only displayed for the 'Individuals' and 'Points' simulations
    wide = estimates[simulation]
    if simulation not in ('Individuals', 'Points'):
        wide = wide[wide.index != 'LMM']
    wide = wide.dropna()

    # Models from bottom to top follow the order of `models`
    order = [m for m in models.values() if m in wide.index.unique()][::-1]
    # Settings of this simulation, sorted like seaborn sorts numeric hue levels
    levels = sorted(wide.columns)
    hue = simulation if len(levels) > 1 else None

    # Transform in long format (named `plot_data` to avoid shadowing IPython's `display`)
    plot_data = wide.apply(pd.Series.explode).melt(var_name = simulation, value_name = naming, ignore_index = False).reset_index()
    if not hue:
        plot_data = plot_data.drop(simulation, axis = 1)

    fig, ax = plt.subplots()
    sns.boxplot(data = plot_data, x = naming, y = "Models", order = order, hue = hue,
                color = 'tab:blue' if not hue else None, ax = ax)

    # Vertical position of every box: the tick of its model, shifted within the
    # group when several settings are dodged (the shift is 0 for a single setting)
    yticks = ax.get_yticks()
    box_height = group_width / len(levels)
    box_positions = {}
    for i, m in enumerate(order):
        for j, level in enumerate(levels):
            offset = (j - (len(levels) - 1) / 2) * box_height
            box_positions[(m, level)] = yticks[i] + offset

    # Annotate the coverage at the right end of each box
    x_max = plot_data[naming].max()
    for (m, level), y in box_positions.items():
        val = np.mean(coverage[simulation, level].loc[m])
        if not np.isnan(val):
            ax.text(x_max, y, f'{val:.0%}', ha='left',
                    fontsize=15, va='center', color='black',
                    backgroundcolor='white')

    # NOTE(review): the lower limit is clamped at 0, which hides negative estimates;
    # confirm this is intended for parameters whose reference value is 0
    x_low, x_high = ax.get_xlim()
    ax.set_xlim(max(0, x_low), max(x_max + shift * 3, x_high))
    ax.axvline(value, color = 'tab:red', linestyle = '--')
    if hue:
        ax.legend(title = simulation, loc='center left', bbox_to_anchor=(1, 0.5))

    ax.set_title(simulation)

    # The output folder may not exist yet on a fresh checkout
    figure_path = 'images/{}/{}.png'.format(simulation, naming)
    os.makedirs(os.path.dirname(figure_path), exist_ok = True)
    fig.savefig(figure_path, bbox_inches='tight')
    plt.show()
    plt.close(fig)