# Residuals computation for simulated data (Fig. S.6)

residuals = y - (slope * x + intercept)

mean_residual = np.mean(np.abs(residuals))

In [None]:
import sys
from pathlib import Path

import numpy as np
import pandas as pd

import plotly.express as px

In [22]:
# Make the repository root importable so that the project's `functions`
# package can be imported further down.
# NOTE(review): assumes the working directory is a direct child of the
# repository root (e.g. the notebooks folder) - confirm before running from
# another location.
repo_root = Path().resolve().parent
# Guard against duplicates: re-running this cell must not grow sys.path.
if str(repo_root) not in sys.path:
    sys.path.append(str(repo_root))

In [23]:
import functions.data_analysis as analysis

## Retrieve data

In [24]:
# Directory containing the result files that the loading cell below reads.
# NOTE(review): absolute path on one user's machine - adjust it before running
# this notebook anywhere else.
mypath = '/Users/alexandrine/Library/CloudStorage/OneDrive-TechnischeUniversitätBerlin/causal_detection_of_CSD/results_causalCSD/saddle-node/methods/Apr25/causalEE/'

In [25]:
# Load the results of each scenario: every call passes the results directory
# and a '<scenario>-*.dat' filename pattern to the project helper.
# NOTE(review): the returned objects are later indexed as
# d[var_name]['residuals'] - confirm the exact structure against
# functions.data_analysis.load_and_rename_files.
base_data_dict = analysis.load_and_rename_files(mypath, 'base-*.dat')
confounderDecreasAC_data_dict = analysis.load_and_rename_files(mypath, 'confounderDecreasAC-*.dat')
confounderIncreasAC_data_dict = analysis.load_and_rename_files(mypath, 'confounderIncreasAC-*.dat')
falseAlarm_data_dict = analysis.load_and_rename_files(mypath, 'falseAlarm-*.dat')

In [26]:
# Scenario name -> loaded data for that scenario. The aggregation function
# looks scenarios up here by the names listed in `models`.
all_data_dicts = {
    'base': base_data_dict,
    'confounderDecreasAC': confounderDecreasAC_data_dict,
    'confounderIncreasAC': confounderIncreasAC_data_dict,
    'falseAlarm': falseAlarm_data_dict
}

## Aggregate residuals for each model

In [27]:
# Settings selecting which result entry is read. aggregate_residuals_causalEE
# builds its lookup key from `sigma_Y` and `detrend` (read as globals)
# together with its own `method` and `window_length` arguments.
sigma_Y = "001"
# NOTE(review): `methods` is not referenced by the other cells visible here.
methods=['KNeighborsRegressor',
         ]
# NOTE: the `window_length` parameter of aggregate_residuals_causalEE shadows
# this global, so this value only takes effect if it is passed explicitly.
window_length = "1000"
detrend='rmvDet'

In [28]:
# Choose which models and X_function to analyze
models = ['base', 
          'confounderDecreasAC', #'confounderIncreasAC', 
          'falseAlarm'
          ]

X_functions = ['linear', 
    #'square', 
               ]

In [29]:
def aggregate_residuals_causalEE(method='KNeighborsRegressor', window_length=1000):
    """Collect the stored residuals of every selected scenario.

    The scenarios come from the module-level ``X_functions`` and ``models``
    lists. For each pair, the entry
    ``"{model}_{X_function}_{sigma_Y}_{method}_{detrend}_{window_length}"``
    is looked up in ``all_data_dicts[model]`` and the items of its
    ``'residuals'`` field are copied, in index order, into a new list.

    Parameters
    ----------
    method : str
        Method name used in the lookup key.
    window_length : int or str
        Window length used in the lookup key (formatted into the key as is).

    Returns
    -------
    dict
        Maps ``"{model}_{X_function}"`` to the list of residual items of
        that scenario.

    Raises
    ------
    KeyError
        If a model or the composed lookup key is missing from the loaded data.
    """
    collected = {}

    for x_func in X_functions:
        for scenario in models:
            scenario_data = all_data_dicts[scenario]
            lookup_key = f"{scenario}_{x_func}_{sigma_Y}_{method}_{detrend}_{window_length}"
            stored = scenario_data[lookup_key]['residuals']

            # Index-based copy keeps the original access pattern (stored[0],
            # stored[1], ...) whatever container type holds the residuals.
            collected[f"{scenario}_{x_func}"] = [stored[idx] for idx in range(len(stored))]

    return collected

In [30]:
# Gather the residuals of the selected scenarios for KNeighborsRegressor;
# window_length is not passed, so the function default (1000) is used.
residuals_agg_dict = aggregate_residuals_causalEE(method='KNeighborsRegressor')

## Plot

In [31]:
def plot_residuals_boxplot_causalEE(agg_dict, method='KNeighborsRegressor'):
    """Build a box plot with one box of residuals per scenario.

    Parameters
    ----------
    agg_dict : dict
        Maps a scenario label to a sequence of per-simulation residual
        arrays; the arrays of each scenario are pooled with
        ``np.concatenate`` before plotting.
    method : str
        Name shown in the figure title.

    Returns
    -------
    The figure created by ``px.box``; it is returned without being shown.
    """
    # Long-format rows: one (scenario label, residual value) pair per residual,
    # with the residuals of all simulations of a scenario pooled together.
    rows = [
        (label, value)
        for label, per_simulation in agg_dict.items()
        for value in np.concatenate(per_simulation)
    ]
    frame = pd.DataFrame(rows, columns=["Key", "Residuals"])

    box_fig = px.box(
        frame,
        x="Key",
        y="Residuals",
        points="outliers",
        title=f"{method} - Residuals Box Plot",
    )

    # Box colours and no mean marker.
    trace_style = {
        "marker_color": '#AFDDE9',
        "line_color": '#37ABC8',
        "boxmean": False,
    }
    box_fig.update_traces(**trace_style)

    # Figure size, axis titles and background colour.
    layout_style = {
        "width": 800,
        "height": 600,
        "xaxis_title": "Scenario",
        "yaxis_title": "Residuals",
        "plot_bgcolor": 'rgba(215, 238, 244, 0.3)',
    }
    box_fig.update_layout(**layout_style)

    return box_fig


In [32]:
# Build the residuals box plot from the aggregated data; the figure is only
# created here, not displayed.
fig = plot_residuals_boxplot_causalEE(residuals_agg_dict, method='KNeighborsRegressor')

In [33]:
#fig.show()