## Supplementary: Long-Short Connectivity Table

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from statannot import add_stat_annotation
import statsmodels.api as sm
import statsmodels.formula.api as smf
from scipy import stats
from statannotations.Annotator import Annotator
import itertools 

# ignore warnings for easier plotting
import warnings
warnings.filterwarnings("ignore")

from load_data import load_data, return_asterisks_p

In [None]:
# Display order of the hue levels for each grouping column.
# Helper dict used to get the categories in the right order in the figures.
map_hue_color = dict(
    G_norm=['G_z<=0', 'G_z>0'],
    Corr_norm=['C_z<=0', 'C_z>0'],
    disease=["HC", "MS"],
    GROUP=["HC", "CIS", "RRMS", "SPMS", "PPMS"],
    CENTER=["CLINIC", "MAINZ", "MILAN", "NAPLES", "OSLO", "LONDON", "AMSTERDAM"],
)

# Collapse the two progressive groups (SPMS, PPMS) into a single "PMS" label;
# every other group maps to itself.
mapping_prog = {
    **{group: group for group in ("HC", "CIS", "RRMS")},
    "SPMS": "PMS",
    "PPMS": "PMS",
}

df_merged = load_data()
print(df_merged.columns.values)

# Derived ratio columns: each numerator column divided by each denominator
# column, written onto df_merged in place as q_<numerator tag>_<denominator tag>.
for numerator, prefix in (("Comm_ratio", "q_Comm"), ("CC_ratio_area", "q_CC")):
    for denominator, suffix in (("SC_spl_full", "spl"), ("SC_eff_full", "eff")):
        df_merged[f"{prefix}_{suffix}"] = df_merged[numerator] / df_merged[denominator]


In [5]:

## 4.2 Direct partial correlations between selected biomarkers and G/cs
## Here it is part of what we want to do with 5.2 without correction, so no need
def partial_corr(df, dependent_vars, cs=False):
    """Tabulate partial correlations between clinical scores and features.

    For every pair (biomarker, feature), with biomarker in ``["EDSS", "SDMT"]``
    and feature in ``dependent_vars``, both variables are regressed with OLS on
    ``AGE + C(SEX) + C(CENTER)`` and the Pearson correlation between the two
    residual series is computed.

    Parameters
    ----------
    df : pandas.DataFrame
        Must contain the columns ``EDSS``, ``SDMT``, ``AGE``, ``SEX``,
        ``CENTER`` and every column named in ``dependent_vars``.
    dependent_vars : list of str
        Names of the feature columns. They are interpolated into a model
        formula, so each must be usable as a formula term.
    cs : bool, optional
        Currently unused; kept so existing callers keep working.

    Returns
    -------
    pandas.DataFrame
        One row per feature and one column per biomarker. Each cell is the
        string ``"r=<r>, pval=<p><asterisks>"`` where the asterisks come from
        ``return_asterisks_p``.
    """
    structural_dmg_vars = ["EDSS", "SDMT"]
    covariates = ["AGE", "SEX", "CENTER"]
    covariate_formula = "AGE + C(SEX) + C(CENTER)"

    # Collect one record per biomarker and build the frame once at the end:
    # DataFrame.append was removed in pandas 2.0.
    records = []
    for biomarker in structural_dmg_vars:
        record = {"Biomarker": biomarker}
        for xvar in dependent_vars:
            # Complete cases over the pair AND the covariates, so both models
            # are fitted on the same rows and the residuals pair up one-to-one.
            df_pair = df.dropna(subset=[biomarker, xvar, *covariates])

            # Residuals are kept as standalone Series instead of being written
            # back onto a filtered view of the caller's frame.
            resid_biomarker = smf.ols(
                f"{biomarker} ~ {covariate_formula}", data=df_pair
            ).fit().resid
            resid_xvar = smf.ols(
                f"{xvar} ~ {covariate_formula}", data=df_pair
            ).fit().resid

            r, p = stats.pearsonr(resid_biomarker, resid_xvar)
            ast = return_asterisks_p(p)
            record[xvar] = f"r={r:.2f}, pval={p:.1e}{ast}"
        records.append(record)

    df_results_glm = pd.DataFrame(records).set_index("Biomarker")
    # Transpose so features are rows and biomarkers are columns.
    return df_results_glm.T



In [None]:
# Feature columns of df_merged to correlate with the clinical scores.
list_of_features = [
    "SC_corr_q1",
    "SC_corr_q4",
    "FC_avg_spl",
    "FC_efficiency",
    "FC_corr_q1",
    "FC_corr_q4",
    "short_FCSC",
    "long_FCSC",
]

df_results = partial_corr(df=df_merged, dependent_vars=list_of_features, cs=False)

# Emit the table as LaTeX source; escape=False leaves the cell text unescaped.
print(df_results.to_latex(escape=False))