# <font color=black> Figure 1 Spinal cord morphometry </font>
<hr style="border:1px solid black">

### Imports

In [None]:
import glob, os, sys, json
import pandas as pd
import numpy as np
import seaborn as sns

main_dir="/cerebro/cerebro1/dataset/bmpd/derivatives/Aging_project/2025_brsc_aging_project"
sys.path.append(main_dir + "/code/")
from sc_structural_analyses import StructuralMetrics
from sim_matrix import Matrix

%matplotlib inline
%load_ext autoreload
%autoreload 2

# Load config file ------------------------------------------------------------
# The JSON config provides the project paths and participant lists read by the
# cells below. Edit the paths inside the config file before running the notebook.
config_file = main_dir + '/config/analyses/01_structural.json'
# JSON text is UTF-8 by specification; passing the encoding explicitly keeps the
# load independent of the platform's locale default.
with open(config_file, encoding="utf-8") as config_f:
    config = json.load(config_f)

# plotting
# Project plotting helper plus the matplotlib pieces used for custom colormaps.
from plotting import Plotting
import matplotlib.pyplot as plt
from matplotlib.colors import LinearSegmentedColormap
# Plotting is built from the config file path; the meaning of the second
# argument ("test") is defined in the project's plotting module — TODO confirm.
plot=Plotting(config_file,"test")

#statistics
# SciPy provides the paired t-test used below; Statistics is the project helper
# that exposes signed_partial_r2 for the age/sex models.
from scipy import stats
from brsc_statistics import Statistics 
from scipy.stats import spearmanr
# Instantiated with empty analysis directory/name; only its methods are used in
# the cells visible here.
stat_func=Statistics(config=config,ana_dir="",analysis="")


# Atlas_labels ------------------------------
# The label file shares the atlas path up to the "order" token and ends in
# "labels.txt"; the first glob match is used.
atlas_labels_pattern = (
    config["project_dir"]
    + config['template']["spinalcord"]['atlas'].split("order")[0]
    + "labels.txt"
)
atlas_labels_f = glob.glob(atlas_labels_pattern)[0]

# Read the first tab-separated column (header row skipped) as byte strings,
# then convert every label to a regular unicode string.
atlas_labels = np.genfromtxt(
    atlas_labels_f,
    usecols=0,
    skip_header=1,
    dtype="S",
    delimiter="\t",
    encoding=None,
)
atlas_labels = np.array([raw_label.decode("utf-8") for raw_label in atlas_labels])


<hr style="border:1px solid black">

## <font color=#0B7CC3> A. Extract features </font>

In [None]:
# Metrics extracted for each contrast. Restrict this dict (for example to
# {"T2s": ["T2s_vx_gm", "T2s_vx_wm"]}) to re-run a single contrast.
sub_metrics={"MTR":["MTR"],"T2s":["T2s_vx_gm","T2s_vx_wm"],"DWI":["FA","RD","AD","MD"]}


def _first_match(pattern):
    """Return the first path matching *pattern*.

    Raises:
        FileNotFoundError: if the glob pattern matches nothing, so a missing
            participant file is reported with the pattern that failed instead
            of a bare IndexError.
    """
    matches = glob.glob(pattern)
    if not matches:
        raise FileNotFoundError(f"No file matches pattern: {pattern}")
    return matches[0]


def _indiv_space_keys(contrast, sub_metric):
    """Return the config["indiv_space"] keys ``(image_key, atlas_key)``.

    The keys depend only on the contrast and the sub-metric, never on the
    participant. Returns ``None`` for a contrast other than T2s, MTR or DWI
    (no file is collected for those).
    """
    tissue_specific = sub_metric in (contrast + "_gm", contrast + "_wm")
    if contrast == "T2s":
        if tissue_specific:
            # gm/wm metric: contrast image, tissue-specific atlas
            return contrast, sub_metric + "_atlas"
        # any other T2s metric: metric-specific image, contrast atlas
        return sub_metric, contrast + "_atlas"
    if contrast == "MTR":
        # the image is always the contrast image; only the atlas may be tissue-specific
        return contrast, (sub_metric if tissue_specific else contrast) + "_atlas"
    if contrast == "DWI":
        return sub_metric, contrast + "_atlas"
    return None


df_metrics={}

for contrast in sub_metrics:
    df_metrics[contrast]={}
    for sub_metric in sub_metrics[contrast]:
        print(sub_metric)
        participant_ids = config["participants_IDs_" + contrast]

        # The metric name is only passed when the contrast has several sub-metrics.
        if len(sub_metrics[contrast])>1:
            metrics=StructuralMetrics(config,contrast=contrast,metric=sub_metric)
        else:
            metrics=StructuralMetrics(config,contrast=contrast)

        # One image and one atlas per participant, both in individual space.
        files_indivspace=[]
        atlas_f=[]
        keys = _indiv_space_keys(contrast, sub_metric)
        if keys is not None:
            image_key, atlas_key = keys
            for ID in participant_ids:
                # IDs starting with "P" use the "bmpd" preprocessing tree, all others "stratals".
                preproc_dir= config["preprocess_dir"]["bmpd"] if ID[0]=="P" else config["preprocess_dir"]["stratals"]
                files_indivspace.append(_first_match(preproc_dir + config["indiv_space"][image_key].format(ID)))
                atlas_f.append(preproc_dir + config["indiv_space"][atlas_key].format(ID))

        # The T2s voxel metrics are extracted with measure="count"; everything else with "mean".
        measure = "count" if sub_metric in ("T2s_vx_gm", "T2s_vx_wm") else "mean"

        roi_df = metrics.extract_metric_rois(IDs=participant_ids,
                            input_f=files_indivspace,
                            atlas_f=atlas_f,
                            atlas_labels=atlas_labels,
                            metric=sub_metric,
                            measure=measure,
                            space="indiv_space",
                            tag=""  ,norm=False,redo=False,verbose=1)

        # Discard the C1 level and tag each row with its ventro-dorsal / right-left ROI pair.
        roi_df = roi_df[roi_df["levels"]!="C1"].reset_index(drop=True)
        roi_df["rois_couple"] = roi_df["ventro_dorsal"] + "_" + roi_df["right_left"]
        df_metrics[contrast][sub_metric] = roi_df

    # WM/GM ratio: only computed when both the "<contrast>_gm" and "<contrast>_wm"
    # metrics were requested for T2s or MTR (not the case with the default dict above).
    if contrast in ("T2s", "MTR"):
        gm_key, wm_key = contrast + "_gm", contrast + "_wm"
        if all(x in sub_metrics[contrast] for x in [gm_key, wm_key]):
            gm = df_metrics[contrast][gm_key].get(gm_key)
            wm = df_metrics[contrast][wm_key].get(wm_key)
            # Row-wise division: relies on both tables sharing the same row order.
            df_metrics[contrast][gm_key]["wm_gm_r"] = wm / gm


## Create group level nifti image

In [None]:
# NOTE: this cell redefines `sub_metrics` (T2s uses the plain "T2s" map here) and
# `output_dir` for the rest of the notebook session.
sub_metrics={"MTR":["MTR"],"T2s":["T2s"],"DWI":["FA","RD","AD","MD"]}
import brsc_utils

for contrast in sub_metrics:
    # Group images are written under <project>/<analysis_dir>/2_second_level/nifti/
    output_dir=config["project_dir"] + config["analysis_dir"][contrast] + "2_second_level/nifti/"
    os.makedirs(output_dir,exist_ok=True)
    for sub_metric in sub_metrics[contrast]:
        pam50_pattern = config["PAM50_space"][sub_metric]

        for group in ["ALL","YA","MA","OA"]:
            # Participants that belong to the group AND have this contrast.
            commun_ids=sorted(set(config["participants_IDs_" + contrast]) & set(config["participants_IDs_" + group]))
            if not commun_ids:
                # Previously this case fell through to a leaked (stale or undefined)
                # loop variable when building the suffix; there is nothing to average.
                print(f"No participant with {contrast} data in group {group}: skipping {sub_metric}")
                continue

            i_files_dir=[]
            for ID in commun_ids:
                # IDs starting with "P" use the "bmpd" preprocessing tree, all others "stratals".
                preproc_dir= config["preprocess_dir"]["bmpd"] if ID[0]=="P" else config["preprocess_dir"]["stratals"]
                rel_path = pam50_pattern.format(ID)
                # Directory holding the participant's image: everything up to and
                # including the parent folder of the file.
                parent_dir = rel_path.split("/")[-2]
                i_files_dir.append(preproc_dir + rel_path.split(parent_dir)[0] + f"/{parent_dir}/")

            # File name without ".nii.gz", built explicitly from the last participant
            # (the value the original code obtained through the leaked loop variable).
            # NOTE(review): if the file name itself embeds the ID, this suffix is
            # participant-specific — confirm against brsc_utils.group_mean_img.
            suffix_tag = pam50_pattern.format(commun_ids[-1]).split("/")[-1].split(".nii.gz")[0]

            brsc_utils.group_mean_img(IDs=commun_ids,
                              i_dir=i_files_dir,
                              o_dir=output_dir,
                              prefix_tag="sub-",
                              suffix_tag=suffix_tag,
                              remove_4d=True,
                              tag=sub_metric +"_" + group,
                              redo=False)

<hr style="border:1px solid black">

## <font color=#0B7CC3> C. Compute similarity matrix </font>

In [None]:
# Participants that have all three contrasts (T2s, MTR and DWI).
common_ids = sorted(
    set.intersection(
        *(set(config["participants_IDs_" + name]) for name in ("T2s", "MTR", "DWI"))
    )
)


def _only_common(metric_df):
    """Keep the rows of *metric_df* whose ID is in `common_ids`, with a fresh index."""
    keep = metric_df["IDs"].isin(common_ids)
    return metric_df[keep].reset_index(drop=True)


# One filtered table per metric; the MTR table is the base of the merged table.
df_all = _only_common(df_metrics["MTR"]["MTR"])
df_gm = _only_common(df_metrics["T2s"]["T2s_vx_gm"])
df_wm = _only_common(df_metrics["T2s"]["T2s_vx_wm"])
df_fa = _only_common(df_metrics["DWI"]["FA"])
df_rd = _only_common(df_metrics["DWI"]["RD"])
df_md = _only_common(df_metrics["DWI"]["MD"])
df_ad = _only_common(df_metrics["DWI"]["AD"])

# Append the other metrics as columns of the MTR table. Values are matched on
# the (reset) row index, so every table is expected to list its rows in the
# same order.
for target_col, source_df, source_col in (
    ("gm", df_gm, "T2s_vx_gm"),
    ("wm", df_wm, "T2s_vx_wm"),
    ("FA", df_fa, "FA"),
    ("RD", df_rd, "RD"),
    ("MD", df_md, "MD"),
    ("AD", df_ad, "AD"),
):
    df_all[target_col] = source_df[source_col]

# Ventral / dorsal subsets of the merged table.
is_ventral = df_all['ventro_dorsal'] == "ventral"
is_dorsal = df_all['ventro_dorsal'] == "dorsal"
df_all_ventral = df_all[is_ventral]
df_all_dorsal = df_all[is_dorsal]

In [None]:
# Labels used for the similarity matrix: the first four atlas entries are
# skipped (presumably the C1 ROIs, which the extraction cell also discards —
# TODO confirm against the label file).
atlas_labelsC2C7 = atlas_labels[4:]

# Participants with T2s, MTR and DWI data.
common_ids = sorted(
    set(config["participants_IDs_T2s"])
    & set(config["participants_IDs_MTR"])
    & set(config["participants_IDs_DWI"])
)

# Similarity outputs go to the "similarity" folder of the preprint figures.
output_dir = config["project_dir"] + "/figures/f01_structural/preprint2025/similarity/"
ana_matrix = Matrix(config=config, IDs=common_ids, output_dir=output_dir)

# Columns of df_all that enter the similarity computation.
col_selected = ["MTR", "AD", "MD", "RD", "FA", "gm", "wm"]
all_sim_matrix, mean_sim_matrix, df_sim = ana_matrix.compute_similarity_matrix(
    data_df=df_all,
    column_labels=col_selected,
    atlas_labels=atlas_labelsC2C7,
    scaling_method="robust_sigmoid",
    redo=False,
)

# Sequential colormap running from light beige to dark blue.
colors = ["#F2ECDF", "#C3E1DD", "#93D6DB", "#62B7D9", "#3197D6", "#0B7CC3", "#0168A7"]
custom_cmap = LinearSegmentedColormap.from_list("my_colormap", colors)

# From here on `output_dir` points to the figures folder (later cells reuse it).
output_dir = config["project_dir"] + "/figures/f01_structural/preprint2025/figures/"

# NOTE(review): `labels` receives the full atlas label list while the tick
# labels use the C2-C7 subset, and `output_f` appends a second "/figures/" to
# `output_dir`. Nothing is written while save=False — confirm both before
# enabling saving.
plot.plot_heatmap(
    matrix=mean_sim_matrix,
    vmin=-0.2,
    vmax=1,
    cmap=custom_cmap,
    xticklabels=atlas_labelsC2C7,
    yticklabels=atlas_labelsC2C7,
    labels=atlas_labels,
    output_f=output_dir + '/figures/sim_matrix_morpho.pdf',
    save=False,
)
plt.show()



In [None]:

# Average similarity per participant and per pair type ("intra" / "inter").
group_keys = ["IDs", "age", "sex", "betwith_labels"]
mean_df = df_sim.groupby(group_keys)[["sim"]].mean().reset_index()

# ---- paired t-test: within ("intra") versus between ("inter") similarity
intra_sim = mean_df.loc[mean_df["betwith_labels"] == "intra", "sim"]
inter_sim = mean_df.loc[mean_df["betwith_labels"] == "inter", "sim"]
t_test = stats.ttest_rel(intra_sim, inter_sim)
print(f"t({t_test.df}): {np.round(t_test.statistic, 2)} p-value: {t_test.pvalue}")

# ---- boxplot of within and between similarity, one point per participant
# (save=False: the figure is displayed only, nothing is written to output_dir)
plot.boxplots(
    df=mean_df,
    x_data="betwith_labels",
    x_order=["intra", "inter"],
    y_data="sim",
    ymin=-0.2,
    ymax=1.2,
    indiv_values=True,
    palette=["#0168A7", "#93D6DB"],
    height=5,
    aspect=0.3,
    output_dir=output_dir + '/figures/',
    output_tag="sim_intra-inter",
    save=False,
)
plt.show()

<hr style="border:1px solid black">

## <font color=#0B7CC3> D. Age and sex effects </font>


In [None]:
# Metrics tested for age and sex effects (same map as the extraction cell).
sub_metrics={"MTR":["MTR"],"T2s":["T2s_vx_gm","T2s_vx_wm"],"DWI":["FA","RD","AD","MD"]}

df_mean_metrics={}
mean_results = []   # one row per metric, model fitted on all ROIs together
roi_results = []    # one row per metric and ROI pair

for contrast in sub_metrics:
    df_mean_metrics[contrast]={}
    for sub_metric in sub_metrics[contrast]:

        if sub_metric == "T2s_wm":
            continue  # skip this sub_metric and move to the next one

        # NOTE(review): this placeholder is keyed by sub_metric at the top level,
        # not under df_mean_metrics[contrast]; it is never filled in this cell.
        df_mean_metrics[sub_metric]={}

        # The gm tables carry the WM/GM ratio; every other table is modelled on
        # the column named after the metric.
        y = "wm_gm_r" if sub_metric in ["T2s_gm", "MTR_gm"] else sub_metric

        metric_df = df_metrics[contrast][sub_metric]

        # Model over all ROIs: age as predictor, sex as covariate, IDs passed as `random`.
        signed_r2, p_age,p_sex, beta_age, beta_sex, stat_age,stat_sex=stat_func.signed_partial_r2(df=metric_df.dropna(),y=y,predictor="age",covariates=["sex"],random='IDs')
        mean_results.append({"contrast": contrast,"sub_metric": y,"signed_r2": signed_r2,"p_age": p_age,"p_sex": p_sex,"beta_age": beta_age,"beta_sex": beta_sex,"tvalue_age": stat_age,"tvalue_sex": stat_sex})

        # Same model fitted separately for every ROI pair (no `random` argument here).
        for roi in np.unique(metric_df["rois_couple"]):
            df_roi_metrics=metric_df[metric_df["rois_couple"]==roi]
            signed_r2_2, p_age2,p_sex2, beta_age2,beta_sex2, stat_age2,stat_sex2=stat_func.signed_partial_r2(df=df_roi_metrics,y=y,predictor="age",covariates=["sex"])
            roi_results.append({"contrast": contrast,"sub_metric": sub_metric,"rois":roi,"signed_r2": signed_r2_2,"p_age": p_age2,"p_sex": p_sex2,"beta_age": beta_age2,"beta_sex": beta_sex2,"tvalue_age": stat_age2,"tvalue_sex": stat_sex2})

# Convert to DataFrame
mean_result_df= pd.DataFrame(mean_results)
roi_result_df=pd.DataFrame(roi_results)

# Metrics with at least one ROI showing an uncorrected age effect (p < 0.05).
significant_features = roi_result_df.groupby("sub_metric").filter(lambda g: (g["p_age"] < 0.05).any())["sub_metric"].unique()

# Per-ROI summary: mean ± std of |t(age)| across the listed metrics.
# The ROIs are taken from the result table itself instead of from whichever
# metric table the loops above happened to end on.
summary_features = ["MTR","T2s_vx_gm","T2s_vx_wm","FA","RD","AD","MD"]
in_summary = roi_result_df["sub_metric"].isin(summary_features)
for roi in np.unique(roi_result_df["rois"]):
    abs_t_age = np.abs(roi_result_df[(roi_result_df["rois"]==roi) & in_summary]["tvalue_age"])
    print(roi + " absolute mean t-value: " + str(np.round(np.mean(abs_t_age),3)) + " ± " +
          str(np.round(np.std(abs_t_age),2)))


In [90]:
from statsmodels.stats.multitest import multipletests

# Raw p-values of the per-ROI models, one entry per row of roi_result_df
# (i.e. the correction runs across all ROI pairs and metrics together).
p_age = roi_result_df["p_age"].to_numpy()
p_sex = roi_result_df["p_sex"].to_numpy()

# Benjamini-Hochberg FDR correction, applied separately to the age and the sex
# p-values; only the reject mask and the corrected p-values are kept.
reject, page_corrected = multipletests(p_age, method='fdr_bh')[:2]
reject, psex_corrected = multipletests(p_sex, method='fdr_bh')[:2]

# Store the corrected p-values next to the raw ones.
roi_result_df["page_fdr"] = page_corrected
roi_result_df["psex_fdr"] = psex_corrected

In [None]:
# Column order of the heatmap.
feat_order = ["MTR", "T2s_vx_gm", "T2s_vx_wm", "FA", "RD", "AD", "MD"]

# Diverging colormap: dark purple (negative) -> white (zero) -> brown-orange (positive).
colors = ["#2C1562", "#61508A", "#6F5F94", "#968AB1", "#FFFFFF",
          "#FACF80", "#F7B740", "#DB9930", "#B4730A"]
custom_cmap = LinearSegmentedColormap.from_list("my_colormap", colors)

# ROI pairs as rows, metrics as columns, age t-values as cells; rows whose
# sub_metric is "T2s" are excluded before pivoting.
without_t2s = roi_result_df[roi_result_df["sub_metric"] != "T2s"]
pivot = without_t2s.pivot(index="rois", columns="sub_metric", values="tvalue_age")
pivot = pivot.reindex(columns=feat_order)

plt.figure(figsize=(6, 3))
sns.heatmap(
    pivot,
    cmap=custom_cmap,
    center=0,
    vmin=-5,
    vmax=5,
    fmt=".2f",
)
plt.title("T-values per ROI and Metric")
plt.tight_layout()
#plt.savefig(output_dir + 'age_effect_tvalues_quad.pdf', format='pdf')
plt.show()

In [81]:
# Recompute the similarity matrix on a reduced set of features (MD is left out).
# NOTE(review): the list is hard-coded rather than derived from
# `significant_features` — confirm it matches the age-effect results.
col_selected = ['MTR', 'wm', 'gm', 'FA', 'RD', 'AD']

# redo=True asks for a fresh computation; outputs are tagged "_signif_age".
all_sim_matrix_age, mean_sim_matrix_age, df_sim_age = ana_matrix.compute_similarity_matrix(
    data_df=df_all,
    column_labels=col_selected,
    atlas_labels=atlas_labelsC2C7,
    scaling_method="robust_sigmoid",
    tag="_signif_age",
    redo=True,
)


/cerebro/cerebro1/dataset/bmpd/derivatives/Aging_project//figures/f01_structural/preprint2025/similarity/1_first_level/sim_matrix/sim_matrix_df_signif_age.csv


  mean_sim_matrix = np.nanmean(np.stack(all_sim_matrix), axis=0)


###  <font color=#16A89A> B. Predictive models </font>