In [None]:
import csv
import os
import random
import pickle
import pandas as pd
import numpy as np
from scipy import stats
import matplotlib.pyplot as plt
import matplotlib.ticker as ticker 
import sklearn
import warnings
import skbold
import glob
from sklearn.metrics import mean_squared_error, mean_absolute_error, r2_score

# dwMRI

# MISMATCH BETWEEN FOLDER NAME AND FILES IN STRUCTURAL MATRICES:

31024_Schaefer7n1000p_Tian_S4
- expected: 1000 + 54 = 1054 structures
- in fact: 216 structures = 200 + 16 (T1) => 31024_Schaefer7n200p_Tian_S1

31025_Schaefer7n200p_Tian_S1:
- expected: 200 + 16 = 216 structures
- in fact: 554 structures = 500 + 54 (T4) => 31025_Schaefer7n500p_Tian_S4

31026_Schaefer7n500p_Tian_S4
- expected: 500 + 54 = 554 structures
- in fact: 1054 structures = 1000 + 54 (T4)

# Combine and plot PLS results for DTI: IDP + Structural matrices

In [None]:
# Merge the per-fold PLS result files of every DTI modality into one table.
folds = ["0", "1", "2", "3", "4"]
modalities_idp = ["fa_tbss",  "fa_prob", "md_tbss", "md_prob", "l1_tbss",
                   "l1_prob", "l2_tbss", "l2_prob", "l3_tbss", "l3_prob",
                   "mo_tbss", "mo_prob", "od_tbss", "od_prob", "icvf_tbss",
                   "icvf_prob", "isovf_tbss", "isovf_prob"]

# Column names assigned to the header-less *_PLS_result.csv files read below.
pls_result_columns = ['Fold', 'Modality', 'n components', 'MSE', 'MAE', 'R2', 'Pearson r']

# DTI IDPs: one result file per (modality, fold).
five_folds_dti_idp = []
for modality in modalities_idp:
    for fold in folds:
        dti = pd.read_csv(f'/PLS/brain/dti/dti_idp/fold_{fold}/models/{modality}_fold_{fold}_PLS_result.csv', header=None)
        dti.columns = pls_result_columns
        five_folds_dti_idp.append(dti)
# Concatenate once, after the loops, instead of rebuilding the frame on every iteration.
five_folds_dti_idp_full = pd.concat(five_folds_dti_idp, ignore_index=False)

# Structural matrices: every result file found in the fold's csv folder.
five_folds_dti_str = []
for fold in folds:
    files = sorted(glob.glob(f'/PLS/brain/dti/dti_struct/fold_{fold}/models/csv/*_fold_{fold}_PLS_result.csv'))
    for file in files:
        struct = pd.read_csv(file, header=None)
        struct.columns = pls_result_columns
        five_folds_dti_str.append(struct)
five_folds_dti_struct_full = pd.concat(five_folds_dti_str, ignore_index=False)

dti_full = pd.concat([five_folds_dti_idp_full, five_folds_dti_struct_full], axis=0, ignore_index=True).round(decimals=3)

# Strip the "PearsonRResult(statistic=..., pvalue=...)" wrapper, then split the
# remaining "statistic, pvalue" text into two float columns.
dti_full['Pearson r'] = dti_full['Pearson r'].astype(str).str.replace(r'PearsonRResult\(statistic=|pvalue=|\)', '', regex=True)
dti_full[['Pearson r', 'p-value']] = dti_full['Pearson r'].str.split(',', expand=True).astype(float).round(decimals=3)
# Remove the "{'n_components': ... }" dict wrapper around the component count.
dti_full['n components'] = dti_full['n components'].astype(str).str.replace(r"{'n_components':", '', regex=True)
dti_full['n components'] = dti_full['n components'].astype(str).str.replace(r"}", '', regex=True)
dti_full

In [None]:
# Rename DTI modalities to the human-readable labels used in the figures.
modalities_dti_idp = ["fa_tbss",  "fa_prob", "md_tbss", "md_prob", "l1_tbss",
                   "l1_prob", "l2_tbss", "l2_prob", "l3_tbss", "l3_prob",
                   "mo_tbss", "mo_prob", "od_tbss", "od_prob", "icvf_tbss",
                   "icvf_prob", "isovf_tbss", "isovf_prob"]

new_names = ["FA TBSS", "FA Prob.", "MD TBSS", "MD Prob.", "L1 TBSS",
                   "L1 Prob.", "L2 TBSS", "L2 Prob.", "L3 TBSS", "L3 Prob.",
                   "MO TBSS", "MO Prob.", "OD TBSS", "OD Prob.", "ICVF TBSS",
                   "ICVF Prob.", "ISOVF TBSS", "ISOVF Prob."]

struct_modal = ['31020', '31021', '31022', '31023', '31024', '31025']
new_names_dti = ['aparc_a2009s_MSA1', 'aparc_MSA1', 'Glasser_MSA1',
                 'Glasser_MSA4', 'Schaefer7n200p_MSA1', 'Schaefer7n500p_MSA4']

# Ordered (old substring, new substring) pairs. The order is significant:
# later pairs operate on the output of earlier ones (e.g. 'Prob.' is only
# produced by the IDP renames at the top of the list).
dti_label_replacements = (
    list(zip(modalities_dti_idp, new_names))
    + list(zip(struct_modal, new_names_dti))
    + [
        ('_connectome_fa', ' Connectome FA'),
        ('_connectome_mean_length', ' Connectome Mean Length'),
        ('_connectome_sift2', ' Connectome SIFT2'),
        ('_10M', ''),
        ('_connectome_streamline_count', ' Connectome Streamline Count'),
        ('_MSA1', ' MSA I'),
        ('_MSA4', ' MSA IV'),
        ('aparc_a2009s', 'aparc a2009s'),
        ('L1_', 'L1 '),
        ('Prob.', 'Probabilistic'),
    ]
)

# Literal (regex=False) matching: as a regex, the dot in 'Prob.' matches any
# character, so re-running this cell would rewrite "Probabilistic" into
# "Probabilisticbilistic". With literal matching the cell is safe to re-run.
modality_labels = dti_full['Modality'].astype(str)
for old, new in dti_label_replacements:
    modality_labels = modality_labels.str.replace(old, new, regex=False)
dti_full['Modality'] = modality_labels

dti_full.to_csv('/PLS/brain/DTI_PLS_Result_five_folds.csv', index=False)

In [None]:
# Show every row of the fold-level DTI table, lowest R2 first.
dti_sorted_by_r2 = dti_full.sort_values(by='R2', ascending=True)
with pd.option_context('display.max_rows', None):
    display(dti_sorted_by_r2)

In [None]:
# Fold-averaged metrics per modality, highest R2 first.
dti_pls_full_mean = dti_full[['R2', 'Pearson r', 'Modality', 'MSE', 'MAE']]
(
    dti_pls_full_mean
    .groupby(['Modality'])
    .mean()
    .round(3)
    .sort_values(by='R2', ascending=False)
)

## Plot Pearson's r distributions for each DTI modality

In [None]:
# Collect the fold-wise Pearson r values of each modality into a list and
# order the modalities by ascending mean Pearson r.
dti_pls_plot = dti_full.copy()
dti_r_by_modality = dti_pls_plot.groupby('Modality')['Pearson r']

dti_grouped = dti_r_by_modality.apply(list).reset_index(name='Pearson r values')
dti_grouped_sort = (
    dti_r_by_modality
    .mean()
    .reset_index(name='Mean Pearson r values')
    .sort_values(by='Mean Pearson r values', ascending=True)
)

# Both frames come from the same groupby, so the sorted frame's index labels
# address the matching rows of dti_grouped.
dti_new_index = dti_grouped_sort.index
dti_grouped_reindex = dti_grouped.reindex(dti_new_index).reset_index(drop=True)
dti_grouped_reindex

In [None]:
# Frame used by the stacked-model plots, plus the modality ordering by
# ascending mean Pearson r.
dti_pls_stack_plot = dti_full.copy()

# Group the frame defined in this cell. The original grouped `dti_pls_plot`,
# a name owned by other cells (and later rebound to an R2-only frame), so the
# cell only worked while that variable happened to hold a copy of dti_full.
dti_grouped = dti_pls_stack_plot.groupby('Modality')['Pearson r'].apply(list).reset_index(name='Pearson r values')
dti_grouped_sort = dti_pls_stack_plot.groupby('Modality')['Pearson r'].mean().reset_index(name='Mean Pearson r values').sort_values(by='Mean Pearson r values', ascending = True)
dti_new_index = dti_grouped_sort.index
dti_grouped_reindex = dti_grouped.reindex(dti_new_index)
dti_grouped_reindex = dti_grouped_reindex.reset_index(drop=True)
dti_grouped_reindex

In [None]:
# Persist the two frames that feed the DTI plots.
for frame, target in (
    (dti_pls_plot, '/PLS/brain/plot/dti_pls_plot.csv'),
    (dti_grouped_reindex, '/PLS/brain/plot/dti_grouped_reindex.csv'),
):
    frame.to_csv(target, index=False)

### Dot/Lineplot

In [None]:
# Modality labels of the structural connectome matrices (drawn as diamonds
# in the dot/line plots below).
dti_struct = [
    'aparc MSA I Connectome Streamline Count',
    'aparc MSA I Connectome SIFT2',
    'aparc a2009s MSA I Connectome Streamline Count',
    'Schaefer7n200p MSA I Connectome Streamline Count',
    'Schaefer7n200p MSA I Connectome SIFT2',
    'aparc MSA I Connectome FA',
    'aparc a2009s MSA I Connectome FA',
    'aparc MSA I Connectome Mean Length',
    'aparc a2009s MSA I Connectome SIFT2',
    'Schaefer7n200p MSA I Connectome FA',
    'aparc a2009s MSA I Connectome Mean Length',
    'Glasser MSA IV Connectome SIFT2',
    'Glasser MSA IV Connectome Streamline Count',
    'Glasser MSA IV Connectome FA',
    'Schaefer7n500p MSA IV Connectome SIFT2',
    'Schaefer7n500p MSA IV Connectome Streamline Count',
    'Glasser MSA I Connectome FA',
    'Schaefer7n200p MSA I Connectome Mean Length',
    'Glasser MSA I Connectome Streamline Count',
    'Schaefer7n500p MSA IV Connectome FA',
    'Glasser MSA I Connectome SIFT2',
    'Glasser MSA IV Connectome Mean Length',
    'Glasser MSA I Connectome Mean Length',
    'Schaefer7n500p MSA IV Connectome Mean Length',
]

# Modality labels of the DTI IDPs (drawn as circles in the dot/line plots below).
dti_idp = [
    'OD Probabilistic',
    'MO Probabilistic',
    'OD TBSS',
    'ICVF Probabilistic',
    'MO TBSS',
    'ISOVF Probabilistic',
    'FA Probabilistic',
    'L2 Probabilistic',
    'L1 Probabilistic',
    'ISOVF TBSS',
    'L3 Probabilistic',
    'MD TBSS',
    'MD Probabilistic',
    'L2 TBSS',
    'L3 TBSS',
    'ICVF TBSS',
    'L1 TBSS',
    'FA TBSS',
]

### Add stacked model

In [None]:
# Order the modalities of dti_pls_stack_plot by ascending mean Pearson r.
# NOTE(review): no 'dwMRI Stacked (RF)' rows are appended to
# dti_pls_stack_plot in the cells shown here, unlike the R2 and rsMRI
# sections -- confirm whether a stacking cell is missing.
dti_stack_r_by_modality = dti_pls_stack_plot.groupby('Modality')['Pearson r']

dti_pls_stack_grouped = dti_stack_r_by_modality.apply(list).reset_index(name='Pearson r values')
dti_pls_stack_grouped_sort = (
    dti_stack_r_by_modality
    .mean()
    .reset_index(name='Mean Pearson r values')
    .sort_values(by='Mean Pearson r values', ascending=True)
)
dti_pls_stack_new_index = dti_pls_stack_grouped_sort.index
dti_pls_stack_grouped_reindex = (
    dti_pls_stack_grouped
    .reindex(dti_pls_stack_new_index)
    .reset_index(drop=True)
)
dti_pls_stack_grouped_reindex

In [None]:
plt.figure(figsize=(13, 15))

dti_full_r = dti_pls_stack_plot.copy()
dti_full_grouped_reindex_r = dti_pls_stack_grouped_reindex.copy()

# One horizontal strip per modality: fold-wise Pearson r values as markers
# joined by a thin black line, plus a short red tick at the fold mean.
for i, modality in enumerate(dti_pls_stack_grouped_reindex['Modality']):
    x = dti_pls_stack_plot.loc[dti_pls_stack_plot['Modality'] == modality, 'Pearson r']
    y = [i + 1] * len(x)

    # Marker style encodes the modality family; anything that is neither a
    # structural matrix nor an IDP gets the red star.
    if modality in dti_struct:
        color, marker, s = 'mediumaquamarine', 'D', 90
    elif modality in dti_idp:
        color, marker, s = 'steelblue', 'o', 90
    else:
        color, marker, s = 'red', '*', 200

    # zorder=3 draws the markers above the connecting line.
    plt.scatter(x, y, color=color, marker=marker, s=s, zorder=3, alpha=0.8)
    plt.plot(x, y, color='black', linewidth=0.5)

    # Short red vertical tick at the mean across folds.
    mean_value = np.mean(x)
    plt.plot([mean_value, mean_value], [i + 0.85, i + 1.15], color='red', linewidth=1)

    # Dashed reference line through the stacked model's mean.
    if modality == 'dwMRI Stacked (RF)':
        plt.axvline(x=mean_value, color='grey', linestyle='--', linewidth=0.5)

plt.gca().xaxis.set_major_locator(ticker.MultipleLocator(0.02))
plt.yticks(
    range(1, len(dti_pls_stack_grouped_reindex['Modality']) + 1),
    dti_pls_stack_grouped_reindex['Modality'],
    fontsize=15,
)
plt.xticks(fontsize=15)
plt.xlabel("Pearson's $r$", fontsize=20)
plt.title("PLSR Performance of dwMRI: Pearson's $r$", fontsize=30)
plt.show()

### Do all the same for R2

In [None]:
# Combine the single-modality PLS R2 values with the random-forest stacked
# model and order the modalities by ascending mean R2.
dti_pls = pd.read_csv('/PLS/brain/DTI_PLS_Result_five_folds.csv')
dti_stack = pd.read_csv('/PLS/brain/stacking/DTI_All_stacked_five_folds.csv')
dti_stack = dti_stack[dti_stack['Algorithm'] == 'rf']
# .assign returns a new frame, so the label is not written into a slice of
# dti_stack (the original column assignment raised SettingWithCopyWarning).
dti_stack_rf = dti_stack[['Fold', 'R2']].assign(Modality='dwMRI Stacked (RF)')
dti_pls_plot = dti_pls[['Fold', 'R2', 'Modality']]
dti_pls_stack_plot_r2 = pd.concat([dti_pls_plot, dti_stack_rf], axis=0)

dti_pls_stack_grouped = dti_pls_stack_plot_r2.groupby('Modality')['R2'].apply(list).reset_index(name='R2 values')
dti_pls_stack_grouped_sort = dti_pls_stack_plot_r2.groupby('Modality')['R2'].mean().reset_index(name='Mean R2 values').sort_values(by='Mean R2 values', ascending = True)
# Both frames come from the same groupby, so the sorted index labels select
# the matching rows of the grouped frame.
dti_pls_stack_new_index = dti_pls_stack_grouped_sort.index
dti_pls_stack_grouped_reindex_r2 = dti_pls_stack_grouped.reindex(dti_pls_stack_new_index)
dti_pls_stack_grouped_reindex_r2 = dti_pls_stack_grouped_reindex_r2.reset_index(drop=True)

dti_pls_stack_plot_r2.to_csv('/PLS/brain/stacking/dti_pls_stack_plot_r2.csv', index=False)
dti_full_r2 = dti_pls_stack_plot_r2.copy()
dti_full_grouped_reindex_r2 = dti_pls_stack_grouped_reindex_r2.copy()


dti_pls_stack_grouped_reindex_r2

In [None]:
plt.figure(figsize=(13, 15))

# One horizontal strip per modality: fold-wise R2 values as markers joined by
# a thin black line, plus a short red tick at the fold mean.
for i, modality in enumerate(dti_pls_stack_grouped_reindex_r2['Modality']):
    x = dti_pls_stack_plot_r2.loc[dti_pls_stack_plot_r2['Modality'] == modality, 'R2']
    y = [i + 1] * len(x)

    # Marker style encodes the modality family; anything that is neither a
    # structural matrix nor an IDP gets the red star.
    if modality in dti_struct:
        color, marker, s = 'mediumaquamarine', 'D', 90
    elif modality in dti_idp:
        color, marker, s = 'steelblue', 'o', 90
    else:
        color, marker, s = 'red', '*', 200

    # zorder=3 draws the markers above the connecting line.
    plt.scatter(x, y, color=color, marker=marker, s=s, zorder=3, alpha=0.8)
    plt.plot(x, y, color='black', linewidth=0.5)

    # Short red vertical tick at the mean across folds.
    mean_value = np.mean(x)
    plt.plot([mean_value, mean_value], [i + 0.85, i + 1.15], color='red', linewidth=1)

    # Dashed reference line through the stacked model's mean.
    if modality == 'dwMRI Stacked (RF)':
        plt.axvline(x=mean_value, color='grey', linestyle='--', linewidth=0.5)

plt.gca().xaxis.set_major_locator(ticker.MultipleLocator(0.01))
plt.yticks(
    range(1, len(dti_pls_stack_grouped_reindex_r2['Modality']) + 1),
    dti_pls_stack_grouped_reindex_r2['Modality'],
    fontsize=15,
)
plt.xticks(fontsize=15)
plt.xlabel("$R^2$", fontsize=20)
plt.title("PLSR Performance of dwMRI: $R^2$", fontsize=30)
plt.show()

# rsMRI

# Combine and plot PLS results for rsMRI: RS IDP (tangent and amplitudes 21/55) + Parcellations (full correlation)

Rename files in the folder: Schaefer

In [None]:
# Drop the "_arrays_" infix from result file names in every fold folder.
folds = ['0', '1', '2', '3', '4']
for fold in folds:
    folder_path = f'/PLS/brain/rs/parcellation_main/UPDATE/fold_{fold}/result'
    for filename in os.listdir(folder_path):
        # Only files carrying the infix need renaming.
        if '_arrays_' not in filename:
            continue

        new_filename = filename.replace("_arrays_", "_")
        os.rename(
            os.path.join(folder_path, filename),
            os.path.join(folder_path, new_filename),
        )
        print(f"Renamed {filename} to {new_filename}")

In [None]:
# Merge the per-fold PLS result files of every rsMRI modality into one table.
folds = ["0", "1", "2", "3", "4"]
atlases = [
'aparc_Tian_s1_full_correlation', 
'aparc_2009_Tian_s1_full_correlation',
'glasser_Tian_s1_full_correlation',
'glasser_Tian_s4_full_correlation',
'Schaefer7n200p_Tian_s1_full_correlation',
'Schaefer7n500p_Tian_s4_full_correlation']

# Column names assigned to the header-less *_PLS_result.csv files read below.
pls_result_columns = ['Fold', 'Modality', 'n components', 'MSE', 'MAE', 'R2', 'Pearson r']

# Parcellation-based full-correlation matrices: one file per (atlas, fold).
five_folds_ts_fc = []
for atlas in atlases:
    for fold in folds:
        pls = pd.read_csv(f'/PLS/brain/rs/parcellation_main/rs_matched_to_dti/fold_{fold}/result/{atlas}_fold_{fold}_PLS_result.csv', header=None)
        pls.columns = pls_result_columns
        five_folds_ts_fc.append(pls)
# Concatenate once, after the loops, instead of rebuilding the frame on every iteration.
five_folds_ts_full_corr_full = pd.concat(five_folds_ts_fc, ignore_index=False)

# ICA amplitudes (21 and 55 components).
amplitudes = [
'amplitudes_21',
'amplitudes_55']

five_folds_ica_amplitudes = []
for a in amplitudes:
    for fold in folds:
        pls = pd.read_csv(f'/PLS/brain/rs/ica_main/fold_{fold}/models/csv/{a}_fold_{fold}_PLS_result.csv', header=None)
        pls.columns = pls_result_columns
        five_folds_ica_amplitudes.append(pls)
five_folds_ica_amplitudes_full = pd.concat(five_folds_ica_amplitudes, ignore_index=False)

# ICA tangent matrices (21 and 55 components).
five_folds_ica_tangent = []
tangent = ['tangent_matrices_21', 'tangent_matrices_55']
for tg in tangent:
    for fold in folds:
        # Separate name for the per-file frame so the `tangent` list being
        # iterated is not rebound to a DataFrame inside its own loop.
        tangent_pls = pd.read_csv(f'/PLS/brain/rs/ica_tangent/fold_{fold}/models/csv/{tg}_fold_{fold}_PLS_result.csv', header=None)
        tangent_pls.columns = pls_result_columns
        five_folds_ica_tangent.append(tangent_pls)
five_folds_ica_tangent_full = pd.concat(five_folds_ica_tangent, ignore_index=False)

rs_pls_full = pd.concat([five_folds_ts_full_corr_full, five_folds_ica_amplitudes_full, five_folds_ica_tangent_full], axis=0, ignore_index=True).round(decimals=3)

# Strip the "PearsonRResult(statistic=..., pvalue=...)" wrapper, then split the
# remaining "statistic, pvalue" text into two float columns.
rs_pls_full['Pearson r'] = rs_pls_full['Pearson r'].astype(str).str.replace(r'PearsonRResult\(statistic=|pvalue=|\)', '', regex=True)
rs_pls_full[['Pearson r', 'p-value']] = rs_pls_full['Pearson r'].str.split(',', expand=True).astype(float).round(decimals=3)
# Remove the "{'n_components': ... }" dict wrapper around the component count.
rs_pls_full['n components'] = rs_pls_full['n components'].astype(str).str.replace(r"{'n_components':", '', regex=True)
rs_pls_full['n components'] = rs_pls_full['n components'].astype(str).str.replace(r"}", '', regex=True)
# Normalise modality values stored as the list repr "['Schaefer7n500p_Tian_s4']".
rs_pls_full['Modality'] = rs_pls_full['Modality'].astype(str).str.replace(r"\['Schaefer7n500p_Tian_s4'\]", 'Schaefer7n500p_Tian_s4_full_correlation', regex=True)
rs_pls_full.to_csv('/PLS/brain/RS_PLS_Result_five_folds.csv', index=False)

with pd.option_context('display.max_rows', None):
    display(rs_pls_full)

Rename modalities

In [None]:
# Rename rsMRI modalities to the human-readable labels used in the figures.
rs_pls_plot = rs_pls_full.copy()

# Ordered (regex pattern, label) pairs. The glasser and Schaefer7n200p patterns
# accept both spellings that occur in this notebook: with or without the
# "_arrays" infix (removed from the file names earlier) and "Tian"/"tian".
# The original literal patterns matched only one spelling, so a value written
# the other way would stay unrenamed and miss its plot group.
rs_label_patterns = [
    (r"aparc_Tian_s1_full_correlation", 'aparc MSA I Full correlation'),
    (r"aparc_2009_Tian_s1_full_correlation", 'aparc a2009s MSA I Full correlation'),
    (r"glasser_Tian_s1(?:_arrays)?_full_correlation", 'Glasser MSA I Full correlation'),
    (r"glasser_Tian_s4(?:_arrays)?_full_correlation", 'Glasser MSA IV Full correlation'),
    (r"Schaefer7n200p_[Tt]ian_s1_full_correlation", 'Schaefer7n200p MSA I Full correlation'),
    (r"Schaefer7n500p_Tian_s4_full_correlation", 'Schaefer7n500p MSA IV Full correlation'),
    (r"amplitudes_21", 'Amplitudes 21 IC'),
    (r"amplitudes_55", 'Amplitudes 55 IC'),
    (r"tangent_matrices_21", 'Tangent matrices 21 IC'),
    (r"tangent_matrices_55", 'Tangent matrices 55 IC'),
]

rs_modality_labels = rs_pls_plot['Modality'].astype(str)
for pattern, label in rs_label_patterns:
    rs_modality_labels = rs_modality_labels.str.replace(pattern, label, regex=True)
rs_pls_plot['Modality'] = rs_modality_labels

rs_pls_plot.to_csv('/PLS/brain/RS_PLS_Result_five_folds_renamed.csv', index=False)
rs_pls_plot

In [None]:
# Fold-averaged metrics per rsMRI modality, highest R2 first.
rs_pls_full_mean = rs_pls_plot[['R2', 'Pearson r', 'Modality', 'MSE', 'MAE']]
(
    rs_pls_full_mean
    .groupby(['Modality'])
    .mean()
    .round(3)
    .sort_values(by='R2', ascending=False)
)

## Plot Pearson's r distributions for each RS modality

In [None]:
# Collect the fold-wise Pearson r values of each rsMRI modality and order the
# modalities by ascending mean Pearson r.
rs_r_by_modality = rs_pls_plot.groupby('Modality')['Pearson r']

rs_grouped = rs_r_by_modality.apply(list).reset_index(name='Pearson r values')
rs_grouped_sort = (
    rs_r_by_modality
    .mean()
    .reset_index(name='Mean Pearson r values')
    .sort_values(by='Mean Pearson r values', ascending=True)
)

# Both frames come from the same groupby, so the sorted index labels select
# the matching rows of rs_grouped.
new_index = rs_grouped_sort.index
rs_grouped_reindex = rs_grouped.reindex(new_index).reset_index(drop=True)

# Persist the plotting inputs.
rs_pls_plot.to_csv('/PLS/brain/plot/rs_pls_plot.csv', index=False)
rs_grouped_reindex.to_csv('/PLS/brain/plot/rs_grouped_reindex.csv', index=False)
rs_grouped_reindex

### Dot/Lineplot

In [None]:
# List the distinct modality labels, to check them against the groups defined below.
pd.unique(rs_pls_plot['Modality'])

In [None]:
# Modality labels of the parcellation-based full-correlation matrices
# (drawn as diamonds in the dot/line plots below).
rs_parcellations = [
    'aparc MSA I Full correlation',
    'aparc a2009s MSA I Full correlation',
    'Glasser MSA I Full correlation',
    'Glasser MSA IV Full correlation',
    'Schaefer7n200p MSA I Full correlation',
    'Schaefer7n500p MSA IV Full correlation',
]

# Modality labels of the rsMRI IDPs (drawn as circles in the dot/line plots below).
rs_idp = [
    'Amplitudes 21 IC',
    'Amplitudes 55 IC',
    'Tangent matrices 21 IC',
    'Tangent matrices 55 IC',
]

### Add stacked model

In [None]:
# Combine the single-modality PLS Pearson r values with the random-forest
# stacked model.
rs_pls = pd.read_csv('/PLS/brain/plot/rs_pls_plot.csv')
rs_stack = pd.read_csv('/PLS/brain/stacking/RS_IDP_Timeseries_best_metrics_stacked_five_folds.csv')

rs_stack = rs_stack[rs_stack['Algorithm'] == 'rf']
# .assign returns a new frame, so the label is not written into a slice of
# rs_stack (the original column assignment raised SettingWithCopyWarning).
rs_stack_rf = rs_stack[['Fold', 'Pearson r']].assign(Modality='rsMRI Stacked (RF)')
rs_pls_plot = rs_pls[['Fold', 'Pearson r', 'Modality']]
rs_pls_stack_plot = pd.concat([rs_pls_plot, rs_stack_rf], axis=0)
rs_pls_stack_plot

In [None]:
# Order the modalities (including the stacked model) by ascending mean Pearson r.
rs_stack_r_by_modality = rs_pls_stack_plot.groupby('Modality')['Pearson r']

rs_pls_stack_grouped = rs_stack_r_by_modality.apply(list).reset_index(name='Pearson r values')
rs_pls_stack_grouped_sort = (
    rs_stack_r_by_modality
    .mean()
    .reset_index(name='Mean Pearson r values')
    .sort_values(by='Mean Pearson r values', ascending=True)
)
rs_pls_stack_new_index = rs_pls_stack_grouped_sort.index
rs_pls_stack_grouped_reindex = (
    rs_pls_stack_grouped
    .reindex(rs_pls_stack_new_index)
    .reset_index(drop=True)
)
rs_pls_stack_grouped_reindex

In [None]:
# Keep independent copies of the Pearson r plotting frames and save the
# long-format table.
rs_full_r, rs_full_grouped_reindex_r = (
    rs_pls_stack_plot.copy(),
    rs_pls_stack_grouped_reindex.copy(),
)
rs_pls_stack_plot.to_csv('/PLS/brain/stacking/rs_pls_stack_plot.csv', index=False)

In [None]:
plt.figure(figsize=(13, 15))

# One horizontal strip per modality: fold-wise Pearson r values as markers
# joined by a thin black line, plus a short red tick at the fold mean.
for i, modality in enumerate(rs_pls_stack_grouped_reindex['Modality']):
    x = rs_pls_stack_plot.loc[rs_pls_stack_plot['Modality'] == modality, 'Pearson r']
    y = [i + 1] * len(x)

    # Marker style encodes the modality family; anything that is neither a
    # parcellation nor an IDP gets the red star.
    if modality in rs_parcellations:
        color, marker, s = 'mediumaquamarine', 'D', 90
    elif modality in rs_idp:
        color, marker, s = 'steelblue', 'o', 90
    else:
        color, marker, s = 'red', '*', 200

    # zorder=3 draws the markers above the connecting line.
    plt.scatter(x, y, color=color, marker=marker, s=s, zorder=3, alpha=0.8)
    plt.plot(x, y, color='black', linewidth=0.5)

    # Short red vertical tick at the mean across folds.
    mean_value = np.mean(x)
    plt.plot([mean_value, mean_value], [i + 0.85, i + 1.15], color='red', linewidth=1)

    # Dashed reference line through the stacked model's mean.
    if modality == 'rsMRI Stacked (RF)':
        plt.axvline(x=mean_value, color='grey', linestyle='--', linewidth=0.5)

plt.gca().xaxis.set_major_locator(ticker.MultipleLocator(0.02))
plt.yticks(
    range(1, len(rs_pls_stack_grouped_reindex['Modality']) + 1),
    rs_pls_stack_grouped_reindex['Modality'],
    fontsize=25,
)
plt.xticks(fontsize=15)
plt.xlabel("Pearson's $r$", fontsize=20)
plt.title("PLSR Performance of rsMRI: Pearson's $r$", fontsize=30)
plt.show()

### Do all the same for R2

In [None]:
# Combine the single-modality PLS R2 values with the random-forest stacked
# model and order the modalities by ascending mean R2.
# `rs_stack` and `rs_pls` are the frames loaded in the Pearson r stacking cell.
rs_stack = rs_stack[rs_stack['Algorithm'] == 'rf']
# .assign returns a new frame, so the label is not written into a slice of
# rs_stack (the original column assignment raised SettingWithCopyWarning).
rs_stack_rf = rs_stack[['Fold', 'R2']].assign(Modality='rsMRI Stacked (RF)')
rs_pls_plot = rs_pls[['Fold', 'R2', 'Modality']]
rs_pls_stack_plot_r2 = pd.concat([rs_pls_plot, rs_stack_rf], axis=0)

rs_pls_stack_grouped = rs_pls_stack_plot_r2.groupby('Modality')['R2'].apply(list).reset_index(name='R2 values')
rs_pls_stack_grouped_sort = rs_pls_stack_plot_r2.groupby('Modality')['R2'].mean().reset_index(name='Mean R2 values').sort_values(by='Mean R2 values', ascending = True)
# Both frames come from the same groupby, so the sorted index labels select
# the matching rows of the grouped frame.
rs_pls_stack_new_index = rs_pls_stack_grouped_sort.index
rs_pls_stack_grouped_reindex_r2 = rs_pls_stack_grouped.reindex(rs_pls_stack_new_index)
rs_pls_stack_grouped_reindex_r2 = rs_pls_stack_grouped_reindex_r2.reset_index(drop=True)
rs_pls_stack_grouped_reindex_r2

In [None]:
# Save the long-format R2 table and keep independent copies of the R2
# plotting frames.
rs_pls_stack_plot_r2.to_csv('/PLS/brain/stacking/rs_pls_stack_plot_r2.csv', index=False)
rs_full_r2, rs_full_grouped_reindex_r2 = (
    rs_pls_stack_plot_r2.copy(),
    rs_pls_stack_grouped_reindex_r2.copy(),
)

In [None]:
plt.figure(figsize=(13, 15))

# One horizontal strip per modality: fold-wise R2 values as markers joined by
# a thin black line, plus a short red tick at the fold mean.
for i, modality in enumerate(rs_pls_stack_grouped_reindex_r2['Modality']):
    x = rs_pls_stack_plot_r2.loc[rs_pls_stack_plot_r2['Modality'] == modality, 'R2']
    y = [i + 1] * len(x)

    # Marker style encodes the modality family; anything that is neither a
    # parcellation nor an IDP gets the red star.
    if modality in rs_parcellations:
        color, marker, s = 'mediumaquamarine', 'D', 90
    elif modality in rs_idp:
        color, marker, s = 'steelblue', 'o', 90
    else:
        color, marker, s = 'red', '*', 200

    # zorder=3 draws the markers above the connecting line.
    plt.scatter(x, y, color=color, marker=marker, s=s, zorder=3, alpha=0.8)
    plt.plot(x, y, color='black', linewidth=0.5)

    # Short red vertical tick at the mean across folds.
    mean_value = np.mean(x)
    plt.plot([mean_value, mean_value], [i + 0.85, i + 1.15], color='red', linewidth=1)

    # Dashed reference line through the stacked model's mean.
    if modality == 'rsMRI Stacked (RF)':
        plt.axvline(x=mean_value, color='grey', linestyle='--', linewidth=0.5)

plt.gca().xaxis.set_major_locator(ticker.MultipleLocator(0.01))
plt.yticks(
    range(1, len(rs_pls_stack_grouped_reindex_r2['Modality']) + 1),
    rs_pls_stack_grouped_reindex_r2['Modality'],
    fontsize=25,
)
plt.xticks(fontsize=15)
plt.xlabel("$R^2$", fontsize=20)
plt.title("PLSR Performance of rsMRI: $R^2$", fontsize=30)
plt.show()

# T1w structural + whole-brain T1w/T2w

In [None]:
# Merge the per-fold PLS result files of the T1w structural modalities and the
# whole-brain T1w/T2w model into one table, then rename the modalities.
folds = ['0', '1', '2', '3', '4']
t1_modalities = ['struct_fast', 'struct_sub_first',
'struct_aseg_mean_intensity', 'struct_aseg_volume',
'struct_ba_exvivo_area',  'struct_ba_exvivo_mean_thickness', 'struct_ba_exvivo_volume',
'struct_a2009s_area', 'struct_a2009s_mean_thickness', 'struct_a2009s_volume',
'struct_dkt_area', 'struct_dkt_mean_thickness', 'struct_dkt_volume',
'struct_desikan_gw', 'struct_desikan_pial', 'struct_desikan_white_area', 'struct_desikan_white_mean_thickness', 'struct_desikan_white_volume',
'struct_subsegmentation']

# Column names assigned to the header-less *_PLS_result.csv files read below.
pls_result_columns = ['Fold', 'Modality', 'n components', 'MSE', 'MAE', 'R2', 'Pearson r']

# T1w structural modalities: one result file per (modality, fold).
five_folds_t1 = []
for modality in t1_modalities:
    for fold in folds:
        t1 = pd.read_csv(f'/PLS/brain/t1/fold_{fold}/models/{modality}_fold_{fold}_PLS_result.csv', header=None)
        t1.columns = pls_result_columns
        five_folds_t1.append(t1)
# Concatenate once, after the loops, instead of rebuilding the frame on every iteration.
five_folds_t1_struct = pd.concat(five_folds_t1, ignore_index=False)

# Whole-brain T1w/T2w: one result file per fold.
five_folds_t1_t2_wb = []
for fold in folds:
    t1_t2_wb = pd.read_csv(f'/PLS/brain/additional/fold_{fold}/models/csv/T1_T2_whole_brain_fold_{fold}_PLS_result.csv', header=None)
    t1_t2_wb.columns = pls_result_columns
    five_folds_t1_t2_wb.append(t1_t2_wb)
five_folds_t1_t2_additional = pd.concat(five_folds_t1_t2_wb, ignore_index=False)

t1_t2_pls = pd.concat([five_folds_t1_struct, five_folds_t1_t2_additional], axis=0, ignore_index=True).round(decimals=3)

# Strip the "PearsonRResult(statistic=..., pvalue=...)" wrapper, then split the
# remaining "statistic, pvalue" text into two float columns.
t1_t2_pls['Pearson r'] = t1_t2_pls['Pearson r'].astype(str).str.replace(r'PearsonRResult\(statistic=|pvalue=|\)', '', regex=True)
t1_t2_pls[['Pearson r', 'p-value']] = t1_t2_pls['Pearson r'].str.split(',', expand=True).astype(float).round(decimals=3)
# Remove the "{'n_components': ... }" dict wrapper around the component count.
t1_t2_pls['n components'] = t1_t2_pls['n components'].astype(str).str.replace(r"{'n_components':", '', regex=True)
t1_t2_pls['n components'] = t1_t2_pls['n components'].astype(str).str.replace(r"}", '', regex=True)

# Raw modality name -> display label, applied in this order as substring
# replacements (one mapping instead of 19 copy-pasted replace lines).
t1_label_map = {
    "struct_fast": 'FSL FAST',
    "struct_sub_first": 'FSL FIRST',
    "struct_aseg_mean_intensity": 'ASEG mean intensity',
    "struct_aseg_volume": 'ASEG volume',
    "struct_ba_exvivo_area": 'BA ex-vivo area',
    "struct_ba_exvivo_mean_thickness": 'BA ex-vivo mean thickness',
    "struct_ba_exvivo_volume": 'BA ex-vivo volume',
    "struct_a2009s_area": 'aparc a2009s area',
    "struct_a2009s_mean_thickness": 'aparc a2009s mean thickness',
    "struct_a2009s_volume": 'aparc a2009s volume',
    "struct_dkt_area": 'Desikan-Killiany-Tourville area',
    "struct_dkt_mean_thickness": 'Desikan-Killiany-Tourville mean thickness',
    "struct_dkt_volume": 'Desikan-Killiany-Tourville volume',
    "struct_desikan_gw": 'Desikan grey/white matter intensity',
    "struct_desikan_pial": 'Desikan pial',
    "struct_desikan_white_area": 'Desikan white matter area',
    "struct_desikan_white_mean_thickness": 'Desikan white matter mean thickness',
    "struct_desikan_white_volume": 'Desikan white matter volume',
    "struct_subsegmentation": 'Subcortical volumetric subsegmentation',
}

t1_modality_labels = t1_t2_pls['Modality'].astype(str)
for raw_name, label in t1_label_map.items():
    t1_modality_labels = t1_modality_labels.str.replace(raw_name, label, regex=True)
t1_t2_pls['Modality'] = t1_modality_labels
t1_t2_pls

In [None]:
# Fold-averaged metrics per structural modality, highest R2 first.
t1_t2_metric_columns = ['R2', 'Pearson r', 'Modality', 'MSE', 'MAE']
t1_t2_pls_mean = (
    t1_t2_pls[t1_t2_metric_columns]
    .groupby(['Modality'])
    .mean()
    .round(3)
    .reset_index()
    .sort_values(by="R2", ascending=False)
)
t1_t2_pls_mean

In [None]:
# Save the fold-level structural results.
t1_t2_pls_plot = t1_t2_pls.copy()
t1_t2_pls_plot.to_csv('/PLS/brain/Struct_PLS_Result_five_folds.csv', index=False)

# Collect the fold-wise Pearson r values of each modality into a list.
t1_grouped = (
    t1_t2_pls_plot
    .groupby('Modality')['Pearson r']
    .apply(list)
    .reset_index(name='Pearson r values')
)
t1_grouped

In [None]:
# Order the modalities by ascending mean Pearson r.
t1_grouped_sort = (
    t1_t2_pls_plot
    .groupby('Modality')['Pearson r']
    .mean()
    .reset_index(name='Mean Pearson r values')
    .sort_values(by='Mean Pearson r values', ascending=True)
)
# t1_grouped comes from the same grouping, so the sorted index labels select
# its matching rows.
t1_new_index = t1_grouped_sort.index
t1_grouped_reindex = t1_grouped.reindex(t1_new_index).reset_index(drop=True)
t1_grouped_reindex

In [None]:
# Persist the two frames that feed the structural plots.
for frame, target in (
    (t1_t2_pls_plot, '/PLS/brain/plot/t1_t2_pls_plot.csv'),
    (t1_grouped_reindex, '/PLS/brain/plot/t1_grouped_reindex.csv'),
):
    frame.to_csv(target, index=False)

### Dot/Lineplot

### Add stacked model

In [None]:
# Combine the single-modality PLS Pearson r values with the SVR stacked model.
t1t2_pls = pd.read_csv('/PLS/brain/plot/t1_t2_pls_plot.csv')
t1t2_stack = pd.read_csv('/PLS/brain/stacking/T1_T2_whole_brain_stacked_five_folds.csv')
t1t2_stack = t1t2_stack[t1t2_stack['Algorithm'] == 'svr']
# .assign returns a new frame, so the label is not written into a slice of
# t1t2_stack (the original column assignment raised SettingWithCopyWarning).
t1t2_stack_svr = t1t2_stack[['Fold', 'Pearson r']].assign(Modality='T1w/T2w Structural Stacked (SVR)')
t1t2_pls_plot = t1t2_pls[['Fold', 'Pearson r', 'Modality']]
t1t2_pls_stack_plot = pd.concat([t1t2_pls_plot, t1t2_stack_svr], axis=0)
t1t2_pls_stack_plot

In [None]:
# Order the modalities (including the stacked model) by ascending mean Pearson r.
t1t2_stack_r_by_modality = t1t2_pls_stack_plot.groupby('Modality')['Pearson r']

t1t2_pls_stack_grouped = t1t2_stack_r_by_modality.apply(list).reset_index(name='Pearson r values')
t1t2_pls_stack_grouped_sort = (
    t1t2_stack_r_by_modality
    .mean()
    .reset_index(name='Mean Pearson r values')
    .sort_values(by='Mean Pearson r values', ascending=True)
)
t1t2_pls_stack_new_index = t1t2_pls_stack_grouped_sort.index
t1t2_pls_stack_grouped_reindex = (
    t1t2_pls_stack_grouped
    .reindex(t1t2_pls_stack_new_index)
    .reset_index(drop=True)
)
t1t2_pls_stack_grouped_reindex

In [None]:
# Keep independent copies of the Pearson r plotting frames and save the
# long-format table.
t1t2_full_r, t1t2_full_grouped_reindex_r = (
    t1t2_pls_stack_plot.copy(),
    t1t2_pls_stack_grouped_reindex.copy(),
)
t1t2_pls_stack_plot.to_csv('/PLS/brain/stacking/t1t2_pls_stack_plot.csv', index=False)

In [None]:
plt.figure(figsize=(13, 15))

# One horizontal strip per modality: fold-wise Pearson r values as markers
# joined by a thin black line, plus a short red tick at the fold mean.
for i, modality in enumerate(t1t2_pls_stack_grouped_reindex['Modality']):
    x = t1t2_pls_stack_plot.loc[t1t2_pls_stack_plot['Modality'] == modality, 'Pearson r']
    y = [i + 1] * len(x)

    # The stacked model gets the red star; every other modality a green diamond.
    is_stacked = modality == 'T1w/T2w Structural Stacked (SVR)'
    if not is_stacked:
        color, marker, s = 'mediumaquamarine', 'D', 90
    else:
        color, marker, s = 'red', '*', 200

    # zorder=3 draws the markers above the connecting line.
    plt.scatter(x, y, color=color, marker=marker, s=s, zorder=3, alpha=0.8)
    plt.plot(x, y, color='black', linewidth=0.5)

    # Short red vertical tick at the mean across folds.
    mean_value = np.mean(x)
    plt.plot([mean_value, mean_value], [i + 0.85, i + 1.15], color='red', linewidth=1)

    # Dashed reference line through the stacked model's mean.
    if is_stacked:
        plt.axvline(x=mean_value, color='grey', linestyle='--', linewidth=0.5)

plt.gca().xaxis.set_major_locator(ticker.MultipleLocator(0.02))
plt.yticks(
    range(1, len(t1t2_pls_stack_grouped_reindex['Modality']) + 1),
    t1t2_pls_stack_grouped_reindex['Modality'],
    fontsize=15,
)
plt.xticks(fontsize=15)
plt.xlabel("Pearson's $r$", fontsize=20)
plt.title("PLSR Performance of T1w/T2w Structural MRI: Pearson's $r$", fontsize=30)
plt.show()

### Repeat the same steps for $R^2$

In [None]:
# Reload the inputs (same files and filtering as in the Pearson r section) so
# the R2 cells below have t1t2_pls and t1t2_stack available.
t1t2_pls = pd.read_csv('/PLS/brain/plot/t1_t2_pls_plot.csv')
t1t2_stack = pd.read_csv('/PLS/brain/stacking/T1_T2_whole_brain_stacked_five_folds.csv')
t1t2_stack = t1t2_stack[t1t2_stack['Algorithm'] == 'svr']

# .assign() returns a new frame, so the label column is never written into a
# slice of t1t2_stack (the in-place assignment triggered SettingWithCopyWarning).
t1t2_stack_svr = t1t2_stack[['Fold', 'Pearson r']].assign(
    Modality='T1w/T2w Structural Stacked (SVR)'
)

# Long Pearson r table: PLS rows first, stacked-model rows appended.
t1t2_pls_plot = t1t2_pls[['Fold', 'Pearson r', 'Modality']]
t1t2_pls_stack_plot = pd.concat([t1t2_pls_plot, t1t2_stack_svr], axis=0)
t1t2_pls_stack_plot

In [None]:
# R2 of the stacked SVR model, labelled as its own modality. .assign() builds a
# new frame instead of writing into a slice of t1t2_stack, which avoids the
# SettingWithCopyWarning raised by the in-place column assignment.
t1t2_stack_svr = t1t2_stack[['Fold', 'R2']].assign(
    Modality='T1w/T2w Structural Stacked (SVR)'
)

# Long R2 table: PLS rows first, stacked-model rows appended.
t1t2_pls_plot = t1t2_pls[['Fold', 'R2', 'Modality']]
t1t2_pls_stack_plot_r2 = pd.concat([t1t2_pls_plot, t1t2_stack_svr], axis=0)

# One row per modality with the list of its R2 values ...
t1t2_pls_stack_grouped = t1t2_pls_stack_plot_r2.groupby('Modality')['R2'].apply(list).reset_index(name='R2 values')
# ... and the same modalities ordered by ascending mean R2.
t1t2_pls_stack_grouped_sort = t1t2_pls_stack_plot_r2.groupby('Modality')['R2'].mean().reset_index(name='Mean R2 values').sort_values(by='Mean R2 values', ascending=True)

# Both grouped frames share the same integer index after reset_index(), so the
# sorted frame's index reorders the list frame by mean R2.
t1t2_pls_stack_new_index = t1t2_pls_stack_grouped_sort.index
t1t2_pls_stack_grouped_reindex_r2 = t1t2_pls_stack_grouped.reindex(t1t2_pls_stack_new_index)
t1t2_pls_stack_grouped_reindex_r2 = t1t2_pls_stack_grouped_reindex_r2.reset_index(drop=True)
t1t2_pls_stack_grouped_reindex_r2

In [None]:
# Save the long R2 table (row index dropped).
t1t2_pls_stack_plot_r2.to_csv('/PLS/brain/stacking/t1t2_pls_stack_plot_r2.csv', index=False)

# Keep independent copies of the R2 tables under the "full" names.
t1t2_full_r2, t1t2_full_grouped_reindex_r2 = (
    t1t2_pls_stack_plot_r2.copy(),
    t1t2_pls_stack_grouped_reindex_r2.copy(),
)

In [None]:
plt.figure(figsize=(13, 15))

# One horizontal row per modality (lowest mean R2 at the bottom): the individual
# values as markers joined by a thin line, plus a short red tick at their mean.
stacked_name = 'T1w/T2w Structural Stacked (SVR)'
for pos, modality in enumerate(t1t2_pls_stack_grouped_reindex_r2['Modality']):
    fold_r2 = t1t2_pls_stack_plot_r2.loc[t1t2_pls_stack_plot_r2['Modality'] == modality, 'R2']
    row = [pos + 1] * len(fold_r2)
    is_stacked = modality == stacked_name

    # The stacked model is highlighted with a larger red star.
    color, marker, s = ('red', '*', 200) if is_stacked else ('mediumaquamarine', 'D', 90)

    plt.scatter(fold_r2, row, color=color, marker=marker, s=s, zorder=3, alpha=0.8)
    plt.plot(fold_r2, row, color='black', linewidth=0.5)

    # Short vertical red tick at the mean of this modality's values.
    mean_value = np.mean(fold_r2)
    plt.plot([mean_value, mean_value], [pos + 0.85, pos + 1.15], color='red', linewidth=1)

    # Dashed full-height reference line at the stacked model's mean.
    if is_stacked:
        plt.axvline(x=mean_value, color='grey', linestyle='--', linewidth=0.5)

modality_labels = t1t2_pls_stack_grouped_reindex_r2['Modality']
plt.gca().xaxis.set_major_locator(ticker.MultipleLocator(0.01))
plt.yticks(range(1, len(modality_labels) + 1), modality_labels, fontsize=15)
plt.xticks(fontsize=15)
plt.xlabel("$R^2$", fontsize=20)
plt.title("PLSR Performance of T1w/T2w Structural MRI: $R^2$", fontsize=30)
plt.show()

# Plot all modalities

In [None]:
# Stack the T1/T2, DTI and resting-state PLS tables into one long frame with a
# fresh 0..n-1 row index.
_pls_tables = [t1_t2_pls_plot, dti_pls_plot, rs_pls_plot]
all_pls = pd.concat(_pls_tables, axis=0, ignore_index=True)
all_pls

In [None]:
# One row per modality holding the list of its Pearson r values.
all_grouped = (
    all_pls
    .groupby('Modality')['Pearson r']
    .apply(list)
    .reset_index(name='Pearson r values')
)
all_grouped

In [None]:
# Modalities ordered by ascending mean Pearson r.
all_grouped_sort = (
    all_pls
    .groupby('Modality')['Pearson r']
    .mean()
    .reset_index(name='Mean Pearson r values')
    .sort_values(by='Mean Pearson r values', ascending=True)
)

# all_grouped and all_grouped_sort share the same integer index after
# reset_index(), so the sorted index reorders the list frame by mean r.
all_new_index = all_grouped_sort.index
all_grouped_reindex = all_grouped.reindex(all_new_index).reset_index(drop=True)
all_grouped_reindex

In [None]:
# Save the long per-fold table and the mean-sorted summary to SEPARATE files.
# Both calls previously wrote to all_pls.csv, so the summary silently overwrote
# the per-fold table; the summary now gets its own file, matching the
# t1_t2_pls_plot.csv / t1_grouped_reindex.csv naming used for the T1/T2 tables.
all_pls.to_csv('/PLS/brain/plot/all_pls.csv', index=False)
all_grouped_reindex.to_csv('/PLS/brain/plot/all_grouped_reindex.csv', index=False)

In [None]:
# Modality names in their raw (pre-renaming) form.
# Note: the next cell rebinds dti_idp, rs_parcellations and rs_idp to the
# renamed display names; only dti_struct keeps the value assigned here.

# Structural connectomes (atlas x edge weight).
dti_struct = [
    'aparc MSA I Connectome Streamline Count',
    'aparc MSA I Connectome SIFT2',
    'aparc a2009s MSA I Connectome Streamline Count',
    'Schaefer7n200p MSA I Connectome Streamline Count',
    'Schaefer7n200p MSA I Connectome SIFT2',
    'aparc MSA I Connectome FA',
    'aparc a2009s MSA I Connectome FA',
    'aparc MSA I Connectome Mean Length',
    'aparc a2009s MSA I Connectome SIFT2',
    'Schaefer7n200p MSA I Connectome FA',
    'aparc a2009s MSA I Connectome Mean Length',
    'Glasser MSA IV Connectome SIFT2',
    'Glasser MSA IV Connectome Streamline Count',
    'Glasser MSA IV Connectome FA',
    'Schaefer7n500p MSA IV Connectome SIFT2',
    'Schaefer7n500p MSA IV Connectome Streamline Count',
    'Glasser MSA I Connectome FA',
    'Schaefer7n200p MSA I Connectome Mean Length',
    'Glasser MSA I Connectome Streamline Count',
    'Schaefer7n500p MSA IV Connectome FA',
    'Glasser MSA I Connectome SIFT2',
    'Glasser MSA IV Connectome Mean Length',
    'Glasser MSA I Connectome Mean Length',
    'Schaefer7n500p MSA IV Connectome Mean Length',
]

# Diffusion measures (TBSS and probabilistic variants).
dti_idp = [
    'OD Probabilistic',
    'MO Probabilistic',
    'OD TBSS',
    'ICVF Probabilistic',
    'MO TBSS',
    'ISOVF Probabilistic',
    'FA Probabilistic',
    'L2 Probabilistic',
    'L1 Probabilistic',
    'ISOVF TBSS',
    'L3 Probabilistic',
    'MD TBSS',
    'MD Probabilistic',
    'L2 TBSS',
    'L3 TBSS',
    'ICVF TBSS',
    'L1 TBSS',
    'FA TBSS',
]

# Full-correlation matrices per atlas.
rs_parcellations = [
    'aparc MSA I Full correlation',
    'aparc a2009s MSA I Full correlation',
    'Glasser MSA I Full correlation',
    'Glasser MSA IV Full correlation',
    'Schaefer7n200p MSA I Full correlation',
    'Schaefer7n500p MSA IV Full correlation',
]

# IC amplitudes and tangent matrices.
rs_idp = [
    'Amplitudes 21 IC',
    'Amplitudes 55 IC',
    'Tangent matrices 21 IC',
    'Tangent matrices 55 IC',
]

In [None]:
# Define modalities: after renaming (display names used as plot labels).

# Functional connectivity per atlas.
rs_parcellations = [
    f'{atlas} Functional Connectivity'
    for atlas in ('aparc-I', 'aparc.a2009s-I', 'Glasser-I', 'Glasser-IV',
                  'Schaefer200-I', 'Schaefer500-IV')
]

rs_idp = [
    '55 IC Amplitudes',
    '21 IC Amplitudes',
    '21 IC Functional Connectivity',
    '55 IC Functional Connectivity',
]

# Structural connectomes: every atlas combined with every edge weight
# (atlas-major order).
dti_parcellations = [
    f'{atlas} {metric}'
    for atlas in ('aparc.a2009s-I', 'aparc-I', 'Glasser-I', 'Glasser-IV',
                  'Schaefer200-I', 'Schaefer500-IV')
    for metric in ('FA', 'Mean Length', 'SIFT2', 'Streamline Count')
]

# Diffusion measures: each measure as TBSS then probabilistic.
dti_idp = [
    f'{measure} {method}'
    for measure in ('FA', 'MD', 'L1', 'L2', 'L3', 'MO', 'OD', 'ICVF', 'ISOVF')
    for method in ('TBSS', 'Prob.')
]

# NOTE(review): 'ASEG Mean Thickness' here vs. 'ASEG mean intensity' in the raw
# modalities_struct list defined further down — confirm which label is intended.
t1t2_modalities = [
    'FSL FAST',
    'FSL FIRST',
    'ASEG Mean Thickness',
    'ASEG Volume',
    'BA ex-vivo Area',
    'BA ex-vivo Mean Thickness',
    'BA ex-vivo Volume',
    'aparc.a2009s Area',
    'aparc.a2009s Mean Thickness',
    'aparc.a2009s Volume',
    'DKT Area',
    'DKT Mean Thickness',
    'DKT Volume',
    'Desikan GM/WM Intensity',
    'Desikan Pial',
    'Desikan WM Area',
    'Desikan WM Mean Thickness',
    'Desikan WM Volume',
    'Subcor. Volumetric Subsegment.',
    'Whole-brain T1/T2',
]

# All modalities + Stacked

In [None]:
# Define modalities (raw, pre-renaming names).
# Note: the next cell rebinds dti_idp, rs_parcellations and rs_idp to the
# renamed display names.

# Structural connectomes (atlas x edge weight).
dti_struct = [
    'aparc MSA I Connectome Streamline Count',
    'aparc MSA I Connectome SIFT2',
    'aparc a2009s MSA I Connectome Streamline Count',
    'Schaefer7n200p MSA I Connectome Streamline Count',
    'Schaefer7n200p MSA I Connectome SIFT2',
    'aparc MSA I Connectome FA',
    'aparc a2009s MSA I Connectome FA',
    'aparc MSA I Connectome Mean Length',
    'aparc a2009s MSA I Connectome SIFT2',
    'Schaefer7n200p MSA I Connectome FA',
    'aparc a2009s MSA I Connectome Mean Length',
    'Glasser MSA IV Connectome SIFT2',
    'Glasser MSA IV Connectome Streamline Count',
    'Glasser MSA IV Connectome FA',
    'Schaefer7n500p MSA IV Connectome SIFT2',
    'Schaefer7n500p MSA IV Connectome Streamline Count',
    'Glasser MSA I Connectome FA',
    'Schaefer7n200p MSA I Connectome Mean Length',
    'Glasser MSA I Connectome Streamline Count',
    'Schaefer7n500p MSA IV Connectome FA',
    'Glasser MSA I Connectome SIFT2',
    'Glasser MSA IV Connectome Mean Length',
    'Glasser MSA I Connectome Mean Length',
    'Schaefer7n500p MSA IV Connectome Mean Length',
]

# Diffusion measures (TBSS and probabilistic variants).
dti_idp = [
    'OD Probabilistic',
    'MO Probabilistic',
    'OD TBSS',
    'ICVF Probabilistic',
    'MO TBSS',
    'ISOVF Probabilistic',
    'FA Probabilistic',
    'L2 Probabilistic',
    'L1 Probabilistic',
    'ISOVF TBSS',
    'L3 Probabilistic',
    'MD TBSS',
    'MD Probabilistic',
    'L2 TBSS',
    'L3 TBSS',
    'ICVF TBSS',
    'L1 TBSS',
    'FA TBSS',
]

# Full-correlation matrices per atlas.
rs_parcellations = [
    'aparc MSA I Full correlation',
    'aparc a2009s MSA I Full correlation',
    'Glasser MSA I Full correlation',
    'Glasser MSA IV Full correlation',
    'Schaefer7n200p MSA I Full correlation',
    'Schaefer7n500p MSA IV Full correlation',
]

# IC amplitudes and tangent matrices.
rs_idp = [
    'Amplitudes 21 IC',
    'Amplitudes 55 IC',
    'Tangent matrices 21 IC',
    'Tangent matrices 55 IC',
]

# T1/T2 structural feature sets.
modalities_struct = [
    'FSL FAST',
    'FSL FIRST',
    'ASEG mean intensity',
    'ASEG volume',
    'BA ex-vivo area',
    'BA ex-vivo mean thickness',
    'BA ex-vivo volume',
    'aparc a2009s area',
    'aparc a2009s mean thickness',
    'aparc a2009s volume',
    'Desikan-Killiany-Tourville area',
    'Desikan-Killiany-Tourville mean thickness',
    'Desikan-Killiany-Tourville volume',
    'Desikan grey/white matter intensity',
    'Desikan pial',
    'Desikan white matter area',
    'Desikan white matter mean thickness',
    'Desikan white matter volume',
    'Subcortical volumetric subsegmentation',
    'Whole-brain T1/T2',
]

In [None]:
# Define modalities: after renaming (display names used as plot labels and for
# assigning each modality to a colour/marker group in the plots below).

# Functional connectivity per atlas.
rs_parcellations = [
    f'{atlas} Functional Connectivity'
    for atlas in ('aparc-I', 'aparc.a2009s-I', 'Glasser-I', 'Glasser-IV',
                  'Schaefer200-I', 'Schaefer500-IV')
]

rs_idp = [
    '55 IC Amplitudes',
    '21 IC Amplitudes',
    '21 IC Functional Connectivity',
    '55 IC Functional Connectivity',
]

# Structural connectomes: every atlas combined with every edge weight
# (atlas-major order).
dti_parcellations = [
    f'{atlas} {metric}'
    for atlas in ('aparc.a2009s-I', 'aparc-I', 'Glasser-I', 'Glasser-IV',
                  'Schaefer200-I', 'Schaefer500-IV')
    for metric in ('FA', 'Mean Length', 'SIFT2', 'Streamline Count')
]

# Diffusion measures: each measure as TBSS then probabilistic.
dti_idp = [
    f'{measure} {method}'
    for measure in ('FA', 'MD', 'L1', 'L2', 'L3', 'MO', 'OD', 'ICVF', 'ISOVF')
    for method in ('TBSS', 'Prob.')
]

# NOTE(review): 'ASEG Mean Thickness' here vs. 'ASEG mean intensity' in the raw
# modalities_struct list defined in the previous cell — confirm which is intended.
t1t2_modalities = [
    'FSL FAST',
    'FSL FIRST',
    'ASEG Mean Thickness',
    'ASEG Volume',
    'BA ex-vivo Area',
    'BA ex-vivo Mean Thickness',
    'BA ex-vivo Volume',
    'aparc.a2009s Area',
    'aparc.a2009s Mean Thickness',
    'aparc.a2009s Volume',
    'DKT Area',
    'DKT Mean Thickness',
    'DKT Volume',
    'Desikan GM/WM Intensity',
    'Desikan Pial',
    'Desikan WM Area',
    'Desikan WM Mean Thickness',
    'Desikan WM Volume',
    'Subcor. Volumetric Subsegment.',
    'Whole-brain T1/T2',
]

### Pearson r

In [None]:
# Results of the model stacked across all modalities; keep only the XGB rows.
all_stack = pd.read_csv('/PLS/brain/stacking/All_modalities_stacked_five_folds.csv')
all_stack = all_stack[all_stack['Algorithm'] == 'xgb']

# .assign() returns a new frame, so the label column is never written into a
# slice of all_stack (the in-place assignment triggered SettingWithCopyWarning).
all_stack_xgb_r = all_stack[['Fold', 'Pearson r']].assign(
    Modality='All MRI Modalities Stacked (XGB)'
)
all_stack_xgb_r

In [None]:
# Append the all-modality stacked model to the DTI, resting-state and T1/T2
# Pearson r tables, then save the combined long table (row index dropped).
_r_tables = [dti_full_r, rs_full_r, t1t2_full_r, all_stack_xgb_r]
all_modalities_plus_stacked_plot_r = pd.concat(_r_tables, axis=0)
all_modalities_plus_stacked_plot_r.to_csv(
    '/PLS/brain/stacking/all_modalities_plus_stacked_plot_r.csv',
    index=False,
)

In [None]:
# Reload the combined Pearson r table from the CSV written by the previous cell.
# The file was saved with index=False, so the reloaded frame gets a fresh
# default integer row index.
all_modalities_plus_stacked_plot_r = pd.read_csv('/PLS/brain/stacking/all_modalities_plus_stacked_plot_r.csv')
all_modalities_plus_stacked_plot_r

In [None]:
# Rename modalities for plotting
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].astype(str).str.replace('aparc MSA I Full correlation', 'aparc-I Functional Connectivity', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('aparc a2009s MSA I Full correlation', 'aparc.a2009s-I Functional Connectivity', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('Glasser MSA I Full correlation', 'Glasser-I Functional Connectivity', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('Glasser MSA IV Full correlation', 'Glasser-IV Functional Connectivity', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('Schaefer7n200p MSA I Full correlation', 'Schaefer200-I Functional Connectivity', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('Schaefer7n500p MSA IV Full correlation', 'Schaefer500-IV Functional Connectivity', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('Amplitudes 21 IC', '55 IC Amplitudes', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('Amplitudes 55 IC', '21 IC Amplitudes', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('Tangent matrices 21 IC', '21 IC Functional Connectivity', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('Tangent matrices 55 IC', '55 IC Functional Connectivity', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('aparc a2009s MSA I Connectome FA', 'aparc.a2009s-I FA', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('aparc a2009s MSA I Connectome Mean Length', 'aparc.a2009s-I Mean Length', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('aparc a2009s MSA I Connectome SIFT2', 'aparc.a2009s-I SIFT2', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('aparc a2009s MSA I Connectome Streamline Count', 'aparc.a2009s-I Streamline Count', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('aparc MSA I Connectome FA', 'aparc-I FA', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('aparc MSA I Connectome Mean Length', 'aparc-I Mean Length', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('aparc MSA I Connectome SIFT2', 'aparc-I SIFT2', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('aparc MSA I Connectome Streamline Count', 'aparc-I Streamline Count', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('Glasser MSA I Connectome FA', 'Glasser-I FA', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('Glasser MSA I Connectome Mean Length', 'Glasser-I Mean Length', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('Glasser MSA I Connectome SIFT2', 'Glasser-I SIFT2', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('Glasser MSA I Connectome Streamline Count', 'Glasser-I Streamline Count', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('Glasser MSA IV Connectome FA', 'Glasser-IV FA', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('Glasser MSA IV Connectome Mean Length', 'Glasser-IV Mean Length', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('Glasser MSA IV Connectome SIFT2', 'Glasser-IV SIFT2', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('Glasser MSA IV Connectome Streamline Count', 'Glasser-IV Streamline Count', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('Schaefer7n200p MSA I Connectome FA', 'Schaefer200-I FA', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('Schaefer7n200p MSA I Connectome Mean Length', 'Schaefer200-I Mean Length', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('Schaefer7n200p MSA I Connectome SIFT2', 'Schaefer200-I SIFT2', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('Schaefer7n200p MSA I Connectome Streamline Count', 'Schaefer200-I Streamline Count', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('Schaefer7n500p MSA IV Connectome FA', 'Schaefer500-IV FA', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('Schaefer7n500p MSA IV Connectome Mean Length', 'Schaefer500-IV Mean Length', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('Schaefer7n500p MSA IV Connectome SIFT2', 'Schaefer500-IV SIFT2', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('Schaefer7n500p MSA IV Connectome Streamline Count', 'Schaefer500-IV Streamline Count', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('FA Probabilistic', 'FA Prob.', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('MD Probabilistic', 'MD Prob.', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('L1 Probabilistic', 'L1 Prob.', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('L2 Probabilistic', 'L2 Prob.', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('L3 Probabilistic', 'L3 Prob.', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('MO Probabilistic', 'MO Prob.', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('OD Probabilistic', 'OD Prob.', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('ICVF Probabilistic', 'ICVF Prob.', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('ISOVF Probabilistic', 'ISOVF Prob.', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('aparc a2009s Area', 'aparc.a2009s Area', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('aparc a2009s Mean Thickness', 'aparc.a2009s Mean Thickness', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('aparc a2009s volume', 'aparc.a2009s Volume', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('Desikan-Killiany-Tourville Area', 'DKT Area', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('Desikan-Killiany-Tourville Mean Thickness', 'DKT Mean Thickness', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('Desikan-Killiany-Tourville volume', 'DKT Volume', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('Desikan Grey/White Matter intensity', 'Desikan GM/WM Intensity', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('Desikan pial', 'Desikan Pial', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('Desikan White Matter Area', 'Desikan WM Area', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('Desikan White Matter Mean Thickness', 'Desikan WM Mean Thickness', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('Desikan White Matter volume', 'Desikan WM Volume', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('Subcortical Volumetric Subsegmentation', 'Subcor. Volumetric Subsegment.', regex=True)
all_modalities_plus_stacked_plot_r['Modality'] = all_modalities_plus_stacked_plot_r['Modality'].str.replace('ASEG volume', 'ASEG Volume', regex=True)

In [None]:
# Arrange values for plotting
all_modalities_plus_stacked_plot_r_grouped = all_modalities_plus_stacked_plot_r.groupby('Modality')['Pearson r'].apply(list).reset_index(name='Pearson r values')
all_modalities_plus_stacked_plot_r_grouped_sort = all_modalities_plus_stacked_plot_r.groupby('Modality')['Pearson r'].mean().reset_index(name='Mean Pearson r values').sort_values(by='Mean Pearson r values', ascending = True)
all_modalities_plus_stacked_plot_r_new_index = all_modalities_plus_stacked_plot_r_grouped_sort.index
all_modalities_plus_stacked_grouped_reindex_r = all_modalities_plus_stacked_plot_r_grouped.reindex(all_modalities_plus_stacked_plot_r_new_index)
all_modalities_plus_stacked_grouped_reindex_r = all_modalities_plus_stacked_grouped_reindex_r.reset_index(drop=True)
all_modalities_plus_stacked_grouped_reindex_r

In [None]:
plt.figure(figsize=(20, 40))

# Order in which the groups appear in the legend.
legend_order = [
    'rsMRI IDPs',
    'rsMRI Parcellations',
    'rsMRI Stacked (RF)',
    'dwMRI IDPs',
    'dwMRI Parcellations',
    'dwMRI Stacked (RF)',
    'T1w/T2w Structural MRI',
    'T1w/T2w Structural Stacked (SVR)',
    'All MRI Modalities Stacked (XGB)']

# Tracks which legend labels have already been attached to a scatter artist,
# so every group contributes exactly one legend entry.
label_used = {label: False for label in legend_order}

# Stacked models get a thicker mean tick and a printed mean value.
stacked_modalities = [
    'rsMRI Stacked (RF)',
    'dwMRI Stacked (RF)',
    'T1w/T2w Structural Stacked (SVR)',
    'All MRI Modalities Stacked (XGB)',
]

# (legend label, member modality names, colour, marker, marker size).
# Checked top to bottom; the first group containing the modality wins.
marker_styles = [
    ('rsMRI IDPs', rs_idp, 'seagreen', 'D', 90),
    ('rsMRI Parcellations', rs_parcellations, 'mediumaquamarine', 'D', 90),
    ('rsMRI Stacked (RF)', ['rsMRI Stacked (RF)'], 'red', 'D', 90),
    ('dwMRI IDPs', dti_idp, 'blue', 'o', 90),
    ('dwMRI Parcellations', dti_parcellations, 'steelblue', 'o', 90),
    ('dwMRI Stacked (RF)', ['dwMRI Stacked (RF)'], 'crimson', 'o', 90),
    ('T1w/T2w Structural MRI', t1t2_modalities, 'wheat', 's', 90),
    ('T1w/T2w Structural Stacked (SVR)', ['T1w/T2w Structural Stacked (SVR)'], 'crimson', 's', 90),
    ('All MRI Modalities Stacked (XGB)', ['All MRI Modalities Stacked (XGB)'], 'crimson', '*', 200),
]

# One horizontal row per modality (lowest mean r at the bottom).
for i, modality in enumerate(all_modalities_plus_stacked_grouped_reindex_r['Modality']):
    x = all_modalities_plus_stacked_plot_r.loc[
        all_modalities_plus_stacked_plot_r['Modality'] == modality, 'Pearson r'
    ]
    y = [i + 1] * len(x)

    # Neutral fallback for a modality that belongs to no known group. The
    # original reused the previous iteration's style here (or raised NameError
    # when the first modality was unrecognised).
    label, color, marker, s = None, 'grey', 'x', 90
    for group_label, members, group_color, group_marker, group_size in marker_styles:
        if modality in members:
            label, color, marker, s = group_label, group_color, group_marker, group_size
            break

    # Only the first artist of each group carries the legend label.
    legend_kwargs = {}
    if label is not None and not label_used[label]:
        legend_kwargs['label'] = label
        label_used[label] = True

    plt.scatter(x, y, color=color, marker=marker, s=s, zorder=3, alpha=0.8, **legend_kwargs)
    plt.plot(x, y, color='black', linewidth=0.5)

    # Red tick at the mean of this modality's values; thicker and annotated with
    # the value for the stacked models.
    mean_value = np.mean(x)
    is_stacked = modality in stacked_modalities
    plt.plot([mean_value, mean_value], [i + 0.85, i + 1.15],
             color='red', linewidth=3 if is_stacked else 1)
    if is_stacked:
        plt.text(mean_value, i + 1.4, f'$r$ = {mean_value:.2f}', color='red', fontsize=12, ha='center')

# Build the legend in legend_order, restricted to groups actually drawn.
# Handles and labels are filtered together so they stay aligned even when a
# group is absent (the original passed the unfiltered label list).
handles, labels = plt.gca().get_legend_handles_labels()
by_label = dict(zip(labels, handles))
ordered_labels = [label for label in legend_order if label in by_label]
ordered_handles = [by_label[label] for label in ordered_labels]
plt.legend(ordered_handles, ordered_labels, fontsize=20, loc='lower right')

plt.gca().xaxis.set_major_locator(ticker.MultipleLocator(0.02))
plt.yticks(range(1, len(all_modalities_plus_stacked_grouped_reindex_r['Modality']) + 1), all_modalities_plus_stacked_grouped_reindex_r['Modality'], fontsize=20)
plt.xticks(fontsize=20, rotation=30)
plt.title("Out-of-Sample Performance of MRI Modalities: \n Pearson's $r$", fontsize=30)
plt.savefig('/IBu/Articles-Conferences/Articles/1.JAMA/Plots_and_Tables/pls_stack/PLS_Stacking_All_Mod_r.png',
            bbox_inches="tight",
            pad_inches=1,
            transparent=False,
            facecolor="w",
            edgecolor='w',
            )
plt.show()

### R2

In [None]:
# Load the per-fold R2 tables for the rsMRI, dwMRI and T1w/T2w model families.
rs_full_r2, dti_full_r2, t1t2_full_r2 = (
    pd.read_csv(csv_path)
    for csv_path in (
        '/PLS/brain/stacking/rs_pls_stack_plot_r2.csv',
        '/PLS/brain/stacking/dti_pls_stack_plot_r2.csv',
        '/PLS/brain/stacking/t1t2_pls_stack_plot_r2.csv',
    )
)

In [None]:
# Per-fold R2 of the all-modality stacked model, restricted to the XGB rows.
all_stack = pd.read_csv('/PLS/brain/stacking/All_modalities_stacked_five_folds.csv')
all_stack = all_stack[all_stack['Algorithm'] == 'xgb']

# `.assign` returns a new frame, so the 'Modality' column is not written onto a
# slice of `all_stack` (which raised SettingWithCopyWarning).
all_stack_xgb_r2 = all_stack[['Fold', 'R2']].assign(Modality='All MRI Modalities Stacked (XGB)')

# Stack the dwMRI, rsMRI, T1w/T2w and all-modality tables row-wise for plotting.
all_modalities_plus_stacked_plot_r2 = pd.concat(
    [dti_full_r2, rs_full_r2, t1t2_full_r2, all_stack_xgb_r2],
    axis=0,
)

In [None]:
# Rename modalities for plotting.
# Each (pattern, replacement) pair is applied in the listed order with regex=True.
# NOTE(review): 'Amplitudes 21 IC' maps to '55 IC Amplitudes' and 'Amplitudes 55 IC'
# maps to '21 IC Amplitudes', while the tangent-matrix rows keep their IC count.
# Confirm the swap is an intentional correction and not a copy-paste slip.
_r2_plot_label_pairs = [
    ('aparc MSA I Full correlation', 'aparc-I Functional Connectivity'),
    ('aparc a2009s MSA I Full correlation', 'aparc.a2009s-I Functional Connectivity'),
    ('Glasser MSA I Full correlation', 'Glasser-I Functional Connectivity'),
    ('Glasser MSA IV Full correlation', 'Glasser-IV Functional Connectivity'),
    ('Schaefer7n200p MSA I Full correlation', 'Schaefer200-I Functional Connectivity'),
    ('Schaefer7n500p MSA IV Full correlation', 'Schaefer500-IV Functional Connectivity'),
    ('Amplitudes 21 IC', '55 IC Amplitudes'),
    ('Amplitudes 55 IC', '21 IC Amplitudes'),
    ('Tangent matrices 21 IC', '21 IC Functional Connectivity'),
    ('Tangent matrices 55 IC', '55 IC Functional Connectivity'),
    ('aparc a2009s MSA I Connectome FA', 'aparc.a2009s-I FA'),
    ('aparc a2009s MSA I Connectome Mean Length', 'aparc.a2009s-I Mean Length'),
    ('aparc a2009s MSA I Connectome SIFT2', 'aparc.a2009s-I SIFT2'),
    ('aparc a2009s MSA I Connectome Streamline Count', 'aparc.a2009s-I Streamline Count'),
    ('aparc MSA I Connectome FA', 'aparc-I FA'),
    ('aparc MSA I Connectome Mean Length', 'aparc-I Mean Length'),
    ('aparc MSA I Connectome SIFT2', 'aparc-I SIFT2'),
    ('aparc MSA I Connectome Streamline Count', 'aparc-I Streamline Count'),
    ('Glasser MSA I Connectome FA', 'Glasser-I FA'),
    ('Glasser MSA I Connectome Mean Length', 'Glasser-I Mean Length'),
    ('Glasser MSA I Connectome SIFT2', 'Glasser-I SIFT2'),
    ('Glasser MSA I Connectome Streamline Count', 'Glasser-I Streamline Count'),
    ('Glasser MSA IV Connectome FA', 'Glasser-IV FA'),
    ('Glasser MSA IV Connectome Mean Length', 'Glasser-IV Mean Length'),
    ('Glasser MSA IV Connectome SIFT2', 'Glasser-IV SIFT2'),
    ('Glasser MSA IV Connectome Streamline Count', 'Glasser-IV Streamline Count'),
    ('Schaefer7n200p MSA I Connectome FA', 'Schaefer200-I FA'),
    ('Schaefer7n200p MSA I Connectome Mean Length', 'Schaefer200-I Mean Length'),
    ('Schaefer7n200p MSA I Connectome SIFT2', 'Schaefer200-I SIFT2'),
    ('Schaefer7n200p MSA I Connectome Streamline Count', 'Schaefer200-I Streamline Count'),
    ('Schaefer7n500p MSA IV Connectome FA', 'Schaefer500-IV FA'),
    ('Schaefer7n500p MSA IV Connectome Mean Length', 'Schaefer500-IV Mean Length'),
    ('Schaefer7n500p MSA IV Connectome SIFT2', 'Schaefer500-IV SIFT2'),
    ('Schaefer7n500p MSA IV Connectome Streamline Count', 'Schaefer500-IV Streamline Count'),
    ('FA Probabilistic', 'FA Prob.'),
    ('MD Probabilistic', 'MD Prob.'),
    ('L1 Probabilistic', 'L1 Prob.'),
    ('L2 Probabilistic', 'L2 Prob.'),
    ('L3 Probabilistic', 'L3 Prob.'),
    ('MO Probabilistic', 'MO Prob.'),
    ('OD Probabilistic', 'OD Prob.'),
    ('ICVF Probabilistic', 'ICVF Prob.'),
    ('ISOVF Probabilistic', 'ISOVF Prob.'),
    ('aparc a2009s Area', 'aparc.a2009s Area'),
    ('aparc a2009s Mean Thickness', 'aparc.a2009s Mean Thickness'),
    ('aparc a2009s volume', 'aparc.a2009s Volume'),
    ('Desikan-Killiany-Tourville Area', 'DKT Area'),
    ('Desikan-Killiany-Tourville Mean Thickness', 'DKT Mean Thickness'),
    ('Desikan-Killiany-Tourville volume', 'DKT Volume'),
    ('Desikan Grey/White Matter intensity', 'Desikan GM/WM Intensity'),
    ('Desikan pial', 'Desikan Pial'),
    ('Desikan White Matter Area', 'Desikan WM Area'),
    ('Desikan White Matter Mean Thickness', 'Desikan WM Mean Thickness'),
    ('Desikan White Matter volume', 'Desikan WM Volume'),
    ('Subcortical Volumetric Subsegmentation', 'Subcor. Volumetric Subsegment.'),
    ('ASEG volume', 'ASEG Volume'),
]

# Work on a string copy of the column, apply every rename in order, then write it back.
_r2_modality_names = all_modalities_plus_stacked_plot_r2['Modality'].astype(str)
for _r2_raw_name, _r2_plot_name in _r2_plot_label_pairs:
    _r2_modality_names = _r2_modality_names.str.replace(_r2_raw_name, _r2_plot_name, regex=True)
all_modalities_plus_stacked_plot_r2['Modality'] = _r2_modality_names

In [None]:
# Arrange values for plotting: one row per modality, ordered by ascending mean R2.

# Per-modality list of R2 values.
all_modalities_plus_stacked_plot_r2_grouped = (
    all_modalities_plus_stacked_plot_r2
    .groupby('Modality')['R2']
    .apply(list)
    .reset_index(name='R2 values')
)

# Per-modality mean R2, sorted ascending; its index records the plotting order.
all_modalities_plus_stacked_plot_r2_grouped_sort = (
    all_modalities_plus_stacked_plot_r2
    .groupby('Modality')['R2']
    .mean()
    .reset_index(name='Mean R2 values')
    .sort_values(by='Mean R2 values', ascending=True)
)
all_modalities_plus_stacked_plot_r2_new_index = all_modalities_plus_stacked_plot_r2_grouped_sort.index

# Reorder the per-modality lists by that index and renumber the rows from 0.
# NOTE(review): "r2eindex" in the name below is presumably a typo for "reindex";
# it is kept because the plotting cell refers to this exact name.
all_modalities_plus_stacked_grouped_r2eindex_r2 = (
    all_modalities_plus_stacked_plot_r2_grouped
    .reindex(all_modalities_plus_stacked_plot_r2_new_index)
    .reset_index(drop=True)
)
all_modalities_plus_stacked_grouped_r2eindex_r2

In [None]:
# Dot plot of out-of-sample R2: one row per modality (in the order of
# `all_modalities_plus_stacked_grouped_r2eindex_r2`), one marker per R2 value of
# that modality, and a red tick at the modality's mean R2.
plt.figure(figsize=(20, 40))

legend_order = [
    'rsMRI IDPs',
    'rsMRI Parcellations',
    'rsMRI Stacked (RF)',
    'dwMRI IDPs',
    'dwMRI Parcellations',
    'dwMRI Stacked (RF)',
    'T1w/T2w Structural MRI',
    'T1w/T2w Structural Stacked (SVR)',
    'All MRI Modalities Stacked (XGB)']

# (color, marker, marker size) for each legend category.
category_style = {
    'rsMRI IDPs': ('seagreen', 'D', 90),
    'rsMRI Parcellations': ('mediumaquamarine', 'D', 90),
    'rsMRI Stacked (RF)': ('red', 'D', 90),
    'dwMRI IDPs': ('blue', 'o', 90),
    'dwMRI Parcellations': ('steelblue', 'o', 90),
    'dwMRI Stacked (RF)': ('crimson', 'o', 90),
    'T1w/T2w Structural MRI': ('wheat', 's', 90),
    'T1w/T2w Structural Stacked (SVR)': ('crimson', 's', 90),
    'All MRI Modalities Stacked (XGB)': ('crimson', '*', 200),
}

# Style for a modality that matches no category. Previously such a row reused the
# previous row's style, or raised NameError if it came first.
fallback_style = ('grey', 'o', 90)

# Rows whose mean is emphasised with a thicker tick and a text annotation.
stacked_models = [
    'rsMRI Stacked (RF)',
    'dwMRI Stacked (RF)',
    'T1w/T2w Structural Stacked (SVR)',
    'All MRI Modalities Stacked (XGB)',
]

label_used = {label: False for label in legend_order}

for i, modality in enumerate(all_modalities_plus_stacked_grouped_r2eindex_r2['Modality']):
    # R2 values of this modality, all drawn on row i + 1.
    x = all_modalities_plus_stacked_plot_r2.loc[all_modalities_plus_stacked_plot_r2['Modality'] == modality, 'R2']
    y = [i + 1] * len(x)

    # Legend category of the row; the order of the checks is significant and kept as-is.
    if modality in rs_idp:
        label = 'rsMRI IDPs'
    elif modality in rs_parcellations:
        label = 'rsMRI Parcellations'
    elif modality == 'rsMRI Stacked (RF)':
        label = 'rsMRI Stacked (RF)'
    elif modality in dti_idp:
        label = 'dwMRI IDPs'
    elif modality in dti_parcellations:
        label = 'dwMRI Parcellations'
    elif modality == 'dwMRI Stacked (RF)':
        label = 'dwMRI Stacked (RF)'
    elif modality in t1t2_modalities:
        label = 'T1w/T2w Structural MRI'
    elif modality == 'T1w/T2w Structural Stacked (SVR)':
        label = 'T1w/T2w Structural Stacked (SVR)'
    elif modality == 'All MRI Modalities Stacked (XGB)':
        label = 'All MRI Modalities Stacked (XGB)'
    else:
        label = None

    color, marker, s = category_style.get(label, fallback_style)

    # Attach the legend label only to the first row of each category.
    if label and not label_used[label]:
        plt.scatter(x, y, color=color, marker=marker, s=s, zorder=3, alpha=0.8, label=label)
        label_used[label] = True
    else:
        plt.scatter(x, y, color=color, marker=marker, s=s, zorder=3, alpha=0.8)

    # Thin black line joining the values of the row.
    plt.plot(x, y, color='black', linewidth=0.5)

    # Red tick at the row mean; stacked models get a thicker tick plus the value.
    mean_value = np.mean(x)
    is_stacked = modality in stacked_models
    plt.plot([mean_value, mean_value], [i + 0.85, i + 1.15], color='red', linewidth=3 if is_stacked else 1)
    if is_stacked:
        # This figure shows R2, so the annotation reads R^2 (it previously said r).
        plt.text(mean_value, i + 1.4, f'$R^2$ = {mean_value:.2f}', color='red', fontsize=12, ha='center')

# Legend in `legend_order`, restricted to categories that were actually drawn so that
# handles and labels stay paired.
handles, labels = plt.gca().get_legend_handles_labels()
by_label = dict(zip(labels, handles))
ordered_labels = [label for label in legend_order if label in by_label]
ordered_handles = [by_label[label] for label in ordered_labels]
plt.legend(ordered_handles, ordered_labels, fontsize=20, loc='lower right')

# One y tick per modality row, labelled with the modality name.
plt.gca().xaxis.set_major_locator(ticker.MultipleLocator(0.02))
plt.yticks(
    range(1, len(all_modalities_plus_stacked_grouped_r2eindex_r2['Modality']) + 1),
    all_modalities_plus_stacked_grouped_r2eindex_r2['Modality'],
    fontsize=20,
)
plt.xticks(fontsize=20, rotation=30)
plt.title("Out-of-Sample Performance of MRI Modalities: $R^2$", fontsize=30)
plt.savefig(
    '/IBu/Articles-Conferences/Articles/1.JAMA/Plots_and_Tables/pls_stack/PLS_Stacking_All_Mod_R2.png',
    bbox_inches="tight",
    pad_inches=1,
    transparent=False,
    facecolor="w",
    edgecolor='w',
)
plt.show()