In [1]:
### The required libraries and packages ###
import os 
import networkx as nx
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

In [2]:
folder_path = "data/"
FA_path = "FA_matrices_c/"
GM_path = "GM_matrices_c/"
fMRI_path = "fMRI_matrices_c/"

In [3]:
names = pd.read_csv(folder_path + "mindboggle_ROIs.txt", sep = "\t" )
name_list = names.iloc[:, 0].tolist()

In [4]:
clinical_data = pd.read_csv(folder_path + "clinic.csv", index_col = 1)
cols_to_keep = ["age", "sex", "dd", "edss", "controls_ms"]
clinical_data = clinical_data[cols_to_keep]

In [5]:
def load_data(matrices_path):
    # We obtain all files names and filter for csv
    all_files = os.listdir(folder_path + matrices_path)
    csv_files = [file for file in all_files if file.endswith('.csv')]
    
    # Depending on which matrices we import we have diferent "endnames"
    if matrices_path == FA_path:
        to_split ="_FA_factor_corrected.csv"
    elif matrices_path == GM_path:
        to_split = "_GM_matrix_corrected.csv"
    elif matrices_path == fMRI_path:
        to_split = "_r_matrix_corrected.csv"

    # Create a dictionary
    #### keys are subjects ids, obtained from splitting file name
    #### values are each dataframe
    #### column names are nodes names
    dfs = {file.split(to_split)[0]: pd.read_csv(os.path.join(folder_path, matrices_path,file), header = None, names = name_list) 
          for file in csv_files}
    
    # Set dataframes index as nodes names
    for key, df in dfs.items():
        df.index = name_list
    
    # Create a list to store the upper triangular elements as dictionaries
    upper_triangular_dicts = []

    # Iterate through the dictionary and extract the upper triangular elements
    for key, df in dfs.items():
        n = len(df)
        upper_triangular_data = {}

        for i in range(n):
            for j in range(i+1, n):  # j must be > i in order to iterate over upper diagonal, we skip diagonal
                row_name = df.index[i] 
                col_name = df.columns[j]
                combined_name = f"{row_name}/{col_name}" # Create columns name as combination of nodes, i.e. connection 
                value = df.iloc[i, j]
                upper_triangular_data[combined_name] = value # Add value to dictionary

    # Add the upper triangular dictionary to the list
        upper_triangular_dicts.append(upper_triangular_data)

    # Convert the list of dictionaries to a DataFrame
    result = pd.DataFrame(upper_triangular_dicts, index=dfs.keys())
    df_final =pd.concat([result, clinical_data], axis = 1) # Merge with clinical data
    
    return(df_final)

In [6]:
FA_df = load_data(FA_path)
FA_df.head()

Unnamed: 0,ctx-lh-caudalanteriorcingulate/ctx-lh-caudalmiddlefrontal,ctx-lh-caudalanteriorcingulate/ctx-lh-cuneus,ctx-lh-caudalanteriorcingulate/ctx-lh-entorhinal,ctx-lh-caudalanteriorcingulate/ctx-lh-fusiform,ctx-lh-caudalanteriorcingulate/ctx-lh-inferiorparietal,ctx-lh-caudalanteriorcingulate/ctx-lh-inferiortemporal,ctx-lh-caudalanteriorcingulate/ctx-lh-isthmuscingulate,ctx-lh-caudalanteriorcingulate/ctx-lh-lateraloccipital,ctx-lh-caudalanteriorcingulate/ctx-lh-lateralorbitofrontal,ctx-lh-caudalanteriorcingulate/ctx-lh-lingual,...,ctx-rh-superiortemporal/ctx-rh-transversetemporal,ctx-rh-superiortemporal/ctx-rh-insula,ctx-rh-supramarginal/ctx-rh-transversetemporal,ctx-rh-supramarginal/ctx-rh-insula,ctx-rh-transversetemporal/ctx-rh-insula,age,sex,dd,edss,controls_ms
002MSVIS,0.422862,0.0,0.0,0.0,0.374716,0.344459,0.502935,0.0,0.383026,0.0,...,0.354605,0.401124,0.383299,0.37359,0.366205,51.53,1,22.18,7.5,1
003MSVIS,0.360946,0.0,0.0,0.0,0.346052,0.285988,0.467187,0.258432,0.376243,0.0,...,0.330118,0.37722,0.358125,0.352559,0.360763,58.06,0,9.57,6.0,1
004MSVIS,0.452194,0.0,0.0,0.0,0.390203,0.0,0.504437,0.330954,0.408644,0.0,...,0.322964,0.381072,0.390045,0.391322,0.372827,60.78,1,6.79,3.0,1
005MSVIS,0.395634,0.0,0.0,0.0,0.405738,0.363048,0.488313,0.321842,0.339294,0.005412,...,0.354825,0.412469,0.450964,0.42259,0.392626,37.95,0,8.45,1.5,1
010MSVIS,0.381513,0.0,0.0,0.0,0.385011,0.34425,0.462187,0.0,0.390884,0.0,...,0.300334,0.354225,0.380448,0.372597,0.362986,52.64,0,9.05,2.0,1


In [8]:
# Statistics
from scipy.stats import mannwhitneyu, ttest_ind, bartlett, shapiro
from statsmodels.stats.multitest import multipletests

In [35]:
import warnings
warnings.filterwarnings('ignore')

#Creation of empty dataframe to stro values
connection_stats = pd.DataFrame(columns = ["ROI", "pvalue"])
alpha = 0.05
features = FA_df.iloc[:, :-6].columns.to_list() #Remove clinical data columns and convert lo list

#Divide our dataset into HS and PwMS to check normality
HS = FA_df.loc[FA_df["controls_ms"] == 0, :]
PwMS = FA_df.loc[FA_df["controls_ms"] == 1, :]

for feat in features:
    # Check normality with Shapiro
    stat1, p1 = shapiro(HS[feat])
    stat2, p2 = shapiro(PwMS[feat])

    if (p1 > alpha) and (p2 > alpha): 

        #Check homoscedasticity to include it in test
        stat_b, p_b = bartlett(HS[feat], PwMS[feat])
        hom = True
        if p_b <= alpha:
            hom = False
        
        #If it meets conditions it is normal
        
        stat, p = ttest_ind(HS[feat], PwMS[feat], equal_var = hom)
        
    else:
        # When it is not normal
        stat,p = mannwhitneyu(HS[feat], PwMS[feat])
        
    if p <= alpha:
        new_row =pd.DataFrame({"ROI": [feat], "pvalue": [p]})
        connection_stats = pd.concat([connection_stats, new_row], ignore_index = True)  
    
# Add Bonferroni correction

bonf = multipletests(connection_stats["pvalue"], method = "bonferroni")
connection_stats["p_bonf"] = bonf[1]
# New dataframe only with ROIs that pass the test
df_bonf = connection_stats[connection_stats["p_bonf"] < alpha]
# keep a list of columns  (ROIs) that pass the test
stat_cols = df_bonf["ROI"].tolist()



In [43]:
#Create final dataframes
passed_df = FA_df[stat_cols]
clinical_data = FA_df[cols_to_keep] ##REMEMBER WE HAVE THESE COLUMS FROM PREVIOUS CELL

final_df = pd.concat([passed_df, clinical_data], axis = 1)
final_df

Unnamed: 0,ctx-lh-caudalanteriorcingulate/Left-Caudate,ctx-lh-caudalanteriorcingulate/ctx-rh-caudalmiddlefrontal,ctx-lh-caudalanteriorcingulate/ctx-rh-rostralmiddlefrontal,ctx-lh-caudalmiddlefrontal/ctx-rh-paracentral,ctx-lh-cuneus/ctx-lh-inferiortemporal,ctx-lh-cuneus/ctx-lh-middletemporal,ctx-lh-cuneus/ctx-lh-superiortemporal,ctx-lh-cuneus/ctx-lh-insula,ctx-lh-cuneus/Left-Thalamus-Proper,ctx-lh-cuneus/Left-Putamen,...,ctx-rh-paracentral/ctx-rh-posteriorcingulate,ctx-rh-pericalcarine/ctx-rh-superiortemporal,ctx-rh-pericalcarine/ctx-rh-insula,ctx-rh-postcentral/ctx-rh-posteriorcingulate,ctx-rh-posteriorcingulate/ctx-rh-precentral,age,sex,dd,edss,controls_ms
002MSVIS,0.384047,0.489209,0.446729,0.524677,0.426897,0.438663,0.449761,0.452155,0.427175,0.461376,...,0.373892,0.459131,0.481303,0.422469,0.456810,51.53,1,22.18,7.5,1
003MSVIS,0.307005,0.440887,0.426359,0.475393,0.433756,0.437115,0.439072,0.467706,0.431561,0.473701,...,0.387480,0.484542,0.509567,0.381282,0.379665,58.06,0,9.57,6.0,1
004MSVIS,0.353240,0.539666,0.518063,0.552257,0.485544,0.495635,0.494340,0.529751,0.459846,0.520597,...,0.395231,0.532746,0.569240,0.441564,0.468934,60.78,1,6.79,3.0,1
005MSVIS,0.392988,0.480349,0.455465,0.550401,0.462872,0.470048,0.474063,0.496681,0.490678,0.483483,...,0.405315,0.519941,0.553714,0.451796,0.460124,37.95,0,8.45,1.5,1
010MSVIS,0.381108,0.464374,0.459644,0.472687,0.473701,0.480289,0.480951,0.506304,0.460790,0.497279,...,0.385139,0.455612,0.478483,0.421267,0.433123,52.64,0,9.05,2.0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
sFIS_04,0.398784,0.542326,0.500175,0.557633,0.510954,0.523408,0.526900,0.536187,0.527307,0.529803,...,0.425312,0.535670,0.567077,0.457714,0.451951,32.98,1,-1.00,0.0,0
sFIS_05,0.401528,0.514213,0.505240,0.563031,0.551277,0.563930,0.553221,0.568623,0.521056,0.559675,...,0.451312,0.548972,0.570932,0.484548,0.475987,23.76,0,-1.00,0.0,0
sFIS_06,0.413560,0.559308,0.533745,0.608780,0.564506,0.575095,0.576159,0.574720,0.554926,0.579100,...,0.512380,0.569164,0.582518,0.523785,0.527624,24.11,0,-1.00,0.0,0
sFIS_07,0.433221,0.537874,0.521213,0.559301,0.511498,0.522041,0.532287,0.542792,0.514955,0.537732,...,0.440810,0.533881,0.539735,0.473456,0.483277,23.98,0,-1.00,0.0,0


Unnamed: 0,ctx-lh-caudalanteriorcingulate/Left-Caudate,ctx-lh-caudalanteriorcingulate/ctx-rh-caudalmiddlefrontal,ctx-lh-caudalanteriorcingulate/ctx-rh-rostralmiddlefrontal,ctx-lh-caudalmiddlefrontal/ctx-rh-paracentral,ctx-lh-cuneus/ctx-lh-inferiortemporal,ctx-lh-cuneus/ctx-lh-middletemporal,ctx-lh-cuneus/ctx-lh-superiortemporal,ctx-lh-cuneus/ctx-lh-insula,ctx-lh-cuneus/Left-Thalamus-Proper,ctx-lh-cuneus/Left-Putamen,...,ctx-rh-paracentral/ctx-rh-posteriorcingulate,ctx-rh-pericalcarine/ctx-rh-superiortemporal,ctx-rh-pericalcarine/ctx-rh-insula,ctx-rh-postcentral/ctx-rh-posteriorcingulate,ctx-rh-posteriorcingulate/ctx-rh-precentral,age,sex,dd,edss,controls_ms
002MSVIS,0.384047,0.489209,0.446729,0.524677,0.426897,0.438663,0.449761,0.452155,0.427175,0.461376,...,0.373892,0.459131,0.481303,0.422469,0.456810,,,,,
003MSVIS,0.307005,0.440887,0.426359,0.475393,0.433756,0.437115,0.439072,0.467706,0.431561,0.473701,...,0.387480,0.484542,0.509567,0.381282,0.379665,,,,,
004MSVIS,0.353240,0.539666,0.518063,0.552257,0.485544,0.495635,0.494340,0.529751,0.459846,0.520597,...,0.395231,0.532746,0.569240,0.441564,0.468934,,,,,
005MSVIS,0.392988,0.480349,0.455465,0.550401,0.462872,0.470048,0.474063,0.496681,0.490678,0.483483,...,0.405315,0.519941,0.553714,0.451796,0.460124,,,,,
010MSVIS,0.381108,0.464374,0.459644,0.472687,0.473701,0.480289,0.480951,0.506304,0.460790,0.497279,...,0.385139,0.455612,0.478483,0.421267,0.433123,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
sFIS_04,,,,,,,,,,,...,,,,,,32.98,1.0,-1.0,0.0,0.0
sFIS_05,,,,,,,,,,,...,,,,,,23.76,0.0,-1.0,0.0,0.0
sFIS_06,,,,,,,,,,,...,,,,,,24.11,0.0,-1.0,0.0,0.0
sFIS_07,,,,,,,,,,,...,,,,,,23.98,0.0,-1.0,0.0,0.0
