In [None]:
# Colab setup: install the third-party packages for this notebook into the runtime.
# NOTE(review): no versions are pinned, so a fresh runtime may resolve different releases.
!pip install gudhi
!pip install --upgrade category_encoders
!pip install dyneusr
!pip install umap-learn
!pip install -q latextable




# 1. Load Data

In [2]:
# Mount Google Drive at /content/drive; every data file below is read from there.
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


## 1.1 Load Target values

The target dataset covers 1206 patients (1206 rows) and 583 different targets (583 columns). If we build a machine learning model, we can try to predict any of these 583 targets; the choice of targets is up to us.

In [3]:
# Read the target table from Google Drive and report its (rows, columns) shape.

import pandas as pd
import numpy as np

targets_csv_path = '/content/drive/MyDrive/GUDHI-tda-tutorials-mapper/df_struct.csv'
df_struct = pd.read_csv(targets_csv_path)
print(df_struct.shape)

(1206, 583)


In [4]:
# List the column names of the target table to see which targets are available.
df_struct.columns

Index(['Unnamed: 0', 'Subject', 'Release', 'Acquisition', 'Gender', 'Age',
       '3T_Full_MR_Compl', 'T1_Count', 'T2_Count', '3T_RS-fMRI_Count',
       ...
       'Noise_Comp', 'Odor_Unadj', 'Odor_AgeAdj', 'PainIntens_RawScore',
       'PainInterf_Tscore', 'Taste_Unadj', 'Taste_AgeAdj', 'Mars_Log_Score',
       'Mars_Errs', 'Mars_Final'],
      dtype='object', length=583)

## 1.2 Load DTI connectivity matrices of 998 patients:

Our goal is to predict the targets from the connectivity matrices. Each connectivity matrix represents the neural connections between the different brain regions of one patient. We have 998 connectivity matrices, i.e. 998 patients.

In [5]:
# Load the pickled list of per-subject DTI connectivity matrices (pandas DataFrames).
import pickle

# The context manager closes the file even when unpickling raises.
# NOTE: pickle.load can execute arbitrary code, so only open files you trust.
with open('/content/drive/MyDrive/GUDHI-tda-tutorials-mapper/DTI.txt', "rb") as open_file:
    loaded_DTI = pickle.load(open_file)

In [6]:
# Sanity check: we expect a Python list holding 998 connectivity matrices.
len(loaded_DTI), type(loaded_DTI)

(998, list)

The connectivity matrices have shape (116,116). That means we have 116 brain regions.

In [7]:
# Shape of the first subject's matrix once converted to a NumPy array.
loaded_DTI[0].to_numpy().shape

(116, 116)

## 1.3 Load IDs of 998 patients:

In the target-dataset we had 1206 patients, however we have 998 connectivity matrices. So we need to select 998 patients' target values. How to do it? By matching the IDs of patients. First we load the patient IDs:

In [8]:
# Load the pickled subject IDs that belong to the DTI connectivity matrices.
# The file can also be downloaded with:
#!wget -O 'IDsDTI.txt' https://www.dropbox.com/s/k7wuffrr0e26a7m/IDsDTI.txt?dl=0
# The context manager closes the file even when unpickling raises.
# NOTE: pickle.load can execute arbitrary code, so only open files you trust.
with open('/content/drive/MyDrive/GUDHI-tda-tutorials-mapper/IDsDTI.txt', "rb") as open_file:
    DTI_IDs = pickle.load(open_file)

In [9]:
# Number of loaded subject IDs; it should equal the number of connectivity matrices.
len(DTI_IDs)

998

In [10]:
# Wrap the loaded IDs in a one-column DataFrame named 'Subject', the same column
# name the target table uses. Note that DTI_IDs is rebound to a DataFrame here.
DTI_IDs = pd.DataFrame(DTI_IDs,columns =['Subject'])
DTI_IDs.head()

Unnamed: 0,Subject
0,120212
1,108222
2,111009
3,393247
4,211821


## 1.4 Load brain region names:

We may also need the 116 brain region names later, so we load them too:


In [11]:
# You may need the list of names
#!wget -O 'AAL_names.txt' https://www.dropbox.com/s/5fkp3s5suyibe57/AAL_names.txt?dl=0

In [12]:
# Load the pickled brain-region names and show how many there are.
# The context manager closes the file even when unpickling raises.
# NOTE: pickle.load can execute arbitrary code, so only open files you trust.
with open('/content/drive/MyDrive/GUDHI-tda-tutorials-mapper/AAL_names.txt', "rb") as open_file:
    loaded_AAL_names = pickle.load(open_file)
len(loaded_AAL_names)

116

In [13]:
#loaded_AAL_names

# 2. Preprocessing Data

### 2.1 Match ID's with target values:

Here we match the patient IDs with the target values. The 'Subject' column in both datasets holds the patient ID.

In [14]:
# Work on a copy so df_struct keeps the full, unfiltered target table.
# (The next cell assigns df_targets again before filtering.)
df_targets = df_struct.copy()

In [15]:
# Keep only the subjects that also have a DTI connectivity matrix.
# 'Subject' holds the patient ID in both tables; the DTI IDs are cast to int
# before the comparison, as the previous element-by-element loop did.
# A boolean mask replaces the nested O(n*m) loop and the row-by-row drop while
# keeping the same rows, the same row order and the original index labels.
# NOTE(review): rows stay in df_struct order, not in DTI_IDs / loaded_DTI order.
# Later cells join per-subject TDA features by row position, so confirm that
# both orderings agree (or reorder one side by 'Subject') before relying on it.
has_dti_matrix = df_struct['Subject'].isin(DTI_IDs['Subject'].astype(int))
df_targets = df_struct.loc[has_dti_matrix].copy()
        

In [16]:
# After filtering, the row count should equal the number of DTI subjects.
df_targets.shape

(998, 583)

### 2.2 Average each matrix with its transpose to make it symmetric

Our connectivity matrices are not symmetric: entry DTI_ab measures the amount of fibers from region a to region b, and DTI_ba the amount from b to a, and the two may differ. In other words, each matrix describes a directed graph with a separate weight for each direction. We make every matrix symmetric by averaging it with its transpose:

In [17]:
# Shallow copy of the list: DTI_sym gets its own list object, but its elements
# are still the same DataFrame objects as in loaded_DTI until they are replaced.
DTI_sym = loaded_DTI.copy()

In [18]:
# Display the first subject's raw (directed) matrix for comparison with the symmetric one.
loaded_DTI[0]

Unnamed: 0_level_0,Precentral_L,Precentral_R,Frontal_Sup_L,Frontal_Sup_R,Frontal_Sup_Orb_L,Frontal_Sup_Orb_R,Frontal_Mid_L,Frontal_Mid_R,Frontal_Mid_Orb_L,Frontal_Mid_Orb_R,...,Cerebelum_10_L,Cerebelum_10_R,Vermis_1_2,Vermis_3,Vermis_4_5,Vermis_6,Vermis_7,Vermis_8,Vermis_9,Vermis_10
data.1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Precentral_L,0.000000,0.111284,0.244940,0.036833,0.000000,0.000000,0.316256,0.031119,0.000000,0.037296,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
Precentral_R,0.058161,0.000000,0.024979,0.077539,0.000000,0.000000,0.003282,0.193988,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
Frontal_Sup_L,0.638208,0.124529,0.000000,0.278553,0.276687,0.012743,1.000000,0.275736,0.078628,0.029371,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
Frontal_Sup_R,0.077435,0.311908,0.224755,0.000000,0.014218,1.000000,0.040737,1.000000,0.024073,0.350583,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
Frontal_Sup_Orb_L,0.000000,0.000000,0.031933,0.002034,0.000000,0.000000,0.002370,0.000000,0.542085,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Vermis_6,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.031301,0.237843,0.366248,0.000000,0.432853,0.124460,0.226047,1.000000
Vermis_7,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.043103,0.232500,0.000000,0.446245,0.076638,0.506090,0.000000,0.855930,0.774257,0.206943
Vermis_8,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.040640,0.415833,0.000000,0.473040,0.080923,0.134490,0.791066,0.000000,0.986443,0.269161
Vermis_9,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.057061,0.476667,0.000000,0.401919,0.094849,0.247622,0.725417,1.000000,0.000000,0.559513


In [19]:
# Symmetrise each matrix by averaging it with its transpose, so that entry
# (a, b) becomes the mean of the a->b and b->a weights.

for subject_idx in range(len(loaded_DTI)):
    directed = DTI_sym[subject_idx]
    DTI_sym[subject_idx] = (directed + directed.T) / 2
DTI_sym[0]

Unnamed: 0_level_0,Precentral_L,Precentral_R,Frontal_Sup_L,Frontal_Sup_R,Frontal_Sup_Orb_L,Frontal_Sup_Orb_R,Frontal_Mid_L,Frontal_Mid_R,Frontal_Mid_Orb_L,Frontal_Mid_Orb_R,...,Cerebelum_10_L,Cerebelum_10_R,Vermis_1_2,Vermis_3,Vermis_4_5,Vermis_6,Vermis_7,Vermis_8,Vermis_9,Vermis_10
data.1,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Precentral_L,0.000000,0.084723,0.441574,0.057134,0.000000,0.000000,0.570140,0.037480,0.000000,0.021363,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
Precentral_R,0.084723,0.000000,0.074754,0.194723,0.000000,0.000000,0.009822,0.358454,0.000000,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
Frontal_Sup_L,0.441574,0.074754,0.000000,0.251654,0.154310,0.007062,1.000000,0.212413,0.045227,0.015506,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
Frontal_Sup_R,0.057134,0.194723,0.251654,0.000000,0.008126,0.567128,0.045612,0.835060,0.014280,0.187429,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
Frontal_Sup_Orb_L,0.000000,0.000000,0.154310,0.008126,0.000000,0.000000,0.011454,0.000000,0.624236,0.000000,...,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Vermis_6,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.000000,0.000000,0.020406,0.178908,0.496907,0.000000,0.469472,0.129475,0.236835,0.685049
Vermis_7,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.029044,0.136159,0.000000,0.319384,0.094477,0.469472,0.000000,0.823498,0.749837,0.136225
Vermis_8,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.027964,0.246444,0.000000,0.346928,0.104622,0.129475,0.823498,0.000000,0.993222,0.180674
Vermis_9,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,0.000000,...,0.039117,0.281898,0.000000,0.293495,0.121606,0.236835,0.749837,0.993222,0.000000,0.374273


In [20]:
# Sanity check: still a list of the same length, with DataFrame elements.
len(DTI_sym), type(DTI_sym), type(DTI_sym[0])

(998, list, pandas.core.frame.DataFrame)

### 2.4 Feature Extraction w/ SparseRipsPersistence

In [21]:
#!python -m pip install -U giotto-tda

In [22]:
# Disabled cell: the giotto-tda imports below sit inside a string literal, so
# nothing is imported and the cell only echoes the text.
"""
import numpy as np
from numpy.random import default_rng
rng = default_rng(42)  # Create a random number generator

from scipy.spatial.distance import pdist, squareform
from scipy.sparse import coo_matrix

from gtda.graphs import GraphGeodesicDistance
from gtda.homology import VietorisRipsPersistence, SparseRipsPersistence, FlagserPersistence

from igraph import Graph

from IPython.display import SVG, display
"""

'\nimport numpy as np\nfrom numpy.random import default_rng\nrng = default_rng(42)  # Create a random number generator\n\nfrom scipy.spatial.distance import pdist, squareform\nfrom scipy.sparse import coo_matrix\n\nfrom gtda.graphs import GraphGeodesicDistance\nfrom gtda.homology import VietorisRipsPersistence, SparseRipsPersistence, FlagserPersistence\n\nfrom igraph import Graph\n\nfrom IPython.display import SVG, display\n'

In [23]:
# Disabled cell (string literal, not executed): would convert the symmetric and
# the raw matrices to NumPy arrays for the persistence computation.
"""
symmetric_DTI_np = []
nonsymmetric_DTI_np = []
for i in range(len(DTI_sym)):
    symmetric_DTI_np.append(DTI_sym[i].to_numpy()) 
    nonsymmetric_DTI_np.append(loaded_DTI[i].to_numpy())
    
len(symmetric_DTI_np), len(nonsymmetric_DTI_np)
"""

'\nsymmetric_DTI_np = []\nnonsymmetric_DTI_np = []\nfor i in range(len(DTI_sym)):\n    symmetric_DTI_np.append(DTI_sym[i].to_numpy()) \n    nonsymmetric_DTI_np.append(loaded_DTI[i].to_numpy())\n    \nlen(symmetric_DTI_np), len(nonsymmetric_DTI_np)\n'

In [24]:
#!pip install -U giotto-tda

In [25]:
#Plotting:
#from gtda.plotting import plot_diagram
#i = 0
#plot_diagram(diagrams[i])

#### SparseRipsPersistence - Symmetric

In [26]:
# Disabled cell (string literal, not executed): would compute SparseRips
# persistence diagrams for the symmetric matrices.
"""
# Instantiate topological transformer
SP = SparseRipsPersistence( homology_dimensions=[0, 1, 2])

# Compute persistence diagrams corresponding to each entry
diagrams_sp_sym = SP.fit_transform(symmetric_DTI_np)

print(f"diagrams.shape: {diagrams_sp_sym.shape} ({diagrams_sp_sym.shape[1]} topological features)")
"""

'\n# Instantiate topological transformer\nSP = SparseRipsPersistence( homology_dimensions=[0, 1, 2])\n\n# Compute persistence diagrams corresponding to each entry\ndiagrams_sp_sym = SP.fit_transform(symmetric_DTI_np)\n\nprint(f"diagrams.shape: {diagrams_sp_sym.shape} ({diagrams_sp_sym.shape[1]} topological features)")\n'

In [27]:
"""
from gtda.diagrams import PersistenceEntropy

PE = PersistenceEntropy()

features_sp_sym = PE.fit_transform(diagrams_sp_sym)
features_sp_sym
"""

'\nfrom gtda.diagrams import PersistenceEntropy\n\nPE = PersistenceEntropy()\n\nfeatures_sp_sym = PE.fit_transform(diagrams_sp_sym)\nfeatures_sp_sym\n'

Save the Topological features into a .csv file

In [28]:
# Disabled cell (string literal, not executed): would write the features to
# 'SparseRipsPersistence_dim012_sym.csv' in the working directory.
"""
# save numpy array as csv file
from numpy import asarray
from numpy import savetxt
# save to csv file
savetxt('SparseRipsPersistence_dim012_sym.csv', features_sp_sym, delimiter=',')

"""

"\n# save numpy array as csv file\nfrom numpy import asarray\nfrom numpy import savetxt\n# save to csv file\nsavetxt('SparseRipsPersistence_dim012_sym.csv', features_sp_sym, delimiter=',')\n\n"

Load topological features coming from SparseRips Complex:

In [29]:
# Read the precomputed topological features from a CSV file on Drive.
# NOTE(review): this loads '..._dim01_sym.csv', while the disabled save cell
# writes '..._dim012_sym.csv'; confirm which run produced the file used here.
from numpy import loadtxt

tda_features_path = '/content/drive/MyDrive/GUDHI-tda-tutorials-mapper/SparseRipsPersistence_dim01_sym.csv'
loaded_features_sp_sym = loadtxt(tda_features_path, delimiter=',')
# Show the array, then its type.
print(loaded_features_sp_sym)
type(loaded_features_sp_sym)

[[6.81966661 4.52649199]
 [6.82150911 4.72372002]
 [6.81782604 4.45223201]
 ...
 [6.82301216 4.77680341]
 [6.8243857  4.41155091]
 [6.82064777 4.28775706]]


numpy.ndarray

Convert the TDA features to a pandas DataFrame (the Age and Gender columns are joined to it later, in section 3).

In [30]:
# Name the two loaded feature columns. The commented-out third name is
# presumably for a run that also includes a third feature - TODO confirm.
df_tda = pd.DataFrame(loaded_features_sp_sym, columns=["TDA_feature_1","TDA_feature_2"])  #,"TDA_feature_3"])
df_tda.head(5)

Unnamed: 0,TDA_feature_1,TDA_feature_2
0,6.819667,4.526492
1,6.821509,4.72372
2,6.817826,4.452232
3,6.820391,4.500798
4,6.82461,4.486151


### 2.5- Column meanings for printing

In [31]:
# COGNITION (full variant): flat list of alternating column name / description,
# the same layout as the other *_meanings lists that make_dict reads two items
# at a time. Every column name must be followed by exactly one description:
# "DDisc_AUC_200"/"DDisc_AUC_40K" and "SCPT_SEN"/"SCPT_SPEC" used to share a
# single description, which shifted the pairing for every entry between them.
Cognition_meanings2 = [
    "PicSeq_AgeAdj", "Episodic Memory",
    "CardSort_AgeAdj", "Executive Function/ Cognitive Flexibility",
    "Flanker_AgeAdj", "Executive Function/ Inhibition",

    "PMAT24_A_CR", "Fluid Intelligence",
    "PMAT24_A_SI", "Fluid Intelligence",
    "PMAT24_A_RTCR", "Fluid Intelligence",

    "ReadEng_AgeAdj", "Language/Reading Decoding ",
    "PicVocab_AgeAdj", "Language/Vocabulary Comprehension",
    "ProcSpeed_AgeAdj", "Processing Speed",

    "DDisc_AUC_200", "Self-regulation/Impulsivity (Delay Discounting)",
    "DDisc_AUC_40K", "Self-regulation/Impulsivity (Delay Discounting)",
    "DDisc_SV_1mo_200", "Self-regulation/Impulsivity (Delay Discounting)",
    "DDisc_SV_6mo_200", "Self-regulation/Impulsivity (Delay Discounting)",
    "DDisc_SV_1yr_200", "Self-regulation/Impulsivity (Delay Discounting)",
    "DDisc_SV_3yr_200", "Self-regulation/Impulsivity (Delay Discounting)",
    "DDisc_SV_5yr_200", "Self-regulation/Impulsivity (Delay Discounting)",
    "DDisc_SV_10yr_200", "Self-regulation/Impulsivity (Delay Discounting)",
    "DDisc_SV_1mo_40K", "Self-regulation/Impulsivity (Delay Discounting)",
    "DDisc_SV_6mo_40K", "Self-regulation/Impulsivity (Delay Discounting)",
    "DDisc_SV_1yr_40K", "Self-regulation/Impulsivity (Delay Discounting)",
    "DDisc_SV_3yr_40K", "Self-regulation/Impulsivity (Delay Discounting)",
    "DDisc_SV_5yr_40K", "Self-regulation/Impulsivity (Delay Discounting)",
    "DDisc_SV_10yr_40K", "Self-regulation/Impulsivity (Delay Discounting)",

    "VSPLOT_TC", "Spatial Orientation",
    "VSPLOT_CRTE", "Spatial Orientation",
    "VSPLOT_OFF", "Spatial Orientation",

    "SCPT_SEN", "Sustained Attention (Short Penn Continuous Performance Test)",
    "SCPT_SPEC", "Sustained Attention (Short Penn Continuous Performance Test)",
    "SCPT_TP", "Sustained Attention (Short Penn Continuous Performance Test)",
    "SCPT_TN", "Sustained Attention (Short Penn Continuous Performance Test)",
    "SCPT_FP", "Sustained Attention (Short Penn Continuous Performance Test)",
    "SCPT_FN", "Sustained Attention (Short Penn Continuous Performance Test)",
    "SCPT_TPRT", "Sustained Attention (Short Penn Continuous Performance Test)",
    "SCPT_LRNR", "Sustained Attention (Short Penn Continuous Performance Test)",

    "IWRD_TOT", "Verbal Episodic Memory",
    "IWRD_RTC", "Verbal Episodic Memory",

    "ListSort_AgeAdj", "Working Memory (List Sorting)",
]
# COGNITION (reduced variant): alternating column name / "COG_"-prefixed label.
# Commented-out names are further columns of the same family that are left out.
Cognition_meanings = [
    "PicSeq_AgeAdj", "COG_Episodic Memory",
    "CardSort_AgeAdj", "COG_Executive Function/ Cognitive Flexibility",
    "Flanker_AgeAdj", "COG_Executive Function/ Inhibition",

    "PMAT24_A_CR", "COG_Fluid Intelligence",
    # "PMAT24_A_SI", "PMAT24_A_RTCR",

    "ReadEng_AgeAdj", "COG_Language/Reading Decoding",
    "PicVocab_AgeAdj", "COG_Language/Vocabulary Comprehension",
    "ProcSpeed_AgeAdj", "COG_Processing Speed ",

    "DDisc_AUC_200", "COG_Self-regulation/Impulsivity (Delay Discounting)",
    "DDisc_AUC_40K", "COG_Self-regulation/Impulsivity (Delay Discounting)",
    # "DDisc_SV_1mo_200", "DDisc_SV_6mo_200", "DDisc_SV_1yr_200",
    # "DDisc_SV_3yr_200", "DDisc_SV_5yr_200", "DDisc_SV_10yr_200",
    # "DDisc_SV_1mo_40K", "DDisc_SV_6mo_40K", "DDisc_SV_1yr_40K",
    # "DDisc_SV_3yr_40K", "DDisc_SV_5yr_40K", "DDisc_SV_10yr_40K",

    "VSPLOT_TC", "COG_Spatial Orientation",
    # "VSPLOT_CRTE",
    # "VSPLOT_OFF",

    # Sustained Attention = Short Penn Continuous Performance Test
    "SCPT_SEN", "COG_Sustained Attention",
    "SCPT_SPEC", "COG_Sustained Attention",
    # "SCPT_TP", "SCPT_TN", "SCPT_FP", "SCPT_FN", "SCPT_TPRT", "SCPT_LRNR",

    "IWRD_TOT", "COG_Verbal Episodic Memory",
    # "IWRD_RTC",
    "ListSort_AgeAdj", "COG_Working Memory (List Sorting)",
]

In [32]:
# EMOTION: alternating column name / "EMO_"-prefixed label.
# Commented-out names are further columns of the same family that are left out.
Emotion_meanings = [
    "ER40_CR", "EMO_Emotion Recognition (Penn Emotion Recognition Test)",
    # "ER40_CRT", "ER40ANG", "ER40FEAR", "ER40HAP", "ER40NOE", "ER40SAD",

    "AngHostil_Unadj", "EMO_Negative Affect ( Anger)",
    # "AngAffect_Unadj", "AngAggr_Unadj", "FearAffect_Unadj",
    "FearSomat_Unadj", "EMO_Negative Affect (Fear)",
    "Sadness_Unadj", "EMO_Negative Affect (Sadness)",

    "LifeSatisf_Unadj", "EMO_Psychological Well-being ",
    # "MeanPurp_Unadj", "PosAffect_Unadj",

    "Loneliness_Unadj", "EMO_Social Relationships ",
    # Friendship used to be labelled "EMO_PercHostil_Unadj", a mangled column
    # name rather than a description; it now shares the label of its group.
    "Friendship_Unadj", "EMO_Social Relationships ",
    # "PercHostil_Unadj", "PercReject_Unadj", "EmotSupp_Unadj", "InstruSupp_Unadj",

    "PercStress_Unadj", "EMO_Stress and Self Efficacy",
    # "SelfEff_Unadj"
]


In [33]:
# MOTOR: alternating column name / "MOT_"-prefixed label.
Motor_meanings = [
    "Endurance_AgeAdj", "MOT_Endurance (2 minute walk test)",
    "GaitSpeed_Comp", "MOT_Locomotion (4-meter walk test)",
    "Dexterity_AgeAdj", "MOT_Dexterity (9-hole Pegboard)",
    "Strength_AgeAdj", "MOT_Strength (Grip Strength Dynamometry)",
]

In [34]:
# Inspect one motor target; the index still carries the original df_struct row labels.
df_targets["GaitSpeed_Comp"]

1       1.24
2       1.58
3       1.51
4       1.10
5       1.24
        ... 
1200    1.07
1201    1.24
1202    1.23
1203    1.39
1205    1.07
Name: GaitSpeed_Comp, Length: 998, dtype: float64

In [35]:
# OTHER: demographic and TDA columns, alternating column name / "OTH_"-prefixed label.
Other_meanings = [
    "Age", "OTH_Age",
    "Gender", "OTH_Gender",
    "TDA_feature_1", "OTH_TDA_feature_1",
    "TDA_feature_2", "OTH_TDA_feature_2",
]

In [36]:
# FreeSurfer summary statistics: alternating column name / label.
# Label prefixes: GMV_ = gray matter volume, WMV_ = white matter volume, FSS_ = other summary.
Freesurfer_Summary_Statistics = [
    "FS_LCort_GM_Vol", "GMV_Left hemisphere cortical gray matter volume",
    "FS_RCort_GM_Vol", "GMV_Right hemisphere cortical gray matter volume",
    "FS_TotCort_GM_Vol", "GMV_Total cortical gray matter volume",
    "FS_SubCort_GM_Vol", "GMV_Total subcortical gray matter volume",
    "FS_Total_GM_Vol", "GMV_Total gray matter volume",
    "FS_SupraTentorial_Vol", "FSS_Supratentorial volume",
    "FS_L_WM_Vol", "WMV_Left hemisphere cortical white matter volume",
    "FS_R_WM_Vol", "WMV_Right hemisphere cortical white matter volume",
    "FS_Tot_WM_Vol", "WMV_Total cortical white matter volume",
]

In [37]:
# ALL surface thickness columns: every column whose name ends in "Thck".
# Each name is added twice in a row so the list keeps the flat
# name/description layout, with the name doubling as its own description.
Surface_Thickness = []
for column_name in df_targets.columns:
  if column_name[-4:] == "Thck":
    Surface_Thickness.extend([column_name, column_name])



In [38]:
# Region-level FreeSurfer columns, alternating column name / label.
# Label prefixes: "SA__" = surface area, "Thc_" = average thickness.
brain_meanings = [
    # Anterior Cingulate Cortex - surface area
    "FS_L_Caudalanteriorcingulate_Area", "SA__Left caudal-anterior-cingulate SA",
    "FS_L_Rostralanteriorcingulate_Area", "SA__Left rostral-anterior-cingulate SA",
    "FS_R_Caudalanteriorcingulate_Area", "SA__Right caudal-anterior-cingulate SA",
    "FS_R_Rostralanteriorcingulate_Area", "SA__Right rostral-anterior-cingulate SA",

    # Anterior Cingulate Cortex - thickness
    "FS_L_Caudalanteriorcingulate_Thck", "Thc_Left caudal-anterior-cingulate Avg Thickness",
    "FS_L_Rostralanteriorcingulate_Thck", "Thc_Left rostral-anterior-cingulate Avg Thickness",
    "FS_R_Caudalanteriorcingulate_Thck", "Thc_Right caudal-anterior-cingulate Avg Thickness",
    "FS_R_Rostralanteriorcingulate_Thck", "Thc_Right rostral-anterior-cingulate Avg Thickness",

    # Orbitofrontal Cortex - surface area
    "FS_L_Lateralorbitofrontal_Area", "SA__Left lateral-orbito-frontal SA",
    "FS_L_Medialorbitofrontal_Area", "SA__Left medial-orbito-frontal SA",
    "FS_R_Lateralorbitofrontal_Area", "SA__Right lateral-orbito-frontal SA",
    "FS_R_Medialorbitofrontal_Area", "SA__Right medial-orbito-frontal SA",

    # Orbitofrontal Cortex - thickness
    "FS_L_Lateralorbitofrontal_Thck", "Thc_Left lateral-orbito-frontal Avg Thickness",
    "FS_L_Medialorbitofrontal_Thck", "Thc_Left medial-orbito-frontal Avg Thickness",
    "FS_R_Lateralorbitofrontal_Thck", "Thc_Right lateral-orbito-frontal Avg Thickness",
    "FS_R_Medialorbitofrontal_Thck", "Thc_Right medial-orbito-frontal Avg Thickness",
]
	

In [39]:
def make_dict(lists):
  """Turn a flat ``[key, value, key, value, ...]`` list into a dictionary.

  Items at even positions become keys and the item right after each one
  becomes its value. If a key occurs more than once, its last value wins.
  An odd-length list raises IndexError because its final key has no value.
  """
  return {lists[pos]: lists[pos + 1] for pos in range(0, len(lists), 2)}

# Merge every name/label list into one flat list, then pair it up into the
# column-name -> label lookup.
all_meanings = [
    *Cognition_meanings,
    *Emotion_meanings,
    *Other_meanings,
    *Motor_meanings,
    *Freesurfer_Summary_Statistics,
    *brain_meanings,
]
print(len(all_meanings))
meanings_dictionary = make_dict(all_meanings)
print(meanings_dictionary)


110
{'PicSeq_AgeAdj': 'COG_Episodic Memory', 'CardSort_AgeAdj': 'COG_Executive Function/ Cognitive Flexibility', 'Flanker_AgeAdj': 'COG_Executive Function/ Inhibition', 'PMAT24_A_CR': 'COG_Fluid Intelligence', 'ReadEng_AgeAdj': 'COG_Language/Reading Decoding', 'PicVocab_AgeAdj': 'COG_Language/Vocabulary Comprehension', 'ProcSpeed_AgeAdj': 'COG_Processing Speed ', 'DDisc_AUC_200': 'COG_Self-regulation/Impulsivity (Delay Discounting)', 'DDisc_AUC_40K': 'COG_Self-regulation/Impulsivity (Delay Discounting)', 'VSPLOT_TC': 'COG_Spatial Orientation', 'SCPT_SEN': 'COG_Sustained Attention', 'SCPT_SPEC': 'COG_Sustained Attention', 'IWRD_TOT': 'COG_Verbal Episodic Memory', 'ListSort_AgeAdj': 'COG_Working Memory (List Sorting)', 'ER40_CR': 'EMO_Emotion Recognition (Penn Emotion Recognition Test)', 'AngHostil_Unadj': 'EMO_Negative Affect ( Anger)', 'FearSomat_Unadj': 'EMO_Negative Affect (Fear)', 'Sadness_Unadj': 'EMO_Negative Affect (Sadness)', 'LifeSatisf_Unadj': 'EMO_Psychological Well-being ', 

# 3. Select columns to add to the final dataset


## 3.1 Select columns

In [40]:
# COGNITION: columns of the target table selected for the final dataset.
# Commented-out names are related columns that are deliberately left out.
Cognition_feature_names = [
    "PicSeq_AgeAdj",      # Episodic Memory
    "CardSort_AgeAdj",    # Executive Function/ Cognitive Flexibility
    "Flanker_AgeAdj",     # Executive Function/ Inhibition

    "PMAT24_A_CR",        # Fluid Intelligence
    # "PMAT24_A_SI", "PMAT24_A_RTCR",

    "ReadEng_AgeAdj",     # Language/Reading Decoding
    "PicVocab_AgeAdj",    # Language/Vocabulary Comprehension
    "ProcSpeed_AgeAdj",   # Processing Speed

    "DDisc_AUC_200",      # Self-regulation/Impulsivity (Delay Discounting)
    # "DDisc_AUC_40K",
    # "DDisc_SV_1mo_200", "DDisc_SV_6mo_200", "DDisc_SV_1yr_200",
    # "DDisc_SV_3yr_200", "DDisc_SV_5yr_200", "DDisc_SV_10yr_200",
    # "DDisc_SV_1mo_40K", "DDisc_SV_6mo_40K", "DDisc_SV_1yr_40K",
    # "DDisc_SV_3yr_40K", "DDisc_SV_5yr_40K", "DDisc_SV_10yr_40K",

    "VSPLOT_TC",          # Spatial Orientation
    # "VSPLOT_CRTE",
    # "VSPLOT_OFF",

    "SCPT_SEN",           # Sustained Attention (Short Penn Continuous Performance Test)
    "SCPT_SPEC",
    # "SCPT_TP", "SCPT_TN", "SCPT_FP", "SCPT_FN", "SCPT_TPRT", "SCPT_LRNR",

    "IWRD_TOT",           # Verbal Episodic Memory
    # "IWRD_RTC",
    "ListSort_AgeAdj",    # Working Memory (List Sorting)
]


In [41]:
# EMOTION: columns of the target table selected for the final dataset.
# Commented-out names are related columns that are deliberately left out.
Emotion_feature_names = [
    "ER40_CR",            # Emotion Recognition (Penn Emotion Recognition Test)
    # "ER40_CRT", "ER40ANG", "ER40FEAR", "ER40HAP", "ER40NOE", "ER40SAD",

    # Negative Affect (Sadness, Fear, Anger)
    "AngHostil_Unadj",
    # "AngAffect_Unadj", "AngAggr_Unadj", "FearAffect_Unadj",
    "FearSomat_Unadj",
    "Sadness_Unadj",

    "LifeSatisf_Unadj",   # Psychological Well-being
    # "MeanPurp_Unadj", "PosAffect_Unadj",

    # Social Relationships
    "Loneliness_Unadj",
    "Friendship_Unadj",
    # "PercHostil_Unadj", "PercReject_Unadj", "EmotSupp_Unadj", "InstruSupp_Unadj",

    "PercStress_Unadj",   # Stress and Self Efficacy
    # "SelfEff_Unadj"
]

In [42]:
# MOTOR: columns of the target table selected for the final dataset.
Motor_feature_names = [
    "Endurance_AgeAdj",   # Endurance (2 minute walk test)
    "GaitSpeed_Comp",     # Locomotion (4-meter walk test)
    "Dexterity_AgeAdj",   # Dexterity (9-hole Pegboard)
    "Strength_AgeAdj",    # Strength (Grip Strength Dynamometry)
]

In [43]:
# FreeSurfer summary statistics selected for the final dataset.
# The per-hemisphere volumes are commented out; only the totals are kept.
Freesurfer_Summary_Statistics_feature_names = [
    # "FS_LCort_GM_Vol",      # Left hemisphere cortical gray matter volume
    # "FS_RCort_GM_Vol",      # Right hemisphere cortical gray matter volume
    "FS_TotCort_GM_Vol",      # Total cortical gray matter volume
    "FS_SubCort_GM_Vol",      # Total subcortical gray matter volume
    "FS_Total_GM_Vol",        # Total gray matter volume
    "FS_SupraTentorial_Vol",  # Supratentorial volume
    # "FS_L_WM_Vol",          # Left hemisphere cortical white matter volume
    # "FS_R_WM_Vol",          # Right hemisphere cortical white matter volume
    "FS_Tot_WM_Vol",          # Total cortical white matter volume
]

In [44]:
# Thickness: every target column whose name ends in "Thck".
Surface_Thickness_feature_names = []
for column_name in df_targets.columns:
  if column_name[-4:] == "Thck":
    Surface_Thickness_feature_names.append(column_name)


In [45]:
############################################
# FOR RESEARCH: region-level FreeSurfer columns (surface area and average
# thickness) for the anterior cingulate and orbitofrontal cortex.
brain_features = [
    # Anterior Cingulate Cortex - surface area
    "FS_L_Caudalanteriorcingulate_Area",
    "FS_L_Rostralanteriorcingulate_Area",
    "FS_R_Caudalanteriorcingulate_Area",
    "FS_R_Rostralanteriorcingulate_Area",

    # Anterior Cingulate Cortex - average thickness
    "FS_L_Caudalanteriorcingulate_Thck",
    "FS_L_Rostralanteriorcingulate_Thck",
    "FS_R_Caudalanteriorcingulate_Thck",
    "FS_R_Rostralanteriorcingulate_Thck",

    # Orbitofrontal Cortex - surface area
    "FS_L_Lateralorbitofrontal_Area",
    "FS_L_Medialorbitofrontal_Area",
    "FS_R_Lateralorbitofrontal_Area",
    "FS_R_Medialorbitofrontal_Area",

    # Orbitofrontal Cortex - average thickness
    "FS_L_Lateralorbitofrontal_Thck",
    "FS_L_Medialorbitofrontal_Thck",
    "FS_R_Lateralorbitofrontal_Thck",
    "FS_R_Medialorbitofrontal_Thck",
]

In [46]:
# Renumber the rows 0..n-1 (the filtering step kept the df_struct labels).
df_targets.reset_index(drop=True, inplace=True)

# One sub-frame per feature family, selected by column name.
df_cognition = df_targets.loc[:, Cognition_feature_names]
df_emotion = df_targets.loc[:, Emotion_feature_names]
df_motor = df_targets.loc[:, Motor_feature_names]
df_Freesurfer_Summary_Statistics = df_targets.loc[:, Freesurfer_Summary_Statistics_feature_names]
df_Surface_Thickness_feature_names = df_targets.loc[:, Surface_Thickness_feature_names]

df_brain = df_targets.loc[:, brain_features]

## 3.2 Age Column

In [47]:
# Count the subjects in the '26-30' age bracket.
np.sum(df_targets["Age"]=='26-30')

429

In [48]:
# Copy the Age column so the regrouping below leaves df_targets untouched.
age_col  = df_targets["Age"].copy()
age_col

0      26-30
1      26-30
2      31-35
3      26-30
4      31-35
       ...  
993    31-35
994    31-35
995    26-30
996    26-30
997    26-30
Name: Age, Length: 998, dtype: object

In [49]:
df_targets.reset_index(drop=True, inplace=True)

# Group the four raw age brackets into two classes. The vectorised replace
# works for any index, whereas the previous element-by-element loop wrote
# through age_col[i] and therefore only worked with a 0..n-1 index.
# Values that are not listed here are left unchanged, as before.
AGE_GROUP_BY_BRACKET = {
    '22-25': '22-30',
    '26-30': '22-30',
    '31-35': '31+',
    '36+': '31+',
}
age_col = age_col.replace(AGE_GROUP_BY_BRACKET)
  


In [50]:
# Check the regrouped age labels.
age_col

0      22-30
1      22-30
2        31+
3      22-30
4        31+
       ...  
993      31+
994      31+
995    22-30
996    22-30
997    22-30
Name: Age, Length: 998, dtype: object

In [51]:

# Encode the age groups as integers with category_encoders' OrdinalEncoder.
# Judging by the displayed result, '22-30' becomes 1 and '31+' becomes 2, and
# age_col is rebound from a Series to a one-column DataFrame.
# NOTE(review): no explicit mapping is passed - confirm the codes stay the same
# if the row order of the input changes.
import category_encoders as ce

encoder= ce.OrdinalEncoder(cols=["Age"])

age_col = encoder.fit_transform(age_col)
age_col

Unnamed: 0,Age
0,1
1,1
2,2
3,1
4,2
...,...
993,2
994,2
995,1
996,1


## 3.3 Gender Column

In [52]:
#For Jupyter notebook in Anaconda: (Make this on terminal, not in Jupyter notebook cell.)
#conda install -c conda-forge category_encoders
#For Colab:
#!pip install --upgrade category_encoders

import category_encoders as ce

# Fresh 0..n-1 row index (in place) before slicing out the column.
df_targets.reset_index(drop=True, inplace=True)

# NOTE(review): the integer codes are assigned by the encoder from the data,
# while create_y_gender treats 1 as male and 2 as female - confirm that the
# first gender label seen in df_targets really is the male one.
encoder = ce.OrdinalEncoder(cols=["Gender"])
gender_col = encoder.fit_transform(df_targets[["Gender"]])
gender_col.head(4)


Unnamed: 0,Gender
0,1
1,2
2,1
3,1


In [53]:
# Column-wise blocks that make up the analysis table, in output order.
# df_Surface_Thickness_feature_names is deliberately left out.
feature_blocks = [
    df_tda,
    age_col,
    gender_col,
    df_cognition,
    df_emotion,
    df_motor,
    df_Freesurfer_Summary_Statistics,
    df_brain,
]
df_tda_cognition = pd.concat(feature_blocks, axis=1)
df_tda_cognition.shape

(998, 50)

### Replace NaN with mean

In [54]:
# Replace every missing value with the mean of its own column.
all_columns = list(df_tda_cognition.columns)
column_means = df_tda_cognition[all_columns].mean()
df_tda_cognition[all_columns] = df_tda_cognition[all_columns].fillna(value=column_means)

# Sanity check: every entry should now be False.
df_tda_cognition.isnull().any()

TDA_feature_1                         False
TDA_feature_2                         False
Age                                   False
Gender                                False
PicSeq_AgeAdj                         False
CardSort_AgeAdj                       False
Flanker_AgeAdj                        False
PMAT24_A_CR                           False
ReadEng_AgeAdj                        False
PicVocab_AgeAdj                       False
ProcSpeed_AgeAdj                      False
DDisc_AUC_200                         False
VSPLOT_TC                             False
SCPT_SEN                              False
SCPT_SPEC                             False
IWRD_TOT                              False
ListSort_AgeAdj                       False
ER40_CR                               False
AngHostil_Unadj                       False
FearSomat_Unadj                       False
Sadness_Unadj                         False
LifeSatisf_Unadj                      False
Loneliness_Unadj                

# 4. Implement Filters:

In [55]:
# Plain NumPy view of the analysis table (rows = patients, columns = features);
# the filters and the Mapper run below operate on this array.
df_tda_cognition_np = df_tda_cognition.to_numpy()
df_tda_cognition_np.shape

(998, 50)

1. L_P norm:

In [56]:
#Define filter function for Mapper:
# L_p filter: for every patient (row) compute (sum_j |x_j|^p)^(k/p).
# With k=1 and p=2 this is the Euclidean norm of the row.
k = 1
p = 2

# Vectorised over all rows; the length follows the data instead of a
# hard-coded 998. abs() makes this a proper norm for odd p as well (it is a
# no-op for p=2).
L_p_filter = np.sum(np.abs(df_tda_cognition_np) ** p, axis=1) ** (k / p)
print(L_p_filter.shape)

# NOTE(review): the recorded values are ~1e6, i.e. the filter is dominated by
# the largest-magnitude columns - consider standardising the features first.

(998,)


In [57]:
L_p_filter[:10]

array([1691357.68900011, 1326920.05274379, 1511189.83304841,
       1578454.81831351, 1239485.90642318, 1353336.94579791,
       1493255.34421621, 1347723.08037592, 1472738.60399367,
       1657275.92676062])

L_infinity Centrality:

In [58]:
# Disabled reference implementation of the L-infinity filter (explicit double
# loop over all pairs). It is kept as an inert string literal, so running this
# cell only echoes the text; the pdist-based cell that follows is the live one.
'''
from scipy.spatial import distance

L_infinity_filter = np.zeros((998,))

for i in range(0,len(df_tda_cognition_np)):
  max_distance=0

  for j in range(len(df_tda_cognition_np)):
    #if distance.euclidean(df_tda_cognition_np[i], df_tda_cognition_np[j]) > max_distance:
    if distance.correlation(df_tda_cognition_np[i], df_tda_cognition_np[j]) > max_distance:
    
      max_distance = distance.correlation(df_tda_cognition_np[i], df_tda_cognition_np[j])

  L_infinity_filter[i] = max_distance

L_infinity_filter.shape, L_infinity_filter[:10]
'''

'\nfrom scipy.spatial import distance\n\nL_infinity_filter = np.zeros((998,))\n\nfor i in range(0,len(df_tda_cognition_np)):\n  max_distance=0\n\n  for j in range(len(df_tda_cognition_np)):\n    #if distance.euclidean(df_tda_cognition_np[i], df_tda_cognition_np[j]) > max_distance:\n    if distance.correlation(df_tda_cognition_np[i], df_tda_cognition_np[j]) > max_distance:\n    \n      max_distance = distance.correlation(df_tda_cognition_np[i], df_tda_cognition_np[j])\n\n  L_infinity_filter[i] = max_distance\n\nL_infinity_filter.shape, L_infinity_filter[:10]\n'

In [59]:
# L-infinity centrality: for each data point, the distance to the point that
# lies farthest away from it. Large values mark points far from the centre of
# the data set, which gives a more succinct description than a scatter plot.
from scipy.spatial.distance import pdist, squareform

# Alternative metric: pdist(df_tda_cognition_np, 'euclidean')
# pdist returns the condensed pairwise distances; squareform expands them to
# a symmetric square matrix with one row per patient.
condensed_dist = pdist(df_tda_cognition_np, 'correlation')
pairwise_dist = squareform(condensed_dist)

# Row-wise maximum = distance to the farthest other point.
L_infinity = pairwise_dist.max(axis=1)
L_infinity.shape, L_infinity[:10]

((998,), array([0.00284452, 0.00387969, 0.00389691, 0.00328126, 0.00652671,
        0.00314194, 0.00355573, 0.00448024, 0.00288869, 0.00339839]))

In [60]:
from sklearn.decomposition import TruncatedSVD

# One-component truncated SVD of the feature matrix, used as a candidate
# filter. random_state is fixed so the result is repeatable.
svd = TruncatedSVD(n_components=1, n_iter=7, random_state=42)
SVD_filter = svd.fit_transform(df_tda_cognition_np)
print(SVD_filter.shape)


(998, 1)


In [61]:
df_targets.Age

0      26-30
1      26-30
2      31-35
3      26-30
4      31-35
       ...  
993    31-35
994    31-35
995    26-30
996    26-30
997    26-30
Name: Age, Length: 998, dtype: object

# 5. KeplerMapper


In [62]:
df_tda_cognition_np.shape

(998, 50)

In [63]:
# Candidate targets / lenses that were tried (kept for reference):
#target_col = df_tda_cognition["ListSort_AgeAdj"].to_numpy()
#SVD_filter
#L_infinity[:,np.newaxis]
#L_p_filter[:,np.newaxis]

# NOTE: age_col is rebound here from the encoded DataFrame to a 1-D NumPy
# array of the ordinal age codes taken from the analysis table.
age_col = df_tda_cognition["Age"].to_numpy()
memory_col = df_tda_cognition["ListSort_AgeAdj"].to_numpy()
# Two-lens combinations that were tried:
#filters = np.concatenate(( age_col[:,np.newaxis], SVD_filter), axis=1)
#filters= np.concatenate(( SVD_filter, L_infinity[:,np.newaxis]), axis=1)

In [64]:
from dyneusr import DyNeuGraph
from dyneusr.datasets import make_trefoil
from dyneusr.tools import visualize_mapper_stages
import kmapper
from kmapper import KeplerMapper

from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn import manifold
from sklearn.manifold import TSNE, Isomap
from sklearn.decomposition import PCA,TruncatedSVD
import umap.umap_ as umap
from sklearn import preprocessing
from sklearn.preprocessing import MinMaxScaler

def create_y_gender(gender_col):
  """One-hot encode ordinal gender codes.

  Args:
    gender_col: array-like of ordinal codes (NumPy array, list, or pandas
      Series/one-column DataFrame with any index). Code 1 is treated as male
      and code 2 as female; any other value yields 0 in both columns.

  Returns:
    DataFrame with integer 0/1 columns 'male' and 'female', one row per
    input element, indexed 0..n-1.
  """
  # Flatten to a plain 1-D array so the result does not depend on the
  # container type or on a pandas index.
  codes = np.asarray(gender_col).ravel()

  return pd.DataFrame({
      'male': (codes == 1).astype(int),
      'female': (codes == 2).astype(int),
  })

##########
def create_y_age(age_col):
  """One-hot encode ordinal age-group codes.

  Args:
    age_col: array-like of ordinal codes (NumPy array, list, or pandas
      Series/one-column DataFrame with any index). Code 1 is treated as the
      '22-30' group and code 2 as the '31+' group; any other value yields 0
      in both columns.

  Returns:
    DataFrame with integer 0/1 columns '22-30' and '31+', one row per input
    element, indexed 0..n-1.
  """
  # Flatten to a plain 1-D array so the result does not depend on the
  # container type or on a pandas index.
  codes = np.asarray(age_col).ravel()

  return pd.DataFrame({
      '22-30': (codes == 1).astype(int),
      '31+': (codes == 2).astype(int),
  })

# Inputs for the Mapper run: ordinal age / gender codes as 1-D arrays, the
# feature matrix X, and the one-hot frame y handed to DyNeuGraph below.
age_col = df_tda_cognition["Age"].to_numpy()
gender_col = df_tda_cognition["Gender"].to_numpy()
X = df_tda_cognition_np  # alias of the same array, not a copy
y = create_y_age(age_col)
# Alternative targets that were tried:
#y = create_y_gender(gender_col)
#y = df_tda_cognition["FS_L_Lateralorbitofrontal_Area"].to_numpy() #memory_col



In [65]:
#y= df_tda_cognition["FS_Total_GM_Vol"].to_numpy()[:,np.newaxis] #Total gray matter volume
#y= df_tda_cognition["FS_SubCort_GM_Vol"].to_numpy()[:,np.newaxis] #Total subcortical gray matter volume
#y.shape

In [66]:
# Candidate one-dimensional lenses, reshaped to column vectors (n, 1).
AGE_lens = age_col[:,np.newaxis]
L_inf_lens = L_infinity[:,np.newaxis]
L_p_filter_lens = L_p_filter[:,np.newaxis]



import sklearn
from sklearn.cluster import DBSCAN, AgglomerativeClustering

mapper = KeplerMapper(verbose=1)

# Other projections that were tried:
#TfidfVectorizer_lens = mapper.fit_transform(X,
                                            #projection= TfidfVectorizer(analyzer="char",ngram_range=(1,6), max_df=0.93, min_df=0.03))

#Umap_lens = mapper.fit_transform(X, projection= umap.UMAP(n_components=2, random_state=1))
#isomap_lens = mapper.fit_transform(X,projection=[manifold.Isomap(n_components=1, n_jobs=-1)])
#TSNE_lens = mapper.fit_transform(X, projection=TSNE(2))

# Two-component SVD lens. The seed is fixed because the community node lists
# further down are hard-coded cube ids of the graph built from this lens.
SVD_lens = mapper.fit_transform(X, projection=TruncatedSVD(n_components=2, random_state=42))
#PCA_lens = mapper.fit_transform(X, projection= PCA(n_components=1))


#two_lenses = np.c_[L_p_filter_lens, TSNE_lens]
#two_lenses = np.c_[SVD_lens,  L_inf_lens]

# NOTE(review): correlation distance is bounded by 2, so eps=20 places every
# point of a cube in one neighbourhood (all recorded nodes are `*_cluster0`).
# Left unchanged because the hard-coded community node lists depend on it.
graphh = mapper.map(SVD_lens, X,
                    clusterer=DBSCAN(eps=20., min_samples=5, metric="correlation"),
                    #clusterer = AgglomerativeClustering(),
                    cover=kmapper.Cover(n_cubes=50, perc_overlap=62/100))


# Visualize the shape graph using DyNeuSR's DyNeuGraph.
# y is the age one-hot frame, so the output file is named after AGE (the
# previous name said GENDER although the nodes were coloured by age group).
dG = DyNeuGraph(G=graphh, y=y)
dG.visualize('SVDx2_50_0.62_color=AGE.html')

KeplerMapper()
..Composing projection pipeline of length 1:
	Projections: TruncatedSVD()
	Distance matrices: False
	Scalers: MinMaxScaler()
..Projecting on data shaped (998, 50)

..Projecting data using: 
	TruncatedSVD()


..Scaling with: MinMaxScaler()

Mapping on data shaped (998, 50) using lens shaped (998, 2)

Creating 2500 hypercubes.

Created 4529 edges and 563 nodes in 0:00:00.682826.


label,group,value,row_count
22-30,0,647,998
31+,1,351,998


  node_members = np.array(list(node_to_members.values()))


   > Found 0 nodes for data point 0.
   > Found 0 nodes for data point 4.
   > Found 0 nodes for data point 23.
   > Found 0 nodes for data point 26.
   > Found 0 nodes for data point 36.
   > Found 0 nodes for data point 44.
   > Found 0 nodes for data point 46.
   > Found 0 nodes for data point 56.
   > Found 0 nodes for data point 57.
   > Found 0 nodes for data point 64.
   > Found 0 nodes for data point 79.
   > Found 0 nodes for data point 82.
   > Found 0 nodes for data point 89.
   > Found 0 nodes for data point 168.
   > Found 0 nodes for data point 169.
   > Found 0 nodes for data point 189.
   > Found 0 nodes for data point 205.
   > Found 0 nodes for data point 206.
   > Found 0 nodes for data point 207.
   > Found 0 nodes for data point 228.
   > Found 0 nodes for data point 235.
   > Found 0 nodes for data point 247.
   > Found 0 nodes for data point 254.
   > Found 0 nodes for data point 274.
   > Found 0 nodes for data point 282.
   > Found 0 nodes for data point 294.
 

DyNeuGraph()

In [67]:
# Disabled parameter sweep (t-SNE lens, single-linkage clustering, grid over
# n_cubes and perc_overlap, one HTML file per setting). It is kept as an inert
# string literal, so running this cell only echoes the text.
'''
from sklearn.cluster import AgglomerativeClustering
import sklearn
from sklearn.cluster import DBSCAN

age_col = df_tda_cognition["Age"].to_numpy()
AGE_lens = age_col[:,np.newaxis]
L_p_filter_lens = L_p_filter[:,np.newaxis]

L_inf_lens = L_infinity[:,np.newaxis]


mapper2 = KeplerMapper(verbose=1)


#TfidfVectorizer_lens = mapper.fit_transform(X, projection= fidfVectorizer(analyzer="char",ngram_range=(1,6), max_df=0.93, min_df=0.03))

#Umap_lens = mapper2.fit_transform(X, projection= umap.UMAP(n_components=2, random_state=1))
#isomap_lens = mapper.fit_transform(X,projection=[manifold.Isomap(n_components=1, n_jobs=-1)]) 
#PCA_lens = mapper.fit_transform(X, projection= PCA(n_components=1))

#SVD_lens = mapper2.fit_transform(X, projection= TruncatedSVD(n_components=2))
TSNE_lens = mapper2.fit_transform(X, projection=TSNE(2))

numm = len(range(20,50, 2)) * len(range(20, 50, 2))
#two_lenses = np.c_[SVD_lens, L_p_filter_lens]

for n_cubes in range(20,50, 2):
  for perc_overlap in range(20, 50 , 2):

    graphh2 = mapper2.map(TSNE_lens, X, 
                      #clusterer = sklearn.cluster.DBSCAN(eps=20., min_samples=5, metric="correlation"),
                      clusterer = AgglomerativeClustering(linkage = 'single'),
                      cover=kmapper.Cover(n_cubes=n_cubes, perc_overlap=(perc_overlap/100)) )

    dG = DyNeuGraph(G=graphh2, y=y)
    dG.visualize('/content/drive/MyDrive/a_results/Agg_TSNE_x2/'+ str(n_cubes)+'_'+str(perc_overlap/100)+'_result_'+str(numm)+'.html')

    numm-=1

'''

'\nfrom sklearn.cluster import AgglomerativeClustering\nimport sklearn\nfrom sklearn.cluster import DBSCAN\n\nage_col = df_tda_cognition["Age"].to_numpy()\nAGE_lens = age_col[:,np.newaxis]\nL_p_filter_lens = L_p_filter[:,np.newaxis]\n\nL_inf_lens = L_infinity[:,np.newaxis]\n\n\nmapper2 = KeplerMapper(verbose=1)\n\n\n#TfidfVectorizer_lens = mapper.fit_transform(X, projection= fidfVectorizer(analyzer="char",ngram_range=(1,6), max_df=0.93, min_df=0.03))\n\n#Umap_lens = mapper2.fit_transform(X, projection= umap.UMAP(n_components=2, random_state=1))\n#isomap_lens = mapper.fit_transform(X,projection=[manifold.Isomap(n_components=1, n_jobs=-1)]) \n#PCA_lens = mapper.fit_transform(X, projection= PCA(n_components=1))\n\n#SVD_lens = mapper2.fit_transform(X, projection= TruncatedSVD(n_components=2))\nTSNE_lens = mapper2.fit_transform(X, projection=TSNE(2))\n\nnumm = len(range(20,50, 2)) * len(range(20, 50, 2))\n#two_lenses = np.c_[SVD_lens, L_p_filter_lens]\n\nfor n_cubes in range(20,50, 2):\n  f

In [68]:
# Dump every top-level entry of the Mapper graph: its key, its value, and a
# blank separator line.
for section, content in graphh.items():
  print(section, content, "", sep="\n")

nodes
defaultdict(<class 'list'>, {'cube19_cluster0': [112, 404, 824, 852, 926], 'cube20_cluster0': [112, 404, 824, 852, 926], 'cube31_cluster0': [338, 404, 824, 852, 926], 'cube32_cluster0': [338, 404, 824, 852, 926, 950], 'cube45_cluster0': [90, 157, 309, 366, 951], 'cube50_cluster0': [338, 404, 821, 852, 926], 'cube51_cluster0': [317, 338, 404, 662, 852, 926, 950], 'cube52_cluster0': [239, 317, 404, 662, 926, 950], 'cube53_cluster0': [239, 317, 557, 662, 950], 'cube55_cluster0': [167, 332, 557, 581, 825, 930], 'cube56_cluster0': [167, 332, 581, 659, 825, 930], 'cube71_cluster0': [157, 309, 488, 845, 989], 'cube74_cluster0': [38, 212, 283, 335, 364, 821], 'cube75_cluster0': [38, 212, 283, 335, 364, 710, 821], 'cube76_cluster0': [212, 335, 364, 710, 821], 'cube78_cluster0': [239, 317, 662, 836, 950], 'cube79_cluster0': [239, 317, 557, 662, 836, 950], 'cube82_cluster0': [167, 195, 332, 659, 742, 825], 'cube100_cluster0': [212, 283, 335, 364, 821], 'cube101_cluster0': [212, 233, 283, 33

In [69]:
from scipy.sparse.csgraph    import dijkstra, shortest_path, connected_components
from scipy.stats             import ks_2samp

def compute_differential_coordinates(df_tda_cognition, bnd_tmp_i):
    """Rank features by how differently a community is distributed vs. the rest.

    For every column, a two-sample Kolmogorov-Smirnov test compares the rows
    that belong to the community with all remaining rows of the same frame.

    Args:
        df_tda_cognition: DataFrame with one row per data point and one
            column per feature.
        bnd_tmp_i: positional row indices of the community members
            (duplicates are ignored).

    Returns:
        (F_ks, P_ks): the column positions ordered by ascending p-value, and
        the p-values in that same order.
    """
    values = df_tda_cognition.to_numpy()

    # Data points that create this community
    inside = np.unique(bnd_tmp_i).astype(int)
    # Data points that do not create this community. The complement is taken
    # over the rows of the frame that was passed in, not over the global X.
    outside = np.setdiff1d(np.arange(values.shape[0]), inside)

    pvals = []
    for f in range(values.shape[1]):
        _, pval = ks_2samp(values[inside, f], values[outside, f])
        pvals.append(pval)
    pvals = np.array(pvals)

    # One ordering applied to both outputs keeps features and p-values aligned.
    order = np.argsort(pvals, kind="stable")
    F_ks, P_ks = np.arange(values.shape[1])[order], pvals[order]
    return F_ks, P_ks




In [70]:
def get_AGE_of_community(df_tda_cognition, bnd_tmp_i):
    """List the age groups that occur among the members of a community.

    Args:
        df_tda_cognition: DataFrame holding an ordinal "Age" column
            (1 = '22-30', 2 = '31+').
        bnd_tmp_i: positional row indices of the community members.

    Returns:
        List with the labels of the age codes present, in ascending code
        order; codes other than 1 and 2 are skipped.
    """
    # Read the Age column of the frame that was passed in instead of relying
    # on the global X and a hard-coded column position.
    member_codes = df_tda_cognition["Age"].to_numpy()[bnd_tmp_i]

    labels = {1: "22-30", 2: "31+"}
    return [labels[code] for code in np.unique(member_codes) if code in labels]
#x = get_AGE_of_community(cover_complex, nodes=bnd_temp[0])

In [71]:
def get_number_of_communities_to_print(df_tda_cognition, bnd_temp):
  """Build the header row of a community comparison table.

  Args:
    df_tda_cognition: unused; kept so existing callers keep working.
    bnd_temp: sequence with one entry per community (only its length is used).

  Returns:
    A list holding a single row:
    ["Clinical features", "Cat.", "G1", ..., "Gn"] with n = len(bnd_temp).
  """
  # The per-community age string that used to be computed here was never
  # added to the row, so that work has been dropped.
  header = ["Clinical features", "Cat."]
  header += ["G" + str(group + 1) for group in range(len(bnd_temp))]
  return [header]

def is_it_in_other_comm(feature, feature_meaning, all_comm_features ): #,all_means,com_numb):
  """Mark, per community, whether a feature is among its significant ones.

  Args:
    feature: column code of the feature (unused; kept for the callers).
    feature_meaning: human-readable meaning string of the feature.
    all_comm_features: one list of meaning strings per community.

  Returns:
    List with one entry per community: "X" if feature_meaning is in that
    community's list, "-" otherwise.
  """
  # No global DataFrame is needed for this lookup; the former unused locals
  # that read one have been removed.
  return ["X" if feature_meaning in comm_features else "-"
          for comm_features in all_comm_features]

In [72]:

def print_table( meanings_dictionary, bnd_temp, df_tda_cognition):  #, all_means):
  """Print, per community, a table of its significant features.

  A feature is significant for a community when its KS p-value (from
  compute_differential_coordinates) is below 0.05. Each table row shows the
  feature name, its three-character category, and an X / - mark per community
  telling whether the feature is significant there as well.

  Args:
    meanings_dictionary: maps a column code to its meaning string, whose
      first three characters are the category and whose text from position 4
      on is the display name.
    bnd_temp: one array of member row indices per community.
    df_tda_cognition: the analysis DataFrame.
  """
  # Pass 1: run the KS ranking once per community and keep the significant
  # (code, meaning) pairs in p-value order. This used to be computed twice.
  significant = []
  for members in bnd_temp:
    coordinates, pvalues = compute_differential_coordinates(df_tda_cognition, members)
    pairs = []
    for position, pvalue in zip(coordinates, pvalues):
      if pvalue < 0.05:
        code = df_tda_cognition.columns[position]
        pairs.append((code, meanings_dictionary[code]))
    significant.append(pairs)

  all_feature_meanings = [[meaning for _, meaning in pairs] for pairs in significant]

  # Pass 2: one table per community.
  for i, pairs in enumerate(significant):
    rows = get_number_of_communities_to_print(df_tda_cognition, bnd_temp)
    for code, meaning in pairs:
      category, feature_name = meaning[0:3], meaning[4:]
      rows.append([feature_name, category]
                  + is_it_in_other_comm(code, meaning, all_feature_meanings))

    table = Texttable()
    table.set_cols_align(["c"] * (len(bnd_temp)+2))
    table.set_deco(Texttable.HEADER | Texttable.VLINES)
    table.add_rows(rows)

    print("\n--------------------------------------------------------------")#, file=f)

    AGE = get_AGE_of_community(df_tda_cognition, bnd_temp[i])
    print("COMMUNITY-",i+1 , ", " , AGE ) #, file=f)
    print(table.draw()) #, file=f)

    print("--------------------------------------------------------------")#, file=f)






In [73]:
from tabulate import tabulate
from texttable import Texttable

import latextable

#31+
comm1 = ['cube55_cluster0', 'cube56_cluster0','cube82_cluster0','cube137_cluster0','cube109_cluster0','cube138_cluster0',
        'cube202_cluster0','cube139_cluster0','cube168_cluster0','cube203_cluster0','cube140_cluster0', 'cube167_cluster0',
         'cube204_cluster0', 'cube169_cluster0','cube205_cluster0','cube141_cluster0','cube170_cluster0', 'cube207_cluster0',
         'cube171_cluster0', 'cube206_cluster0',
         
         'cube394_cluster0', 'cube363_cluster0','cube393_cluster0','cube362_cluster0','cube332_cluster0','cube333_cluster0',
        'cube392_cluster0','cube361_cluster0','cube210_cluster0','cube243_cluster0','cube175_cluster0', 
         'cube276_cluster0','cube242_cluster0',
         ]


#[22-30]
comm2 = ['cube1027_cluster0', 'cube970_cluster0','cube936_cluster0','cube778_cluster0','cube935_cluster0','cube821_cluster0',
         'cube1026_cluster0', 'cube969_cluster0','cube900_cluster0','cube819_cluster0','cube897_cluster0', 'cube780_cluster0',
         'cube995_cluster0', 'cube968_cluster0','cube901_cluster0','cube820_cluster0','cube899_cluster0','cube898_cluster0',
         'cube996_cluster0', 'cube937_cluster0','cube861_cluster0','cube779_cluster0','cube859_cluster0','cube858_cluster0',
         'cube857_cluster0', 'cube817_cluster0','cube743_cluster0',
         
         
         'cube743_cluster0', 'cube744_cluster0','cube745_cluster0','cube821_cluster0','cube781_cluster0','cube780_cluster0',
         'cube779_cluster0', 'cube778_cluster0','cube938_cluster0','cube902_cluster0','cube862_cluster0','cube861_cluster0',
         'cube863_cluster0', 'cube903_cluster0','cube708_cluster0',
         ]

#[22-30]
comm3 = ['cube633_cluster0', 'cube563_cluster0','cube597_cluster0','cube738_cluster0', 'cube560_cluster0','cube441_cluster0',
         'cube632_cluster0', 'cube561_cluster0','cube519_cluster0','cube440_cluster0', 'cube629_cluster0','cube562_cluster0',
         'cube665_cluster0', 'cube663_cluster0','cube478_cluster0','cube518_cluster0', 'cube520_cluster0','cube630_cluster0',
         'cube631_cluster0', 'cube702_cluster0','cube479_cluster0','cube442_cluster0', 'cube559_cluster0',  
         'cube599_cluster0', 'cube664_cluster0','cube701_cluster0' ,'cube517_cluster0', 'cube476_cluster0', 
         'cube516_cluster0', 'cube473_cluster0','cube474_cluster0', 'cube477_cluster0','cube558_cluster0','cube598_cluster0',
          
         'cube666_cluster0', 'cube600_cluster0','cube564_cluster0', 'cube563_cluster0','cube521_cluster0','cube479_cluster0',
         'cube443_cluster0', 'cube480_cluster0','cube403_cluster0', 'cube404_cluster0','cube522_cluster0'
          ]


#MIX AGE
comm4 = ['cube1078_cluster0', 'cube1077_cluster0','cube1075_cluster0','cube1074_cluster0', 'cube1072_cluster0','cube1038_cluster0',
         'cube1039_cluster0', 'cube1040_cluster0','cube1041_cluster0','cube983_cluster0', 'cube984_cluster0','cube985_cluster0',
         'cube953_cluster0', 'cube917_cluster0','cube916_cluster0','cube952_cluster0', 'cube915_cluster0','cube951_cluster0',
         
         'cube986_cluster0', 'cube987_cluster0','cube918_cluster0','cube954_cluster0', 'cube919_cluster0',  
         'cube955_cluster0', 'cube920_cluster0','cube879_cluster0' ,'cube921_cluster0', 'cube880_cluster0', 
         'cube881_cluster0', 'cube841_cluster0','cube840_cluster0', 'cube800_cluster0','cube799_cluster0',
          
         'cube764_cluster0', 'cube763_cluster0','cube762_cluster0', 'cube837_cluster0','cube796_cluster0','cube760_cluster0',
         'cube721_cluster0', 'cube682_cluster0','cube683_cluster0', 'cube722_cluster0',
          ]       


comms = [comm1, comm2, comm3, comm4]

# For each community, pool the member lists of all its Mapper nodes and keep
# every patient index once (np.unique also sorts them).
bnd_tmp = []
for comm in comms:
  pooled_ids = [patient for node_name in comm for patient in graphh['nodes'][node_name]]
  bnd_tmp.append(np.unique(pooled_ids))

#comm_features, p_values = compute_differential_coordinates(df_tda_cognition, all_patients_ids)

print_table( meanings_dictionary, bnd_tmp, df_tda_cognition) #, all_means)



--------------------------------------------------------------
COMMUNITY- 1 ,  ['22-30', '31+']
               Clinical features                | Cat. | G1 | G2 | G3 | G4
           Total gray matter volume             | GMV  | X  | X  | X  | X 
       Total cortical gray matter volume        | GMV  | X  | X  | X  | X 
             Supratentorial volume              | FSS  | X  | X  | X  | X 
     Total subcortical gray matter volume       | GMV  | X  | X  | -  | X 
        Right lateral-orbito-frontal SA         | SA_  | X  | X  | -  | X 
        Left lateral-orbito-frontal SA          | SA_  | X  | X  | X  | X 
      Left rostral-anterior-cingulate SA        | SA_  | X  | X  | X  | X 
      Total cortical white matter volume        | WMV  | X  | X  | X  | X 
        Right medial-orbito-frontal SA          | SA_  | X  | X  | -  | X 
                    Gender                      | OTH  | X  | X  | X  | X 
                      Age                       | OTH  | X  | X  | X  | - 
   

In [75]:
def get_means_and_pvalues_helper(data_columns, bnd_tmp_i, data=None):
  """Per-feature mean and KS p-value of one community vs. all other rows.

  Args:
    data_columns: column names, in the column order of the feature matrix.
    bnd_tmp_i: positional row indices of the community members (duplicates
      are ignored).
    data: optional 2-D feature matrix (rows = data points). Defaults to the
      notebook-level df_tda_cognition_np, which is what this helper always
      used.

  Returns:
    (means, pvals): two dicts keyed by column name - the community mean of
    each feature, and the two-sample KS p-value of community vs. the rest.
  """
  if data is None:
    data = df_tda_cognition_np

  #Data points that create this community
  inside = np.unique(bnd_tmp_i).astype(int)
  #Other data points that do not create this community; the complement is
  #taken over the very array that is indexed below.
  outside = np.setdiff1d(np.arange(data.shape[0]), inside)

  pvals = {}
  means = {}
  for f, column in enumerate(data_columns):
    group1, group2 = data[inside, f], data[outside, f]
    _, pval = ks_2samp(group1, group2)

    means[column] = np.mean(group1)
    pvals[column] = pval

  return means, pvals

In [78]:
comm_number = 0


def get_means_and_pvalues( meanings_dictionary, bnd_temp, numm, df_tda_cognition):
  """Collect the per-feature means of every community.

  Args:
    meanings_dictionary: unused; kept for the existing callers.
    bnd_temp: one array of member row indices per community.
    numm: unused; kept for the existing callers.
    df_tda_cognition: the analysis DataFrame; only its column names are read.

  Returns:
    (all_means, pvals): all_means is a list with one {column: mean} dict per
    community. pvals is the {column: p-value} dict of the LAST community
    only (unchanged behaviour), or {} when bnd_temp is empty.
  """
  data_columns = list(df_tda_cognition.columns)

  all_means = []
  # Defined up front so an empty bnd_temp returns cleanly instead of raising
  # UnboundLocalError at the return statement.
  pvals = {}
  for members in bnd_temp:
    means, pvals = get_means_and_pvalues_helper(data_columns, members)
    all_means.append(means)

  return all_means, pvals

In [79]:



def get_one_nodes_avg_value(nodeee, feature="FS_R_Rostralanteriorcingulate_Thck"):
  """Average one feature over the patients of one Mapper node (or several).

  Args:
    nodeee: a node name such as 'cube598_cluster0', or a sequence of node
      names whose members are pooled.
    feature: column whose mean is returned; defaults to the column this
      helper was originally hard-wired to.

  Returns:
    (mean, n_patients): mean of `feature` over the pooled, de-duplicated
    patients, and how many patients that is.
  """
  patients_ids = []
  for node_name in np.unique(nodeee):
    patients_ids += graphh['nodes'][node_name]
  members = np.unique(patients_ids)

  # The pooled node members are treated as a single community.
  all_means, _ = get_means_and_pvalues(meanings_dictionary, [members], 0, df_tda_cognition)
  return all_means[0][feature], len(members)

#31+
comm1 = ['cube55_cluster0', 'cube56_cluster0','cube82_cluster0','cube137_cluster0','cube109_cluster0','cube138_cluster0',
        'cube202_cluster0','cube139_cluster0','cube168_cluster0','cube203_cluster0','cube140_cluster0', 'cube167_cluster0',
         'cube204_cluster0', 'cube169_cluster0','cube205_cluster0','cube141_cluster0','cube170_cluster0', 'cube207_cluster0',
         'cube171_cluster0', 'cube206_cluster0',
         
         'cube394_cluster0', 'cube363_cluster0','cube393_cluster0','cube362_cluster0','cube332_cluster0','cube333_cluster0',
        'cube392_cluster0','cube361_cluster0','cube210_cluster0','cube243_cluster0','cube175_cluster0', 
         'cube276_cluster0','cube242_cluster0',
         ]


#[22-30]
comm2 = ['cube1027_cluster0', 'cube970_cluster0','cube936_cluster0','cube778_cluster0','cube935_cluster0','cube821_cluster0',
         'cube1026_cluster0', 'cube969_cluster0','cube900_cluster0','cube819_cluster0','cube897_cluster0', 'cube780_cluster0',
         'cube995_cluster0', 'cube968_cluster0','cube901_cluster0','cube820_cluster0','cube899_cluster0','cube898_cluster0',
         'cube996_cluster0', 'cube937_cluster0','cube861_cluster0','cube779_cluster0','cube859_cluster0','cube858_cluster0',
         'cube857_cluster0', 'cube817_cluster0','cube743_cluster0',
         
         
         'cube743_cluster0', 'cube744_cluster0','cube745_cluster0','cube821_cluster0','cube781_cluster0','cube780_cluster0',
         'cube779_cluster0', 'cube778_cluster0','cube938_cluster0','cube902_cluster0','cube862_cluster0','cube861_cluster0',
         'cube863_cluster0', 'cube903_cluster0','cube708_cluster0',
         ]

#[22-30]
comm3 = ['cube633_cluster0', 'cube563_cluster0','cube597_cluster0','cube738_cluster0', 'cube560_cluster0','cube441_cluster0',
         'cube632_cluster0', 'cube561_cluster0','cube519_cluster0','cube440_cluster0', 'cube629_cluster0','cube562_cluster0',
         'cube665_cluster0', 'cube663_cluster0','cube478_cluster0','cube518_cluster0', 'cube520_cluster0','cube630_cluster0',
         'cube631_cluster0', 'cube702_cluster0','cube479_cluster0','cube442_cluster0', 'cube559_cluster0',  
         'cube599_cluster0', 'cube664_cluster0','cube701_cluster0' ,'cube517_cluster0', 'cube476_cluster0', 
         'cube516_cluster0', 'cube473_cluster0','cube474_cluster0', 'cube477_cluster0','cube558_cluster0','cube598_cluster0',
          
         'cube666_cluster0', 'cube600_cluster0','cube564_cluster0', 'cube563_cluster0','cube521_cluster0','cube479_cluster0',
         'cube443_cluster0', 'cube480_cluster0','cube403_cluster0', 'cube404_cluster0','cube522_cluster0'
          ]


#MIX AGE
comm4 = ['cube1078_cluster0', 'cube1077_cluster0','cube1075_cluster0','cube1074_cluster0', 'cube1072_cluster0','cube1038_cluster0',
         'cube1039_cluster0', 'cube1040_cluster0','cube1041_cluster0','cube983_cluster0', 'cube984_cluster0','cube985_cluster0',
         'cube953_cluster0', 'cube917_cluster0','cube916_cluster0','cube952_cluster0', 'cube915_cluster0','cube951_cluster0',
         
         'cube986_cluster0', 'cube987_cluster0','cube918_cluster0','cube954_cluster0', 'cube919_cluster0',  
         'cube955_cluster0', 'cube920_cluster0','cube879_cluster0' ,'cube921_cluster0', 'cube880_cluster0', 
         'cube881_cluster0', 'cube841_cluster0','cube840_cluster0', 'cube800_cluster0','cube799_cluster0',
          
         'cube764_cluster0', 'cube763_cluster0','cube762_cluster0', 'cube837_cluster0','cube796_cluster0','cube760_cluster0',
         'cube721_cluster0', 'cube682_cluster0','cube683_cluster0', 'cube722_cluster0',
          ]       


# Add up the member counts of every distinct node in comm4. A patient that
# sits in several nodes is counted once per node.
summ = sum(get_one_nodes_avg_value(node_name)[1] for node_name in np.unique(comm4))

print()
print(summ)





282


In [80]:
# Per-community feature means for the four communities in bnd_tmp. The second
# return value holds the p-values of the last community only.
all_means, pvalues = get_means_and_pvalues(meanings_dictionary, bnd_tmp, 0, df_tda_cognition)

In [81]:
##################################################################################
############################################
############################################
############################################
############################################
############################################
############################################
############################################
##################################################################################

In [82]:
#############################
def get_mean_tables(feature, feature_meaning, all_comm_features ,all_means, com_numb , df_tda_cognition):
  """Build the per-community cells of one row of the means table.

  Args:
    feature: column code used to look up the mean in each all_means entry.
    feature_meaning: meaning string used for the significance lookup.
    all_comm_features: one list of significant meaning strings per community.
    all_means: one {column code: mean} dict per community.
    com_numb: unused; kept for the existing callers.
    df_tda_cognition: unused; kept for the existing callers.

  Returns:
    One string per community: "X_<mean>" when the feature is significant in
    that community, "._<mean>" otherwise, with the mean shown to 2 decimals.
  """
  cells = []
  for i, comm_features in enumerate(all_comm_features):
    marker = "X_" if feature_meaning in comm_features else "._"
    cells.append(marker + "{:.2f}".format(all_means[i][feature]))
  return cells

In [83]:
############################################

def print_mean( meanings_dictionary, bnd_temp, df_tda_cognition, all_means):
  """Print, per community, a table of its significant features with means.

  A feature is significant for a community when its KS p-value (from
  compute_differential_coordinates) is below 0.05. Each table row shows the
  feature name, its three-character category, and one cell per community made
  of a significance marker plus that community's mean (see get_mean_tables).

  Args:
    meanings_dictionary: maps a column code to its meaning string, whose
      first three characters are the category and whose text from position 4
      on is the display name.
    bnd_temp: one array of member row indices per community.
    df_tda_cognition: the analysis DataFrame.
    all_means: one {column code: mean} dict per community.
  """
  # Pass 1: run the KS ranking once per community and keep the significant
  # (code, meaning) pairs in p-value order. This used to be computed twice.
  significant = []
  for members in bnd_temp:
    coordinates, pvalues = compute_differential_coordinates(df_tda_cognition, members)
    pairs = []
    for position, pvalue in zip(coordinates, pvalues):
      if pvalue < 0.05:
        code = df_tda_cognition.columns[position]
        pairs.append((code, meanings_dictionary[code]))
    significant.append(pairs)

  all_feature_meanings = [[meaning for _, meaning in pairs] for pairs in significant]

  # Pass 2: one table per community.
  for i, pairs in enumerate(significant):
    rows = get_number_of_communities_to_print(df_tda_cognition, bnd_temp)
    for code, meaning in pairs:
      category, feature_name = meaning[0:3], meaning[4:]
      rows.append([feature_name, category]
                  + get_mean_tables(code, meaning, all_feature_meanings, all_means, i, df_tda_cognition))

    table = Texttable()
    table.set_cols_align(["c"] * (len(bnd_temp)+2))
    table.set_deco(Texttable.HEADER | Texttable.VLINES)
    table.add_rows(rows)

    print("\n--------------------------------------------------------------")#, file=f)

    # The age list of the community is not printed here, so it is no longer
    # computed.
    print("GROUP-",i+1 ) #, ", " , AGE ) #, file=f)
    print(table.draw()) #, file=f)

    print("--------------------------------------------------------------")#, file=f)
  


In [84]:
# One table per community: its significant features (KS p < 0.05) with the
# mean of each feature in every community.
print_mean( meanings_dictionary, bnd_tmp, df_tda_cognition, all_means)






--------------------------------------------------------------
GROUP- 1
   Clinical    | Cat. |     G1      |      G2      |      G3      |      G4     
   features    |      |             |              |              |             
  Total gray   | GMV  | X_592655.32 | X_771533.94  | X_714666.40  | X_752002.16 
matter volume  |      |             |              |              |             
Total cortical | GMV  | X_435261.56 | X_578701.44  | X_534875.82  | X_562891.96 
 gray matter   |      |             |              |              |             
    volume     |      |             |              |              |             
Supratentorial | FSS  | X_905980.23 | X_1144966.52 | X_1037582.75 | X_1179358.45
    volume     |      |             |              |              |             
    Total      | GMV  | X_55629.67  |  X_65507.86  |  ._60928.54  |  X_66823.75 
 subcortical   |      |             |              |              |             
 gray matter   |      |             

In [85]:
# Disabled example: this whole cell is ONE triple-quoted string literal, so none
# of the code inside it executes; the notebook merely echoes the string's repr
# as the cell output. The quoted text is a DyNeuSR / KeplerMapper walkthrough on
# the nilearn Haxby dataset that ends by passing 'crazy.html' to dG.visualize.
# NOTE(review): cells that hold only disabled code are usually better removed
# or moved to a separate scratch notebook -- confirm whether this is still needed.
# NOTE(review): `nilearn.input_data` is reportedly superseded by `nilearn.maskers`
# in newer nilearn releases -- verify the import path before re-enabling.
'''
import numpy as np 
import pandas as pd
import networkx as nx

from nilearn.datasets import fetch_haxby
from nilearn.input_data import NiftiMasker

from kmapper import KeplerMapper, Cover
from sklearn.cluster import DBSCAN
from sklearn.decomposition import PCA 
from sklearn.manifold import TSNE
from umap.umap_ import UMAP

from dyneusr import DyNeuGraph
from dyneusr.tools import visualize_mapper_stages
from dyneusr.mapper.utils import optimize_dbscan



# Fetch dataset, extract time-series from ventral temporal (VT) mask
dataset = fetch_haxby()
masker = NiftiMasker(
    dataset.mask_vt[0], 
    standardize=True, detrend=True, smoothing_fwhm=4.0,
    low_pass=0.09, high_pass=0.008, t_r=2.5,
    memory="nilearn_cache")
X = masker.fit_transform(dataset.func[0])

# Encode labels as integers
df = pd.read_csv(dataset.session_target[0], sep=" ")
target, labels = pd.factorize(df.labels.values)
y = pd.DataFrame({l:(target==i).astype(int) for i,l in enumerate(labels)})

# Extract sessions 4-5
mask_sessions = df.chunks.add(1).isin([4, 5])
X = X[mask_sessions]
y = y.loc[mask_sessions, :]
target = target[mask_sessions]



# Generate a shape graph using KeplerMapper
mapper = KeplerMapper(verbose=1)

# Configure projection
tsne = TSNE(2, init='pca', random_state=1)

# Construct lens and generate the shape graph
lens = mapper.fit_transform(tsne.fit_transform(X), projection=[0, 1]) 

graph = mapper.map(
    lens, X=X, 
    cover=Cover(20, 0.5),
    clusterer=optimize_dbscan(X, k=3, p=100.0), )



# Convert to a DyNeuGraph
dG = DyNeuGraph(G=graph, y=y)

# Define some custom_layouts
dG.add_custom_layout(lens, name='lens')
dG.add_custom_layout(nx.spring_layout, name='nx.spring')
dG.add_custom_layout(nx.kamada_kawai_layout, name='nx.kamada_kawai')
dG.add_custom_layout(nx.spectral_layout, name='nx.spectral')
dG.add_custom_layout(nx.circular_layout, name='nx.circular')

# Configure some projections
pca = PCA(2, random_state=1)
tsne = TSNE(2, init='pca', random_state=1)
umap = UMAP(n_components=2, init=pca.fit_transform(X))

# Add projections as custom_layouts
dG.add_custom_layout(pca.fit_transform(X), name='PCA')
dG.add_custom_layout(tsne.fit_transform(X), name='TSNE')
dG.add_custom_layout(umap.fit_transform(X, y=None), name='UMAP')
dG.add_custom_layout(umap.fit_transform(X, y=target), name='Supervised UMAP')

# Visualize 
dG.visualize('crazy.html')

'''

'\nimport numpy as np \nimport pandas as pd\nimport networkx as nx\n\nfrom nilearn.datasets import fetch_haxby\nfrom nilearn.input_data import NiftiMasker\n\nfrom kmapper import KeplerMapper, Cover\nfrom sklearn.cluster import DBSCAN\nfrom sklearn.decomposition import PCA \nfrom sklearn.manifold import TSNE\nfrom umap.umap_ import UMAP\n\nfrom dyneusr import DyNeuGraph\nfrom dyneusr.tools import visualize_mapper_stages\nfrom dyneusr.mapper.utils import optimize_dbscan\n\n\n\n# Fetch dataset, extract time-series from ventral temporal (VT) mask\ndataset = fetch_haxby()\nmasker = NiftiMasker(\n    dataset.mask_vt[0], \n    standardize=True, detrend=True, smoothing_fwhm=4.0,\n    low_pass=0.09, high_pass=0.008, t_r=2.5,\n    memory="nilearn_cache")\nX = masker.fit_transform(dataset.func[0])\n\n# Encode labels as integers\ndf = pd.read_csv(dataset.session_target[0], sep=" ")\ntarget, labels = pd.factorize(df.labels.values)\ny = pd.DataFrame({l:(target==i).astype(int) for i,l in enumerate(la

In [86]:
# Disabled example: this whole cell is ONE triple-quoted string literal, so none
# of the code inside it executes; the notebook merely echoes the string's repr
# as the cell output. The quoted text is a DyNeuSR / KeplerMapper walkthrough on
# the synthetic make_trefoil dataset that ends by passing "deneme_1.html" to
# dG.visualize.
# NOTE(review): cells that hold only disabled code are usually better removed
# or moved to a separate scratch notebook -- confirm whether this is still needed.
# NOTE(review): mapper.map(..., nr_cubes=..., overlap_perc=...) looks like the
# older kmapper keyword style (the previous cell passes cover=Cover(...) instead)
# -- confirm which form the installed kmapper accepts before re-enabling.
'''
import networkx as nx
from sklearn.decomposition import PCA 
from sklearn.manifold import TSNE
from umap.umap_ import UMAP

from dyneusr import DyNeuGraph
from dyneusr.datasets import make_trefoil
from kmapper import KeplerMapper



# Generate synthetic dataset
dataset = make_trefoil(size=100)
X = dataset.data
y = dataset.target



# Generate shape graph using KeplerMapper
mapper = KeplerMapper(verbose=1)
lens = mapper.fit_transform(X, projection=[0])
graph = mapper.map(lens, X, nr_cubes=6, overlap_perc=0.2)



# Convert to a DyNeuGraph
dG = DyNeuGraph(G=graph, y=y)

# Define some custom_layouts
dG.add_custom_layout(lens, name='lens')
dG.add_custom_layout(nx.spring_layout, name='nx.spring')
dG.add_custom_layout(nx.kamada_kawai_layout, name='nx.kamada_kawai')
dG.add_custom_layout(nx.spectral_layout, name='nx.spectral')
dG.add_custom_layout(nx.circular_layout, name='nx.circular')

# Configure some projections
pca = PCA(2, random_state=1)
tsne = TSNE(2, init='pca', random_state=1)
umap = UMAP(n_components=2, init=pca.fit_transform(X))

# Add projections as custom_layouts
dG.add_custom_layout(pca.fit_transform(X), name='PCA')
dG.add_custom_layout(tsne.fit_transform(X), name='TSNE')
dG.add_custom_layout(umap.fit_transform(X, y=None), name='UMAP')

# Visualize 
dG.visualize("deneme_1.html")

'''

'\nimport networkx as nx\nfrom sklearn.decomposition import PCA \nfrom sklearn.manifold import TSNE\nfrom umap.umap_ import UMAP\n\nfrom dyneusr import DyNeuGraph\nfrom dyneusr.datasets import make_trefoil\nfrom kmapper import KeplerMapper\n\n\n\n# Generate synthetic dataset\ndataset = make_trefoil(size=100)\nX = dataset.data\ny = dataset.target\n\n\n\n# Generate shape graph using KeplerMapper\nmapper = KeplerMapper(verbose=1)\nlens = mapper.fit_transform(X, projection=[0])\ngraph = mapper.map(lens, X, nr_cubes=6, overlap_perc=0.2)\n\n\n\n# Convert to a DyNeuGraph\ndG = DyNeuGraph(G=graph, y=y)\n\n# Define some custom_layouts\ndG.add_custom_layout(lens, name=\'lens\')\ndG.add_custom_layout(nx.spring_layout, name=\'nx.spring\')\ndG.add_custom_layout(nx.kamada_kawai_layout, name=\'nx.kamada_kawai\')\ndG.add_custom_layout(nx.spectral_layout, name=\'nx.spectral\')\ndG.add_custom_layout(nx.circular_layout, name=\'nx.circular\')\n\n# Configure some projections\npca = PCA(2, random_state=1)\n