# Notebook to Compare 2 R-Maps using Permutation
- Notes on controlling a regression:
    - Adding covariates to a regression will 'control' for them, but will almost always increase the R-squared. 
    - To 'remove' a covariate from the regression, you will want to regress a nuisance covariate OUT of the covariate of interest. 
        - This means your regressor will become the residuals from the regression of cov_1 ~ nuisance_cov1

In [2322]:
out_dir = '/Volumes/Expansion/datasets/Manitoba_PET/PET_R_Maps/autonocog_memory/similarity_to_lesion_memory_net'

In [2323]:
#dataset 1 info
## Alzheimer Atrophy
import_path = '/Volumes/Expansion/datasets/adni/neuroimaging/true_ad_randomized/connectivity/seeded_connectivity'
file_target = '*_T.nii*'
pre = 'sub-subh'
post = 'ucomposite'
input_csv_path = '/Users/cu135/Partners HealthCare Dropbox/Calvin Howard/studies/atrophy_seeds_2023/metadata/true_ad_patient_data/z0_atrophy_thresholded.csv'
sheet= None 
variable_of_interest = 'Q4'
column = None # The column you'd like to evaluate
condition = None  # Thecondition to check ('equal', 'above', 'below', 'not')
value = None # The value to compare against

## Grafman 
# import_path = '/Volumes/Expansion/datasets/GRAFMAN_TBI_MULTIFOCAL/grafman_fc'
# file_target = '*/connectivity/*t_conn.nii*'
# pre = 'sub-'
# post = '_tome'
# input_csv_path = '/Volumes/Expansion/datasets/GRAFMAN_TBI_MULTIFOCAL/metadata/mmse_scores.csv'
# sheet = None 
# variable_of_interest = 'mmse5'
# column = None  # The column you'd like to evaluate
# condition = None  # Thecondition to check ('equal', 'above', 'below', 'not')
# value = None # for dropping. The value to compare against

## Corbetta
# import_path = '/Volumes/Expansion/datasets/CORBETTA_STROKE_MULTIFOCAL/BIDS_Dataset'
# file_target = '*/connectivity/*t_conn.nii*'
# pre = 'sub-'
# post = '_tome'
# input_csv_path = '/Volumes/Expansion/datasets/CORBETTA_STROKE_MULTIFOCAL/Study_Metadata/FCS_Demographics_and_Behavior_MDF2.xlsx'
# sheet = 'FCS_Demographics_and_Behavior.c' 
# variable_of_interest = 'hvlt_delay'
# column = 'redcap_event_name'  # The column you'd like to evaluate
# condition = 'not'  # Thecondition to check ('equal', 'above', 'below', 'not')
# value = 'acute_arm_1' # The value to compare against

## Epilepsy Hypometabolism
# import_path = '/Volumes/Expansion/datasets/Manitoba_PET/PET_Conn'
# file_target = '*_T.nii*'
# pre = 'sub-subh'
# post = 'ulesion'
# input_csv_path = '/Volumes/Expansion/datasets/Manitoba_PET/master_list_howard.xlsx'
# sheet = 'autonocog' 
# variable_of_interest = 'Memory__max_26_'
# column = None  # The column you'd like to evaluate
# condition = None  # Thecondition to check ('equal', 'above', 'below', 'not')
# value = None # for dropping. The value to compare against

## PD DBS Queensland
# import_path = '/Volumes/Expansion/datasets/Queensland_PD_DBS_STN/derivatives/conn'
# file_target = '*_T.nii*'
# pre = 'LeadDBS'
# post = '_vat'
# input_csv_path = '/Volumes/Expansion/datasets/Queensland_PD_DBS_STN/Clinical/BE_CGN_QU_Clinical_Demographic.xlsx'
# sheet = 'Memory' 
# variable_of_interest = 'MOCA_Recall_change_PreToFU4_rel____'
# column = None  # The column you'd like to evaluate
# condition = None  # Thecondition to check ('equal', 'above', 'below', 'not')
# value = None # for dropping. The value to compare against

## Epilepsy DBS SANTE
# import_path = '/Volumes/Expansion/datasets/SANTE_Epilepsy_DBS_ANT/derivatives/conn'
# file_target = '*/connectivity/*t_conn.nii*'
# pre = 'sub-superconservativeu'
# post = '_tome'
# input_csv_path = '/Volumes/Expansion/datasets/SANTE_Epilepsy_DBS_ANT/metadata/sante_cognitive_scores_with_percent_change_WIDE.csv'
# sheet = None 
# variable_of_interest = 'Any_trouble_with_Memory_number_Percent_Change_Corrected_V2_Month_12'
# column = None  # The column you'd like to evaluate
# condition = None  # Thecondition to check ('equal', 'above', 'below', 'not')
# value = None # for dropping. The value to compare against

## Epilepsy iEEG
import_path = '/Volumes/Expansion/datasets/Kahana_Epilepsy_iEEG/derivatives/connectivity'
file_target = '*_T.nii*'
pre = 'sub-'
post = '_T'
input_csv_path = '/Volumes/Expansion/datasets/Kahana_Epilepsy_iEEG/master_list.csv'
sheet = None 
variable_of_interest = 'deltarec'
column = None  # The column you'd like to evaluate
condition = None  # Thecondition to check ('equal', 'above', 'below', 'not')
value = None # for dropping. The value to compare against

# ## AD DBS
# import_path = '/Volumes/Expansion/datasets/ADVANCE_AD_DBS_FORNIX/connectivity_data/vta_published_t_connectivity'
# file_target = '*_T.nii*'
# pre = ''
# post = '_vat'
# input_csv_path = '/Users/cu135/Partners HealthCare Dropbox/Calvin Howard/studies/cognition_2023/metadata/master_list_proper_subjects.xlsx'
# sheet = 'master_list_proper_subjects' 
# variable_of_interest = 'Percent_Cognitive_Improvement'
# column = None  # The column you'd like to evaluate
# condition = None  # Thecondition to check ('equal', 'above', 'below', 'not')
# value = None # The value to compare against

In [2324]:
#dataset 2 info
## Alzheimer Atrophy
# import_path2 = '/Volumes/Expansion/datasets/adni/neuroimaging/true_ad_randomized/connectivity/seeded_connectivity'
# file_target2 = '*_T.nii*'
# pre2 = 'sub-subh'
# post2 = 'ucomposite'
# input_csv_path2 = '/Users/cu135/Partners HealthCare Dropbox/Calvin Howard/studies/atrophy_seeds_2023/metadata/true_ad_patient_data/z0_atrophy_thresholded.csv'
# sheet2 = None 
# voi2 = 'Q4'
# column2 = ''  # The column you'd like to evaluate
# condition2 = 'not'  # Thecondition to check ('equal', 'above', 'below', 'not')
# value2 = '' # for dropping. The value to compare against

## Grafman 
# import_path2 = '/Volumes/Expansion/datasets/GRAFMAN_TBI_MULTIFOCAL/grafman_fc'
# file_target2 = '*/connectivity/*t_conn.nii*'
# pre2 = 'sub-'
# post2 = '_tome'
# input_csv_path2 = '/Volumes/Expansion/datasets/GRAFMAN_TBI_MULTIFOCAL/metadata/mmse_scores.csv'
# sheet2 = None 
# voi2 = 'mmse5'
# column2 = None  # The column you'd like to evaluate
# condition2 = None  # Thecondition to check ('equal', 'above', 'below', 'not')
# value2 = None # for dropping. The value to compare against

# Corbetta
# import_path2 = '/Volumes/Expansion/datasets/CORBETTA_STROKE_MULTIFOCAL/BIDS_Dataset'
# file_target2 = '*/connectivity/*t_conn.nii*'
# pre2 = 'sub-'
# post2 = '_tome'
# input_csv_path2 = '/Volumes/Expansion/datasets/CORBETTA_STROKE_MULTIFOCAL/Study_Metadata/FCS_Demographics_and_Behavior_MDF2.xlsx'
# sheet2 = 'FCS_Demographics_and_Behavior.c' 
# voi2 = 'hvlt_delay'
# column2 = 'redcap_event_name'  # The column you'd like to evaluate
# condition2 = 'not'  # Thecondition to check ('equal', 'above', 'below', 'not')
# value2 = 'acute_arm_1' # The value to compare against

# Epilepsy Hypometabolism
# import_path2 = '/Volumes/Expansion/datasets/Manitoba_PET/PET_Conn'
# file_target2 = '*_T.nii*'
# pre2 = 'sub-subh'
# post2 = 'ulesion'
# input_csv_path2 = '/Volumes/Expansion/datasets/Manitoba_PET/master_list_howard.xlsx'
# sheet2 = 'autonocog' 
# voi2 = 'Memory__max_26_'
# column2 = None  # The column you'd like to evaluate
# condition2 = None  # Thecondition to check ('equal', 'above', 'below', 'not')
# value2 = None # for dropping. The value to compare against

## PD DBS Queensland
# import_path2 = '/Volumes/Expansion/datasets/Queensland_PD_DBS_STN/derivatives/conn'
# file_target2 = '*_T.nii*'
# pre2 = 'LeadDBS'
# post2 = '_vat'
# input_csv_path2 = '/Volumes/Expansion/datasets/Queensland_PD_DBS_STN/Clinical/BE_CGN_QU_Clinical_Demographic.xlsx'
# sheet2 = 'Memory' 
# voi2 = 'MOCA_Recall_change_PreToFU4_rel____'
# column2 = None  # The column you'd like to evaluate
# condition2 = None  # Thecondition to check ('equal', 'above', 'below', 'not')
# value2 = None # for dropping. The value to compare against

## Epilepsy DBS SANTE
# import_path2 = '/Volumes/Expansion/datasets/SANTE_Epilepsy_DBS_ANT/derivatives/conn'
# file_target2 = '*/connectivity/*t_conn.nii*'
# pre2 = 'sub-superconservativeu'
# post2 = '_tome'
# input_csv_path2 = '/Volumes/Expansion/datasets/SANTE_Epilepsy_DBS_ANT/metadata/sante_cognitive_scores_with_percent_change_WIDE.csv'
# sheet2 = None 
# voi2 = 'Any_trouble_with_Memory_number_Percent_Change_Corrected_V2_Month_12'
# column2 = None  # The column you'd like to evaluate
# condition2 = None  # Thecondition to check ('equal', 'above', 'below', 'not')
# value2 = None # for dropping. The value to compare against

# ## Epilepsy iEEG
# import_path2 = '/Volumes/Expansion/datasets/Kahana_Epilepsy_iEEG/derivatives/connectivity'
# file_target2 = '*_T.nii*'
# pre2 = 'sub-'
# post2 = '_T'
# input_csv_path2 = '/Volumes/Expansion/datasets/Kahana_Epilepsy_iEEG/master_list.csv'
# sheet2 = None 
# voi2 = 'deltarec'
# column2 = None  # The column you'd like to evaluate
# condition2 = None  # Thecondition to check ('equal', 'above', 'below', 'not')
# value2 = None # for dropping. The value to compare against

# ## AD DBS
import_path2 = '/Volumes/Expansion/datasets/ADVANCE_AD_DBS_FORNIX/connectivity_data/vta_published_t_connectivity'
file_target2 = '*_T.nii*'
pre2 = ''
post2 = '_vat'
input_csv_path2 = '/Users/cu135/Partners HealthCare Dropbox/Calvin Howard/studies/cognition_2023/metadata/master_list_proper_subjects.xlsx'
sheet2 = 'master_list_proper_subjects' 
voi2 = 'Percent_Cognitive_Improvement'
column2 = None  # The column you'd like to evaluate
condition2 = None  # Thecondition to check ('equal', 'above', 'below', 'not')
value2 = None # The value to compare against

# Get Dataset One

Import Niftis
- These are EXPECTED to have subject IDs which are IDENTICAL to the subject IDs that go in the covarite DF column names below
- Column labels are subject IDs. 
- This is expected to ultimately have the form:

|        |  1 |  2 |  3 |  4 |  5 |  6 |  7 |  8 |  9 |  10 | ... |  40 |  41 |  42 |  43 |  45 |  46 |  47 |  48 |  49 |  50 |
|----------|------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|------------|-----|-------------|-------------|-------------|-------------|-------------|-------------|-------------|-------------|-------------|-------------|
| Voxel 1     | 3          | 4         | 7         | 2         | 2         | 2         | 9         | 4         | 7         | 5          | ... | 5           | 2           | 7           | 7           | 3           | 8           | 8           | 1           | 1           | 3           |
| . . .      | ...         | ...        | ...         | ...         | ...         | ...         | ...         | ...         | ...         | ...          | ... | ...           | ...           | ...           | ...           | ...           | ...           | 7           | ...           | ...           | ...           |
| Voxel N     | 2          | 1         | 0         | 1         | 3         | 4         | 9         | 5         | 8         | 6          | ... | 6           | 3           | 8           | 8           | 4           | 9           | 9           | 2           | 2           | 4           |

In [2325]:
from calvin_utils.file_utils.import_functions import GiiNiiFileImport
giinii = GiiNiiFileImport(import_path=import_path, file_column=None, file_pattern=file_target)
nimg_df = giinii.run()
nimg_df

Attempting to import from: /Volumes/Expansion/datasets/Kahana_Epilepsy_iEEG/derivatives/connectivity/*_T.nii*


Unnamed: 0,sub-2_T.nii.gz,sub-69_T.nii.gz,sub-14_T.nii.gz,sub-87_T.nii.gz,sub-10_T.nii.gz,sub-38_T.nii.gz,sub-34_T.nii.gz,sub-67_T.nii.gz,sub-8_T.nii.gz,sub-1_T.nii.gz,...,sub-71_T.nii.gz,sub-72_T.nii.gz,sub-84_T.nii.gz,sub-85_T.nii.gz,sub-86_T.nii.gz,sub-94_T.nii.gz,sub-95_T.nii.gz,sub-100_T.nii.gz,sub-102_T.nii.gz,sub-108_T.nii.gz
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
902624,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
902625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
902626,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
902627,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Fix names

In [2326]:
nimg_df = GiiNiiFileImport.splice_colnames(nimg_df, pre, post)
nimg_df

Unnamed: 0,2,69,14,87,10,38,34,67,8,1,...,71,72,84,85,86,94,95,100,102,108
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
902624,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
902625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
902626,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
902627,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Import Covariates

**The CSV is expected to be in this format**
- sub column contents MUST match the names of the neuroimaging files above. 
    - ID column 
```
+-----+----------------------------+--------------+--------------+--------------+
| sub | Nifti_File_Path            | Covariate_1  | Covariate_2  | Covariate_3  |
+-----+----------------------------+--------------+--------------+--------------+
| 1   | /path/to/file1.nii.gz      | 0.5          | 1.2          | 3.4          |
| 2   | /path/to/file2.nii.gz      | 0.7          | 1.4          | 3.1          |
| 3   | /path/to/file3.nii.gz      | 0.6          | 1.5          | 3.5          |
| 4   | /path/to/file4.nii.gz      | 0.9          | 1.1          | 3.2          |
| ... | ...                        | ...          | ...          | ...          |
+-----+----------------------------+--------------+--------------+--------------+
```

In [2327]:
from calvin_utils.permutation_analysis_utils.statsmodels_palm import CalvinStatsmodelsPalm
# Instantiate the PalmPrepararation class
cal_palm = CalvinStatsmodelsPalm(input_csv_path=input_csv_path, output_dir=out_dir, sheet=sheet)
# Call the process_nifti_paths method
data_df = cal_palm.read_and_display_data()

Unnamed: 0,deltarec,subject,coordinate,conn_folderfolder,seed_folder
0,-9.821429,1,x_neg29.1097_y_neg3.71512_z_neg26.02946584,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...
1,-15.550244,2,x_pos19.2609_y_neg12.1202_z_neg20.18691003,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...
2,-1.284781,3,x_pos30.4569_y_neg21.1225_z_neg22.62513552,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...
3,-59.947090,4,x_neg38.3134_y_neg18.5687_z_neg21.06015877,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...
4,-61.111111,5,x_neg45.9308_y_pos47.9976_z_pos13.03966543,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...
...,...,...,...,...,...
103,19.718310,104,x_neg62.9728_y_neg21.8125_z_neg1.572045963,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...
104,59.935897,105,x_pos14.371_y_pos37.9308_z_pos20.56547742,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...
105,15.659722,106,x_neg58.9858_y_neg5.31671_z_neg23.65476014,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...
106,16.014056,107,x_pos7.7623_y_pos17.091_z_pos31.95387294,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...


In [2328]:
# data_df['subject'] = data_df['subject'].str[4:]
data_df

Unnamed: 0,deltarec,subject,coordinate,conn_folderfolder,seed_folder
0,-9.821429,1,x_neg29.1097_y_neg3.71512_z_neg26.02946584,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...
1,-15.550244,2,x_pos19.2609_y_neg12.1202_z_neg20.18691003,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...
2,-1.284781,3,x_pos30.4569_y_neg21.1225_z_neg22.62513552,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...
3,-59.947090,4,x_neg38.3134_y_neg18.5687_z_neg21.06015877,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...
4,-61.111111,5,x_neg45.9308_y_pos47.9976_z_pos13.03966543,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...
...,...,...,...,...,...
103,19.718310,104,x_neg62.9728_y_neg21.8125_z_neg1.572045963,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...
104,59.935897,105,x_pos14.371_y_pos37.9308_z_pos20.56547742,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...
105,15.659722,106,x_neg58.9858_y_neg5.31671_z_neg23.65476014,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...
106,16.014056,107,x_pos7.7623_y_pos17.091_z_pos31.95387294,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...


**Preprocess Your Data**

**Handle NANs**
- Set drop_nans=True is you would like to remove NaNs from data
- Provide a column name or a list of column names to remove NaNs from

In [2329]:
# data_df.columns

In [2330]:
drop_list = [variable_of_interest]

In [2331]:
data_df = cal_palm.drop_nans_from_columns(columns_to_drop_from=drop_list)
display(data_df)

Unnamed: 0,deltarec,subject,coordinate,conn_folderfolder,seed_folder
0,-9.821429,1,x_neg29.1097_y_neg3.71512_z_neg26.02946584,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...
1,-15.550244,2,x_pos19.2609_y_neg12.1202_z_neg20.18691003,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...
2,-1.284781,3,x_pos30.4569_y_neg21.1225_z_neg22.62513552,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...
3,-59.947090,4,x_neg38.3134_y_neg18.5687_z_neg21.06015877,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...
4,-61.111111,5,x_neg45.9308_y_pos47.9976_z_pos13.03966543,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...
...,...,...,...,...,...
103,19.718310,104,x_neg62.9728_y_neg21.8125_z_neg1.572045963,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...
104,59.935897,105,x_pos14.371_y_pos37.9308_z_pos20.56547742,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...
105,15.659722,106,x_neg58.9858_y_neg5.31671_z_neg23.65476014,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...
106,16.014056,107,x_pos7.7623_y_pos17.091_z_pos31.95387294,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...


**Drop Row Based on Value of Column**

Define the column, condition, and value for dropping rows
- column = 'your_column_name'
- condition = 'above'  # Options: 'equal', 'above', 'below'

Set the parameters for dropping rows

In [2332]:
data_df, other_df = cal_palm.drop_rows_based_on_value(column, condition, value)
data_df

Unnamed: 0,deltarec,subject,coordinate,conn_folderfolder,seed_folder
0,-9.821429,1,x_neg29.1097_y_neg3.71512_z_neg26.02946584,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...
1,-15.550244,2,x_pos19.2609_y_neg12.1202_z_neg20.18691003,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...
2,-1.284781,3,x_pos30.4569_y_neg21.1225_z_neg22.62513552,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...
3,-59.947090,4,x_neg38.3134_y_neg18.5687_z_neg21.06015877,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...
4,-61.111111,5,x_neg45.9308_y_pos47.9976_z_pos13.03966543,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...
...,...,...,...,...,...
103,19.718310,104,x_neg62.9728_y_neg21.8125_z_neg1.572045963,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...
104,59.935897,105,x_pos14.371_y_pos37.9308_z_pos20.56547742,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...
105,15.659722,106,x_neg58.9858_y_neg5.31671_z_neg23.65476014,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...
106,16.014056,107,x_pos7.7623_y_pos17.091_z_pos31.95387294,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...,/Volumes/Expansion/datasets/Kahana_Epilepsy_iE...


Regress out a Covariate

In [2333]:
# lis = []
# for col in data_df.columns:
#     if 'surface' in col.lower():
#         lis.append(col)
# print(lis)

In [2334]:
# from calvin_utils.statistical_utils.regression_utils import RegressOutCovariates
## use this code block to regress out covariates. Generally better to just include as covariates in a model..
# dependent_variable_list = lis
# regressors = ['Age', 'Sex']

# data_df, adjusted_dep_vars_list = RegressOutCovariates.run(df=data_df, dependent_variable_list=dependent_variable_list, covariates_list=regressors)
# print(adjusted_dep_vars_list)

**Standardize Data**
- Enter Columns you Don't want to standardize into a list

In [2335]:
## Remove anything you don't want to standardize
# cols_not_to_standardize = ['Age',  'Subiculum_Connectivity_T']

In [2336]:
# data_df = cal_palm.standardize_columns(cols_not_to_standardize)
# data_df

Choose Rows to Keep
- Keep your subject row and your dependent variable

In [2337]:
col_to_keep_list = [variable_of_interest, 'subject']

- The final DF is EXPECTED to have subject IDs which are IDENTICAL to the subject IDs that go in the neuroimaging DF column names above
- There should only be 1 variable  the row

|        |  1 |  2 |  3 |  4 |  5 |  6 |  7 |  8 |  9 |  10 | ... |  40 |  41 |  42 |  43 |  45 |  46 |  47 |  48 |  49 |  50 |
|----------|------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|------------|-----|-------------|-------------|-------------|-------------|-------------|-------------|-------------|-------------|-------------|-------------|
| Indep. Var.    | 3          | 4         | 7         | 2         | 2         | 2         | 9         | 4         | 7         | 5          | ... | 5           | 2           | 7           | 7           | 3           | 8           | 8           | 1           | 1           | 3           |

In [2338]:
data_df=data_df.loc[:, col_to_keep_list]
data_df = data_df.T
data_df.columns = data_df.loc['subject', :]
data_df = data_df.drop('subject')
data_df.dropna(inplace=True, axis=1)
data_df

subject,1.0,2.0,3.0,4.0,5.0,6.0,7.0,8.0,9.0,10.0,...,99.0,100.0,101.0,102.0,103.0,104.0,105.0,106.0,107.0,108.0
deltarec,-9.821429,-15.550244,-1.284781,-59.94709,-61.111111,-39.049145,-40.038314,-64.506173,-16.165414,-21.428571,...,16.30609,11.363636,5.714286,9.655172,5.729167,19.71831,59.935897,15.659722,16.014056,26.583333


# Get Dataset Two

Import Niftis
- These are EXPECTED to have subject IDs which are IDENTICAL to the subject IDs that go in the covarite DF column names below
- Column labels are subject IDs. 
- This is expected to ultimately have the form:

|        |  1 |  2 |  3 |  4 |  5 |  6 |  7 |  8 |  9 |  10 | ... |  40 |  41 |  42 |  43 |  45 |  46 |  47 |  48 |  49 |  50 |
|----------|------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|------------|-----|-------------|-------------|-------------|-------------|-------------|-------------|-------------|-------------|-------------|-------------|
| Voxel 1     | 3          | 4         | 7         | 2         | 2         | 2         | 9         | 4         | 7         | 5          | ... | 5           | 2           | 7           | 7           | 3           | 8           | 8           | 1           | 1           | 3           |
| . . .      | ...         | ...        | ...         | ...         | ...         | ...         | ...         | ...         | ...         | ...          | ... | ...           | ...           | ...           | ...           | ...           | ...           | 7           | ...           | ...           | ...           |
| Voxel N     | 2          | 1         | 0         | 1         | 3         | 4         | 9         | 5         | 8         | 6          | ... | 6           | 3           | 8           | 8           | 4           | 9           | 9           | 2           | 2           | 4           |

In [2339]:
from calvin_utils.file_utils.import_functions import GiiNiiFileImport
giinii2 = GiiNiiFileImport(import_path=import_path2, file_column=None, file_pattern=file_target2)
nimg_df2 = giinii2.run()
nimg_df2

Attempting to import from: /Volumes/Expansion/datasets/ADVANCE_AD_DBS_FORNIX/connectivity_data/vta_published_t_connectivity/*_T.nii*


Unnamed: 0,116_vat_seed_compound_fMRI_efield_func_seed_T.nii,1203_vat_seed_compound_fMRI_efield_func_seed_T.nii,139_vat_seed_compound_fMRI_efield_func_seed_T.nii,147_vat_seed_compound_fMRI_efield_func_seed_T.nii,135_vat_seed_compound_fMRI_efield_func_seed_T.nii,101_vat_seed_compound_fMRI_efield_func_seed_T.nii,150_vat_seed_compound_fMRI_efield_func_seed_T.nii,122_vat_seed_compound_fMRI_efield_func_seed_T.nii,1202_vat_seed_compound_fMRI_efield_func_seed_T.nii,146_vat_seed_compound_fMRI_efield_func_seed_T.nii,...,128_vat_seed_compound_fMRI_efield_func_seed_T.nii,107_vat_seed_compound_fMRI_efield_func_seed_T.nii,133_vat_seed_compound_fMRI_efield_func_seed_T.nii,141_vat_seed_compound_fMRI_efield_func_seed_T.nii,110_vat_seed_compound_fMRI_efield_func_seed_T.nii,125_vat_seed_compound_fMRI_efield_func_seed_T.nii,106_vat_seed_compound_fMRI_efield_func_seed_T.nii,129_vat_seed_compound_fMRI_efield_func_seed_T.nii,111_vat_seed_compound_fMRI_efield_func_seed_T.nii,140_vat_seed_compound_fMRI_efield_func_seed_T.nii
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
902624,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
902625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
902626,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
902627,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Fix names

In [2340]:
nimg_df2 = GiiNiiFileImport.splice_colnames(nimg_df2, pre2, post2)
nimg_df2

Unnamed: 0,116,1203,139,147,135,101,150,122,1202,146,...,128,107,133,141,110,125,106,129,111,140
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
902624,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
902625,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
902626,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
902627,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


Import Covariates

**The CSV is expected to be in this format**
- sub column contents MUST match the names of the neuroimaging files above. 
    - ID column 
```
+-----+----------------------------+--------------+--------------+--------------+
| sub | Nifti_File_Path            | Covariate_1  | Covariate_2  | Covariate_3  |
+-----+----------------------------+--------------+--------------+--------------+
| 1   | /path/to/file1.nii.gz      | 0.5          | 1.2          | 3.4          |
| 2   | /path/to/file2.nii.gz      | 0.7          | 1.4          | 3.1          |
| 3   | /path/to/file3.nii.gz      | 0.6          | 1.5          | 3.5          |
| 4   | /path/to/file4.nii.gz      | 0.9          | 1.1          | 3.2          |
| ... | ...                        | ...          | ...          | ...          |
+-----+----------------------------+--------------+--------------+--------------+
```

In [2341]:
from calvin_utils.permutation_analysis_utils.statsmodels_palm import CalvinStatsmodelsPalm
# Instantiate the PalmPrepararation class
cal_palm2 = CalvinStatsmodelsPalm(input_csv_path=input_csv_path2, output_dir=out_dir, sheet=sheet2)
# Call the process_nifti_paths method
data_df2 = cal_palm2.read_and_display_data()

Unnamed: 0,subject,Age,Normalized_Percent_Cognitive_Improvement,Z_Scored_Percent_Cognitive_Improvement_By_Origin_Group,Z_Scored_Percent_Cognitive_Improvement,Percent_Cognitive_Improvement,Z_Scored_Subiculum_T_By_Origin_Group_,Z_Scored_Subiculum_Connectivity_T,Subiculum_Connectivity_T_Redone,Subiculum_Connectivity_T,...,DECLINE,Cognitive_Improve,Z_Scored_Cognitive_Baseline,Z_Scored_Cognitive_Baseline__Lower_is_Better_,Min_Max_Normalized_Baseline,MinMaxNormBaseline_Higher_is_Better,ROI_to_Alz_Max,ROI_to_PD_Max,Standardzied_AD_Max,Standardized_PD_Max
0,101,62.0,-0.392857,0.314066,0.314066,-21.428571,-1.282630,-1.282630,21.150595,56.864683,...,1.0,No,1.518764,-1.518764,0.72,0.28,12.222658,14.493929,-1.714513,-1.227368
1,102,77.0,-0.666667,0.013999,0.013999,-36.363636,-1.760917,-1.760917,19.702349,52.970984,...,1.0,No,0.465551,-0.465551,0.48,0.52,14.020048,15.257338,-1.155843,-1.022243
2,103,76.0,-1.447368,-0.841572,-0.841572,-78.947368,-0.595369,-0.595369,23.231614,62.459631,...,1.0,No,-0.061056,0.061056,0.36,0.64,15.118727,17.376384,-0.814348,-0.452865
3,104,65.0,-2.372549,-1.855477,-1.855477,-129.411765,-0.945206,-0.945206,22.172312,59.611631,...,1.0,No,-0.412127,0.412127,0.28,0.72,13.112424,15.287916,-1.437954,-1.014027
4,105,50.0,-0.192982,0.533109,0.533109,-10.526316,-1.151973,-1.151973,21.546222,57.928350,...,0.0,No,-0.061056,0.061056,0.36,0.64,15.086568,12.951426,-0.824344,-1.641831
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
194,211,58.7,,,,,-0.415745,-0.189000,19.900000,19.900000,...,,Yes,,,,,,,,
195,152,69.4,,,,,-0.701419,-0.455000,17.900000,17.900000,...,,Yes,,,,,,,,
196,208,79.2,,,,,-0.929958,-0.669000,16.300000,16.300000,...,,Yes,,,,,,,,
197,223,71.1,,,,,-0.829972,-0.575000,17.000000,17.000000,...,,Yes,,,,,,,,


**Preprocess Your Data**

**Handle NANs**
- Set drop_nans=True is you would like to remove NaNs from data
- Provide a column name or a list of column names to remove NaNs from

In [2342]:
data_df2.columns

Index(['subject', 'Age', 'Normalized_Percent_Cognitive_Improvement',
       'Z_Scored_Percent_Cognitive_Improvement_By_Origin_Group',
       'Z_Scored_Percent_Cognitive_Improvement',
       'Percent_Cognitive_Improvement',
       'Z_Scored_Subiculum_T_By_Origin_Group_',
       'Z_Scored_Subiculum_Connectivity_T', 'Subiculum_Connectivity_T_Redone',
       'Subiculum_Connectivity_T', 'Amnesia_Lesion_T_Map', 'Memory_Network_T',
       'Z_Scored_Memory_Network_R', 'Memory_Network_R',
       'Subiculum_Grey_Matter', 'Subiculum_White_Matter', 'Subiculum_CSF',
       'Subiculum_Total', 'Standardized_Age',
       'Standardized_Percent_Improvement',
       'Standardized_Subiculum_Connectivity',
       'Standardized_Subiculum_Grey_Matter',
       'Standardized_Subiculum_White_Matter', 'Standardized_Subiculum_CSF',
       'Standardized_Subiculum_Total', 'Disease', 'Cohort', 'City',
       'Inclusion_Cohort', 'Categorical_Age_Group', 'Age_Group',
       'Age_And_Disease', 'Age_Disease_and_Cohort',

In [2343]:
drop_list2 = [voi2]

In [2344]:
data_df2 = cal_palm2.drop_nans_from_columns(columns_to_drop_from=drop_list2)
display(data_df2)

Unnamed: 0,subject,Age,Normalized_Percent_Cognitive_Improvement,Z_Scored_Percent_Cognitive_Improvement_By_Origin_Group,Z_Scored_Percent_Cognitive_Improvement,Percent_Cognitive_Improvement,Z_Scored_Subiculum_T_By_Origin_Group_,Z_Scored_Subiculum_Connectivity_T,Subiculum_Connectivity_T_Redone,Subiculum_Connectivity_T,...,DECLINE,Cognitive_Improve,Z_Scored_Cognitive_Baseline,Z_Scored_Cognitive_Baseline__Lower_is_Better_,Min_Max_Normalized_Baseline,MinMaxNormBaseline_Higher_is_Better,ROI_to_Alz_Max,ROI_to_PD_Max,Standardzied_AD_Max,Standardized_PD_Max
0,101,62.0,-0.392857,0.314066,0.314066,-21.428571,-1.282630,-1.282630,21.150595,56.864683,...,1.0,No,1.518764,-1.518764,0.72,0.28,12.222658,14.493929,-1.714513,-1.227368
1,102,77.0,-0.666667,0.013999,0.013999,-36.363636,-1.760917,-1.760917,19.702349,52.970984,...,1.0,No,0.465551,-0.465551,0.48,0.52,14.020048,15.257338,-1.155843,-1.022243
2,103,76.0,-1.447368,-0.841572,-0.841572,-78.947368,-0.595369,-0.595369,23.231614,62.459631,...,1.0,No,-0.061056,0.061056,0.36,0.64,15.118727,17.376384,-0.814348,-0.452865
3,104,65.0,-2.372549,-1.855477,-1.855477,-129.411765,-0.945206,-0.945206,22.172312,59.611631,...,1.0,No,-0.412127,0.412127,0.28,0.72,13.112424,15.287916,-1.437954,-1.014027
4,105,50.0,-0.192982,0.533109,0.533109,-10.526316,-1.151973,-1.151973,21.546222,57.928350,...,0.0,No,-0.061056,0.061056,0.36,0.64,15.086568,12.951426,-0.824344,-1.641831
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
160,86,57.0,,0.598787,-0.099428,0.000000,-0.087220,-0.621000,22.200000,22.200000,...,,Yes,,,,,,,,
161,87,65.0,,0.598787,-0.099428,0.000000,0.598397,0.173000,27.000000,27.000000,...,,Yes,,,,,,,,
162,88,65.0,,5.854050,2.637141,15.384615,0.269872,-0.207000,24.700000,24.700000,...,,Yes,,,,,,,,
163,89,67.0,,0.598787,-0.099428,0.000000,-0.158639,-0.694000,21.700000,21.700000,...,,Yes,,,,,,,,


**Drop Row Based on Value of Column**

Define the column, condition, and value for dropping rows
- column = 'your_column_name'
- condition = 'above'  # Options: 'equal', 'above', 'below'

Set the parameters for dropping rows

In [2345]:
data_df2, other_df2 = cal_palm2.drop_rows_based_on_value(column2, condition2, value2)
data_df2

Unnamed: 0,subject,Age,Normalized_Percent_Cognitive_Improvement,Z_Scored_Percent_Cognitive_Improvement_By_Origin_Group,Z_Scored_Percent_Cognitive_Improvement,Percent_Cognitive_Improvement,Z_Scored_Subiculum_T_By_Origin_Group_,Z_Scored_Subiculum_Connectivity_T,Subiculum_Connectivity_T_Redone,Subiculum_Connectivity_T,...,DECLINE,Cognitive_Improve,Z_Scored_Cognitive_Baseline,Z_Scored_Cognitive_Baseline__Lower_is_Better_,Min_Max_Normalized_Baseline,MinMaxNormBaseline_Higher_is_Better,ROI_to_Alz_Max,ROI_to_PD_Max,Standardzied_AD_Max,Standardized_PD_Max
0,101,62.0,-0.392857,0.314066,0.314066,-21.428571,-1.282630,-1.282630,21.150595,56.864683,...,1.0,No,1.518764,-1.518764,0.72,0.28,12.222658,14.493929,-1.714513,-1.227368
1,102,77.0,-0.666667,0.013999,0.013999,-36.363636,-1.760917,-1.760917,19.702349,52.970984,...,1.0,No,0.465551,-0.465551,0.48,0.52,14.020048,15.257338,-1.155843,-1.022243
2,103,76.0,-1.447368,-0.841572,-0.841572,-78.947368,-0.595369,-0.595369,23.231614,62.459631,...,1.0,No,-0.061056,0.061056,0.36,0.64,15.118727,17.376384,-0.814348,-0.452865
3,104,65.0,-2.372549,-1.855477,-1.855477,-129.411765,-0.945206,-0.945206,22.172312,59.611631,...,1.0,No,-0.412127,0.412127,0.28,0.72,13.112424,15.287916,-1.437954,-1.014027
4,105,50.0,-0.192982,0.533109,0.533109,-10.526316,-1.151973,-1.151973,21.546222,57.928350,...,0.0,No,-0.061056,0.061056,0.36,0.64,15.086568,12.951426,-0.824344,-1.641831
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
160,86,57.0,,0.598787,-0.099428,0.000000,-0.087220,-0.621000,22.200000,22.200000,...,,Yes,,,,,,,,
161,87,65.0,,0.598787,-0.099428,0.000000,0.598397,0.173000,27.000000,27.000000,...,,Yes,,,,,,,,
162,88,65.0,,5.854050,2.637141,15.384615,0.269872,-0.207000,24.700000,24.700000,...,,Yes,,,,,,,,
163,89,67.0,,0.598787,-0.099428,0.000000,-0.158639,-0.694000,21.700000,21.700000,...,,Yes,,,,,,,,


Regress out a Covariate

In [2346]:
# lis = []
# for col in data_df2.columns:
#     if 'surface' in col.lower():
#         lis.append(col)
# print(lis)

In [2347]:
from calvin_utils.statistical_utils.regression_utils import RegressOutCovariates
## use this code block to regress out covariates. Generally better to just include as covariates in a model..
# dependent_variable_list2 = lis
# regressors2 = ['Age', 'Sex']

# data_df2, adjusted_dep_vars_list2 = RegressOutCovariates.run(df=data_df2, dependent_variable_list=dependent_variable_list2, covariates_list=regressors2)
# print(adjusted_dep_vars_list2)

**Standardize Data**
- Enter Columns you Don't want to standardize into a list

In [2348]:
## Remove anything you don't want to standardize
# cols_not_to_standardize2 = ['Age',  'Subiculum_Connectivity_T']

In [2349]:
# data_df2 = cal_palm2.standardize_columns(cols_not_to_standardize2)
# data_df2

In [2350]:
data_df2.columns

Index(['subject', 'Age', 'Normalized_Percent_Cognitive_Improvement',
       'Z_Scored_Percent_Cognitive_Improvement_By_Origin_Group',
       'Z_Scored_Percent_Cognitive_Improvement',
       'Percent_Cognitive_Improvement',
       'Z_Scored_Subiculum_T_By_Origin_Group_',
       'Z_Scored_Subiculum_Connectivity_T', 'Subiculum_Connectivity_T_Redone',
       'Subiculum_Connectivity_T', 'Amnesia_Lesion_T_Map', 'Memory_Network_T',
       'Z_Scored_Memory_Network_R', 'Memory_Network_R',
       'Subiculum_Grey_Matter', 'Subiculum_White_Matter', 'Subiculum_CSF',
       'Subiculum_Total', 'Standardized_Age',
       'Standardized_Percent_Improvement',
       'Standardized_Subiculum_Connectivity',
       'Standardized_Subiculum_Grey_Matter',
       'Standardized_Subiculum_White_Matter', 'Standardized_Subiculum_CSF',
       'Standardized_Subiculum_Total', 'Disease', 'Cohort', 'City',
       'Inclusion_Cohort', 'Categorical_Age_Group', 'Age_Group',
       'Age_And_Disease', 'Age_Disease_and_Cohort',

Choose Rows to Keep
- Keep subject and dependent variable row

In [2351]:
col_to_keep_list2 = [voi2, 'subject']

- The final DF is EXPECTED to have subject IDs which are IDENTICAL to the subject IDs that go in the neuroimaging DF column names above
- There should only be 1 variable  the row

|        |  1 |  2 |  3 |  4 |  5 |  6 |  7 |  8 |  9 |  10 | ... |  40 |  41 |  42 |  43 |  45 |  46 |  47 |  48 |  49 |  50 |
|----------|------------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|-----------|------------|-----|-------------|-------------|-------------|-------------|-------------|-------------|-------------|-------------|-------------|-------------|
| Indep. Var.    | 3          | 4         | 7         | 2         | 2         | 2         | 9         | 4         | 7         | 5          | ... | 5           | 2           | 7           | 7           | 3           | 8           | 8           | 1           | 1           | 3           |

In [2352]:
data_df2=data_df2.loc[:, col_to_keep_list2]
data_df2 = data_df2.T
data_df2.columns = data_df2.loc['subject']
data_df2 = data_df2.drop('subject')
data_df2.dropna(inplace=True, axis=1)
data_df2

subject,101.0,102.0,103.0,104.0,105.0,106.0,107.0,108.0,109.0,110.0,...,81.0,82.0,83.0,84.0,85.0,86.0,87.0,88.0,89.0,91.0
Percent_Cognitive_Improvement,-21.428571,-36.363636,-78.947368,-129.411765,-10.526316,-38.461538,-15.384615,-29.166667,-30.434783,-84.615385,...,7.142857,-3.333333,0.0,-3.448276,8.695652,0.0,0.0,15.384615,0.0,3.571429


# Define an Already Existing Map to Compare Similarity To
- if not using, set to None

In [2353]:
map_path = None
# data_df2=None
# nimg_df2=None

# Test 2 Maps

# Prepare the Arguments for Permutation Testing

Is there a particular mask you want to use?
- MUST match the resolution of voxelwise data being analyzed. 
- If you set None, the voxelwise data will be used for thresholding. 
    - Values below mask_threshold (float) will be set to 0. 
- Warning: bad masking may result in failed experiments. Erroneous voxels outside the brain will influence the correction. 

In [2354]:
mask_path = '/Users/cu135/Partners HealthCare Dropbox/Calvin Howard/resources/atlases/MNI_structures/cortex/mni_152_cortex_mask_2mm.nii'
mask_threshold = 0

Correlation method
- spearman or pearson

In [2355]:
method = 'spearman'

Choose Max Stat Correction Method
- None | pseudo_var_smooth | var_smooth

In [2356]:
max_stat_method = 'pseudo_var_smooth'

ROI to analyze within

In [2357]:
roi_path = None
roi_threshold = 0

Initialize the Permutation testing Class

In [2358]:
from calvin_utils.permutation_analysis_utils.correlation_fwe_comparison import CalvinFWEWrapper
wrapper = CalvinFWEWrapper(neuroimaging_dataframe1=nimg_df, 
                           variable_dataframe1=data_df, 
                           neuroimaging_dataframe2=nimg_df2, 
                           variable_dataframe2=data_df2, 
                           mask_threshold=mask_threshold, 
                           mask_path=mask_path, 
                           out_dir=out_dir, 
                           method=method, 
                           max_stat_method=max_stat_method,
                           roi_path=roi_path, roi_threshold=0,
                           map_path=map_path, use_spearman=True,
                           two_tail=False)



Analyze the Similarity of the 2 maps

In [2359]:
# Running Pearson correlation analysis with ROI mask
observed_correlation, permuted_correlations = wrapper.run_pearson_analysis(n_permutations=100)

Running permutation: 100%|██████████| 100/100 [01:19<00:00,  1.26it/s]

Observed: 0.4377881301787548, p-value 0.96, using 2-tail: False.





Analyze the Peaks of the Two Maps

In [2360]:
# # Running peak voxel finding analysis with ROI mask
# observed_peak_distance, permuted_peak_distances = wrapper.run_peak_voxel_analysis(n_permutations=1000)

In [2361]:
# from scipy.stats import percentileofscore
# test_value = 2
# percentile = percentileofscore(permuted_peak_distances, test_value, kind='rank')
# percentile