## User Instructions
This is a program which will create R-Maps. These are used to correlate a voxel to a continuous outcome measure.
As it stands, this software employs Pearson Correlation Coefficients, which imply it will be best to have a continuous outcome on a percent scale. If you would like to do otherwise, a Spearman Correlation is possible. 

The software will walk you through everything. 

Files are expected to follow a BIDS naming convention. 
Subject IDs are expected in the input CSV and are expected to share the same naming convention as the nifti files themselves.

In [14]:
import pandas as pd
import numpy as np
from calvin_utils.import_functions import paths_to_input_files
# What is the path to the folder/csv containing the nifti files/files paths for the neuroimaging files?
path_1 = '/Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/BIDS_PD_DBS_STN_WURZBURG/neuroimaging'

# What is the path to the csv with your outcome measures? Please note:
## This software expects csv to contain an outcome in a column with subject names in another column.
path_2 = '/Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/BIDS_PD_DBS_STN_WURZBURG/metadata/cog_decline_scores_simple.csv'

# What is would you like to call this analysis? 
## This will generate a subfolder called analysis_name at the location of path_1 and save all outputs there.
analysis_name = 'r_maps'

#----------------------------------------------------------------User Input Above----------------------------------------------------------------
path_1, path_2, out_dir = paths_to_input_files(
    path_1 = path_1,
    path_2 = path_2,
    analysis_name = analysis_name
)

I will save to: /Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/BIDS_PD_DBS_STN_WURZBURG/default_analysis


In [15]:
import os
from calvin_utils.import_matrices import import_matrices_from_folder

#Do you want to import from folder or csv?
# /Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/BIDS_PD_DBS_STN_WURZBURG/neuroimaging/sub-37/ses-01/anat/sub-37-mwp3glanat_resampled.nii
file_pattern = '/*/*/anat/*-mwp1*resampled*'

#----------------------------------------------------------------
df_1 = import_matrices_from_folder(path_1, file_pattern=file_pattern)

df_1 = df_1.transpose()
df_1

I will search:  /Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/BIDS_PD_DBS_STN_WURZBURG/neuroimaging/*/*/anat/*-mwp1*resampled*


Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,902619,902620,902621,902622,902623,902624,902625,902626,902627,902628
13-mwp1glanat_resampled.nii,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14-mwp1glanat_resampled.nii,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
22-mwp1glanat_resampled.nii,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
25-mwp1glanat_resampled.nii,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
23-mwp1glanat_resampled.nii,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
15-mwp1glanat_resampled.nii,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
12-mwp1glanat_resampled.nii,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
30-mwp1glanat_resampled.nii,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
08-mwp1glanat_resampled.nii,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
37-mwp1glanat_resampled.nii,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [16]:
path_list = df_1.index.to_list()
basename_path_list = [os.path.basename(path).split('_')[0].split('-')[0] for path in path_list]
df_1['subject'] = basename_path_list
df_1

Unnamed: 0,0,1,2,3,4,5,6,7,8,9,...,902620,902621,902622,902623,902624,902625,902626,902627,902628,subject
13-mwp1glanat_resampled.nii,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,13
14-mwp1glanat_resampled.nii,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,14
22-mwp1glanat_resampled.nii,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,22
25-mwp1glanat_resampled.nii,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,25
23-mwp1glanat_resampled.nii,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,23
15-mwp1glanat_resampled.nii,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,15
12-mwp1glanat_resampled.nii,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,12
30-mwp1glanat_resampled.nii,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,30
08-mwp1glanat_resampled.nii,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,8
37-mwp1glanat_resampled.nii,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,37


In [17]:
# Import a CSV with the clinical data of interest
df_2 = pd.read_csv(path_2)
df_2

Unnamed: 0,ID,diff. Mattis,perc_change_mattis,perc_improvement,Age
0,1,8,5.673759,-5.673759,57.0
1,2,-3,-2.158273,2.158273,50.0
2,3,2,1.388889,-1.388889,62.0
3,4,3,2.142857,-2.142857,50.0
4,6,1,0.704225,-0.704225,60.0
5,7,1,0.70922,-0.70922,73.0
6,9,3,2.142857,-2.142857,64.0
7,11,1,0.694444,-0.694444,62.0
8,12,0,0.0,0.0,54.0
9,14,-1,-0.699301,0.699301,49.0


In [18]:
import os
# What is the name of the column that contains your subject labels?
subject_column = 'ID'
# What you should see below is a dataframe with subjects as rows, clinical outcome/data in the first columns, and then voxel values thereafter.
column_outcomes_start_in = 1

#----------------------------------------------------------------END USER INPUTS----------------------------------------------------------------
for colname in df_2.columns[column_outcomes_start_in:]:
    column_to_merge_on = colname
    path_list = df_2.loc[:, subject_column].to_list()
    try:
        basename_path_list = [os.path.basename(path).split('_')[1] for path in path_list]
    except:
        basename_path_list = [str(path) for path in path_list]
    df_2['subject'] = basename_path_list
    df_2['subject']
    
    prep_df = df_2.copy()
    final_df = df_2.loc[:, [column_to_merge_on, 'subject']].merge(df_1, on='subject', right_index=False)
    merged_df = final_df.set_index('subject')
    columns_to_remove = None

    #Remove undesirable columns
    final_df = merged_df.copy()
    final_df = final_df.dropna()
    try:
        final_df.pop(columns_to_remove)
    except:
        print('Soft error raised: could not find column %s:' % columns_to_remove)
    from calvin_utils.network_mapping_utilities import generate_r_map
    r_df, p_df, r_squared_df = generate_r_map(final_df, mask_path=None)
    from calvin_utils.generate_nifti import view_and_save_nifti
    view_and_save_nifti(p_df, (out_dir+f'/p_map/{column_to_merge_on}'))
    view_and_save_nifti(r_df, (out_dir+f'/r_map/{column_to_merge_on}'))
    view_and_save_nifti(r_squared_df, (out_dir+f'/r_squared_map/{column_to_merge_on}'))

Soft error raised: could not find column None:
No mask path specified, using default mask data
Dataframe has been masked such that shape is:  (19, 225222)


100%|██████████| 225222/225222 [00:32<00:00, 6855.44it/s]


No mask path specified, using default mask data
Dataframe has been masked such that shape is:  (902629,)
No mask path specified, using default mask data
Dataframe has been masked such that shape is:  (902629,)
(91, 109, 91)
(902629, 1)
Image saved to: 
 /Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/BIDS_PD_DBS_STN_WURZBURG/default_analysis/p_map/diff. Mattis
(91, 109, 91)
(902629, 1)
Image saved to: 
 /Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/BIDS_PD_DBS_STN_WURZBURG/default_analysis/r_map/diff. Mattis
(91, 109, 91)
(902629, 1)
Image saved to: 
 /Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/BIDS_PD_DBS_STN_WURZBURG/default_analysis/r_squared_map/diff. Mattis
Soft error raised: could not find column None:
No mask path specified, using default mask data
Dataframe has been masked such that shape is:  (19, 225222)


100%|██████████| 225222/225222 [00:32<00:00, 6896.05it/s]


No mask path specified, using default mask data
Dataframe has been masked such that shape is:  (902629,)
No mask path specified, using default mask data
Dataframe has been masked such that shape is:  (902629,)
(91, 109, 91)
(902629, 1)
Image saved to: 
 /Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/BIDS_PD_DBS_STN_WURZBURG/default_analysis/p_map/perc_change_mattis
(91, 109, 91)
(902629, 1)
Image saved to: 
 /Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/BIDS_PD_DBS_STN_WURZBURG/default_analysis/r_map/perc_change_mattis
(91, 109, 91)
(902629, 1)
Image saved to: 
 /Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/BIDS_PD_DBS_STN_WURZBURG/default_analysis/r_squared_map/perc_change_mattis
Soft error raised: could not find column None:
No mask path specified, using default mask data
Dataframe has been masked such that shape is:  (19, 225222)


100%|██████████| 225222/225222 [00:32<00:00, 6981.11it/s]


No mask path specified, using default mask data
Dataframe has been masked such that shape is:  (902629,)
No mask path specified, using default mask data
Dataframe has been masked such that shape is:  (902629,)
(91, 109, 91)
(902629, 1)
Image saved to: 
 /Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/BIDS_PD_DBS_STN_WURZBURG/default_analysis/p_map/perc_improvement
(91, 109, 91)
(902629, 1)
Image saved to: 
 /Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/BIDS_PD_DBS_STN_WURZBURG/default_analysis/r_map/perc_improvement
(91, 109, 91)
(902629, 1)
Image saved to: 
 /Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/BIDS_PD_DBS_STN_WURZBURG/default_analysis/r_squared_map/perc_improvement
Soft error raised: could not find column None:
No mask path specified, using default mask data
Dataframe has been masked such that shape is:  (19, 225222)


100%|██████████| 225222/225222 [00:32<00:00, 6916.51it/s]


No mask path specified, using default mask data
Dataframe has been masked such that shape is:  (902629,)
No mask path specified, using default mask data
Dataframe has been masked such that shape is:  (902629,)
(91, 109, 91)
(902629, 1)
Image saved to: 
 /Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/BIDS_PD_DBS_STN_WURZBURG/default_analysis/p_map/Age
(91, 109, 91)
(902629, 1)
Image saved to: 
 /Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/BIDS_PD_DBS_STN_WURZBURG/default_analysis/r_map/Age
(91, 109, 91)
(902629, 1)
Image saved to: 
 /Users/cu135/Dropbox (Partners HealthCare)/resources/datasets/BIDS_PD_DBS_STN_WURZBURG/default_analysis/r_squared_map/Age


## Perform Delta R-Map and Permute it for Significance

## Calculate the Observed Delta-R Map Between 2 Populations

In [19]:
from calvin_utils.network_mapping_utilities import generate_delta_r_map
delta_matrix = merged_df.copy()
observed_delta_r_map = generate_delta_r_map(delta_matrix, threshold_of_interest=65, column_of_interest='Age at DOS')

KeyError: 'Age at DOS'

In [None]:
from calvin_utils.generate_nifti import view_and_save_nifti
view_and_save_nifti(observed_delta_r_map, (out_dir+'/over_vs_under_65_delta_r_map'))

(91, 109, 91)
(902629, 1)
Image saved to: 
 /Users/cu135/Dropbox (Partners HealthCare)/memory/age_adjusted_r_maps/all_comers/over_vs_under_65_delta_r_map


## Calculate the Empiric Delta-R Map Distribution 
### Note, this permutes the label of the population without permuting the neuroimaging data.
### Therefore, we are testing if the separation of the r-maps is significantly due to the variable of interest. 

In [None]:
from calvin_utils.network_mapping_utilities import permuted_patient_label_delta_r_map
from calvin_utils.print_suppression import HiddenPrints
n_permutations = 2
column_of_interest = 'Age at DOS'
threshold_of_interest = 65
with HiddenPrints():
    p_count_df = permuted_patient_label_delta_r_map(dataframe_to_permute=merged_df, 
                                                observed_delta_r_map=observed_delta_r_map, 
                                                column_of_interest=column_of_interest, 
                                                threshold_of_interest=threshold_of_interest, 
                                                n_permutations=n_permutations)

100%|██████████| 225222/225222 [00:25<00:00, 8964.99it/s]
100%|██████████| 225222/225222 [00:25<00:00, 8996.31it/s]
100%|██████████| 225222/225222 [00:25<00:00, 8974.88it/s]
100%|██████████| 225222/225222 [00:25<00:00, 8929.93it/s]


In [None]:
from calvin_utils.generate_nifti import view_and_save_nifti
view_and_save_nifti(p_values_df, (out_dir+'/over_vs_under_65_delta_r_map_p_values_df'))

(91, 109, 91)
(902629, 1)
Image saved to: 
 /Users/cu135/Dropbox (Partners HealthCare)/memory/age_adjusted_r_maps/all_comers/over_vs_under_65_delta_r_map_p_values_df


In [None]:
merged_df.pop('Patient # CDR, ADAS')
# merged_df.to_csv(os.path.join(out_dir, 'csv_for_delta_r_permutation.csv'))

subject
101      101
102      102
103      103
104      104
105      105
106      106
107      107
108      108
109      109
110      110
111      111
113      113
114      114
115      115
116      116
118      118
119      119
1201    1201
1202    1202
1203    1203
120      120
121      121
122      122
123      123
124      124
125      125
126      126
127      127
128      128
129      129
130      130
131      131
133      133
134      134
135      135
137      137
138      138
139      139
140      140
141      141
142      142
143      143
144      144
145      145
146      146
147      147
Name: Patient # CDR, ADAS, dtype: int64

In [None]:
merged_df.pop('Patient # CDR, ADAS')
merged_df.to_csv(os.path.join(out_dir, 'csv_for_permuted_delta_r_map.csv'), index=False)