# Get Files and Put them in a CSV
- To prepare a master list, create a dictionary in which each key is a dataset name and each value is a dictionary with the following entries:
    - 'nifti_path': str, a wildcarded path to NIFTI files.
    - 'csv_path': str, the absolute path to a CSV file containing subject data.
    - 'subj_col': str, the column name in the CSV file that contains subject IDs.
    - 'covariate_col': dict, a dictionary where keys are common covariate names and values are the corresponding column names in the CSV file.

- Example:
```
data_dict = {
    'Dataset1': {
        'nifti_path': '/path/to/niftis/*.nii.gz',
        'csv_path': '/path/to/csv1.csv',
        'subj_col': 'sub',
        'covariate_col': {'age': 'Age', 'sex': 'Sex', 'baseline': 'ADAS-Cog11', 'indep_col': 'Indep. Var.',}
    },
    'Dataset2': {
        'nifti_path': '/path/to/niftis/*.nii.gz',
        'csv_path': '/path/to/csv2.csv',
        'subj_col': 'sub',
        'covariate_col': {'age': 'pt_Age', 'sex': 'Sex', 'baseline': 'MDRS', 'indep_col': 'Indep. Var.',}
    }
}
```

In [9]:
# Per-dataset configuration consumed by CSVComposer.
# Each key is a dataset label; each value holds:
#   'nifti_path'    - wildcarded path to that dataset's NIfTI files
#   'csv_path'      - absolute path to the dataset's metadata CSV
#   'subj_col'      - name of the CSV column holding subject IDs
#   'covariate_col' - {common covariate name: column name in that CSV}
# An empty string marks a covariate with no mapped column for that dataset
# (presumably left blank by CSVComposer - TODO confirm).
# NOTE(review): 'maastricth_memory' is a misspelling of Maastricht, and
# 'hebscher_tms_b/c/d' point at HERMILLER directories. The keys are left as-is
# because they become dataset labels downstream; rename deliberately if desired.
data_dict = {
    'adni_memory': {
        'nifti_path': '/Volumes/OneTouch/datasets/adni/neuroimaging/all_patients_atrophy_seeds/sub-*/ses*/unthresholded_tissue_segment_z_scores/*_grey_matter+cerebrospinal_fluid.nii',
        'csv_path': '/Volumes/Expansion/datasets/adni/metadata/updated_master_list/master_dx_updated_fix_composite.csv',
        'subj_col': 'subid',
        'covariate_col': {'age': 'Age', 'sex': 'Sex', 'diagnosis': 'DIAGNOSIS_CURRENT_Str', 'overall_cognition': 'TOTAL11', 'lesion_size': '', 'memory_target': 'Q4', 'percent_decline_memory': 'Perc_Decline_Q4', 'baseline_memory': '', '1yr_memory': ''}
    },
    'corbetta_memory': {
        'nifti_path': '/Volumes/Expansion/datasets/CORBETTA_STROKE_MULTIFOCAL/BIDS_Dataset/sub-*/connectivity/sub-*-yeo1k_stat-t_conn.nii.gz',
        'csv_path': '/Volumes/Expansion/datasets/CORBETTA_STROKE_MULTIFOCAL/Study_Metadata/acute_arm_1_clean.csv',
        'subj_col': 'subid',
        'covariate_col': {'age': 'age', 'sex': 'gender', 'diagnosis': 'stroke', 'overall_cognition': 'nihss_hospital', 'lesion_size': '', 'memory_target': 'hvlt_delay', 'percent_decline_memory': 'hvlt_perc_decline', 'baseline_memory': '', '1yr_memory': ''}
    },
    'grafmann_memory': {
        'nifti_path': '/Volumes/Expansion/datasets/GRAFMAN_TBI_MULTIFOCAL/grafman_fc/sub-*/connectivity/sub-*-yeo1000udil_space-2mm_stat-t_conn.nii.gz',
        'csv_path': '/Volumes/Expansion/datasets/GRAFMAN_TBI_MULTIFOCAL/metadata/master_list.csv',
        'subj_col': 'vhis_id',
        'covariate_col': {'age': '', 'sex': '', 'diagnosis': 'tbi', 'overall_cognition': 'mmse12', 'lesion_size': 'lesion_size', 'memory_target': 'mmse5', 'percent_decline_memory': 'mmse5_perc_decline', 'baseline_memory': '', '1yr_memory': ''}
    },
    'manitoba_memory': {
        'nifti_path': '/Volumes/Expansion/datasets/Manitoba_Epilepsy_PET/derivatives/PET_Conn/sub-*-2mm_lesionMask_T.nii.gz',
        'csv_path': '/Volumes/Expansion/datasets/Manitoba_Epilepsy_PET/metadata/master_list.csv',
        'subj_col': 'subject',
        'covariate_col': {'age': 'Age_At_Testing', 'sex': 'Sex', 'diagnosis': 'epilepsy', 'overall_cognition': 'Total (max=100)', 'lesion_size': '', 'memory_target': 'Memory (max=26)', 'percent_decline_memory': 'memory_perc_decline', 'baseline_memory': '', '1yr_memory': ''}
    },
    'ms_memory':{
        'nifti_path': '/Volumes/Expansion/datasets/KLETENIK_MS_MULTIFOCAL/derivatives_connectivity/sub-*/connectivity/sub-*-yeo1000udil_stat-t_conn.nii.gz',
        'csv_path': '/Volumes/Expansion/datasets/KLETENIK_MS_MULTIFOCAL/metadata/MS_memory.csv',
        'subj_col': 'mri_refno',
        'covariate_col': {'age': 'age', 'sex': 'sex', 'diagnosis': 'multiple_sclerosis', 'overall_cognition': '', 'lesion_size': '', 'memory_target': 'percent_correct', 'percent_decline_memory': 'percent_wrong', 'baseline_memory': '', '1yr_memory': ''}
    },
    'kahana_memory': {
        'nifti_path': '/Volumes/Expansion/datasets/Kahana_Epilepsy_iEEG/derivatives_unclassified_stim/sub-*/ses-*/conn/*sphere_roi_Precom_T.nii.gz',
        'csv_path': '/Volumes/Expansion/datasets/Kahana_Epilepsy_iEEG/metadata/master_list_stim_effect.csv',
        'subj_col': 'Subject_ID',
        # The only dataset that maps an extra 'ses' covariate (column 'session').
        'covariate_col': {'age': '', 'sex': '', 'diagnosis': 'epilepsy', 'overall_cognition': '', 'lesion_size': '', 'memory_target': 'percent_improve_by_stim', 'percent_decline_memory': 'percent_decline_by_stim', 'baseline_memory': '', '1yr_memory': '', 'ses': 'session'}
    },
    'queensland_memory': {
        'nifti_path': '/Volumes/Expansion/datasets/Queensland_PD_DBS_STN/derivatives/conn/*.nii',
        'csv_path': '/Volumes/Expansion/datasets/Queensland_PD_DBS_STN/metadata/queensland_cognition.csv',
        'subj_col': 'Subject',
        'covariate_col': {'age': 'age_at_surgery', 'sex': '', 'diagnosis': 'parkinson', 'overall_cognition': 'FU4_MOCA_Total', 'lesion_size': '', 'memory_target': 'MOCA_Recall_change_PreToFU4_Percent_Improve', 'percent_decline_memory': 'MOCA_Recall_change_PreToFU4_Percent_Decline', 'baseline_memory': '', '1yr_memory': ''}
    },
    'sante_memory': {
        'nifti_path': '/Volumes/OneTouch/datasets/SANTE_Epilepsy_DBS_ANT/derivatives/conn/sub-*/connectivity/sub-*-yeo1000udil_space-2mm_stat-t_conn.nii.gz',
        'csv_path': '/Volumes/Expansion/datasets/SANTE_Epilepsy_DBS_ANT/metadata/sante_cognitive_scores_with_percent_change_WIDE.csv',
        'subj_col': 'subject',
        'covariate_col': {'age': '', 'sex': '', 'diagnosis': 'epilepsy', 'overall_cognition': 'Concentrating on reading-number Percent Change Corrected V2_Month 6', 'lesion_size': '', 'memory_target': 'Trouble remembering-number Percent Change Corrected V2_Month 12', 'percent_decline_memory': 'Trouble remembering_Mo_12_perc_decline', 'baseline_memory': '', '1yr_memory': ''}
    },
    'maastricth_memory': {
        'nifti_path': '/Volumes/Expansion/datasets/Maastricht_Epilepsy_DBS_ANT/derivatives/sub-*/connectivity/sub-*-GSP1000uMF_space-2mm_stat-t_conn.nii.gz',
        'csv_path': '/Volumes/Expansion/datasets/Maastricht_Epilepsy_DBS_ANT/metadata/master_list.csv',
        'subj_col': 'Subject',
        'covariate_col': {'age': '', 'sex': '', 'diagnosis': 'epilepsy', 'overall_cognition': '', 'lesion_size': '', 'memory_target': 'q15_perc_improve', 'percent_decline_memory': 'q15_perc_decline', 'baseline_memory': 'baseline_q15_memoryScore', '1yr_memory': 'final_q15_memoryScore'}
    },
    'fornix_memory': {
        'nifti_path': '/Volumes/OneTouch/datasets/AD_dataset/*/stimulations/MNI_ICBM_2009b_NLIN_ASYM/gs_20180403170745/GSP_1000_Yeo_2011_Full Set (Yeo 2011)/vat_seed_compound_fMRI_efield_func_seed_T.nii',
        'csv_path': '/Volumes/Expansion/datasets/AD_dataset/metadata/ad_data.csv',
        'subj_col': 'subid',
        'covariate_col': {'age': 'Age at DOS', 'sex': '', 'diagnosis': 'alzheimer', 'overall_cognition': 'Baseline CDR (sum of squares)', 'lesion_size': '', 'memory_target': 'perc_improve', 'percent_decline_memory': '% Change from baseline (ADAS-Cog11)', 'baseline_memory': '', '1yr_memory': ''}
    },
    'hebscher_tms': {
        'nifti_path': '/Volumes/Expansion/datasets/VOSS_STUDIES_TMS/HEBSCHER_TMS/derivatives/sub-*/conn/sub-*-MNI152_T1_2mm-tms_sphere_roi_Precom_T.nii.gz',
        'csv_path': '/Volumes/Expansion/datasets/VOSS_STUDIES_TMS/HEBSCHER_TMS/metadata/master_list.csv',
        'subj_col': 'subject',
        'covariate_col': {'age': '', 'sex': '', 'diagnosis': '', 'overall_cognition': '', 'lesion_size': '', 'memory_target': 'network-targeted memory', 'percent_decline_memory': '', 'baseline_memory': 'network-targeted memory', '1yr_memory': 'control memory score'}
    },
    'hermiller_tms_a': {
        'nifti_path': '/Volumes/Expansion/datasets/VOSS_STUDIES_TMS/HERMILLER_TMS/derivatives/sub-*/conn/sub-*-MNI152_T1_2mm-tms_sphere_roi_Precom_T.nii.gz',
        'csv_path': '/Volumes/Expansion/datasets/VOSS_STUDIES_TMS/HERMILLER_TMS/metadata/master_list_updated_coords.csv',
        'subj_col': 'SUBJECT',
        'covariate_col': {'age': '', 'sex': '', 'diagnosis': '', 'overall_cognition': '', 'lesion_size': '', 'memory_target': '_change_memory_score__post_stim_vs__pre_stim_', 'percent_decline_memory': '', 'baseline_memory': '', '1yr_memory': ''}
    },
    'hebscher_tms_b': {
        # Fixed: this previously globbed HERMILLER_TMS (dataset A's NIfTIs) while the CSV
        # came from HERMILLER_TMS_B, so A and B resolved to the same image files.
        # NOTE(review): confirm the derivatives folder exists under HERMILLER_TMS_B.
        'nifti_path': '/Volumes/Expansion/datasets/VOSS_STUDIES_TMS/HERMILLER_TMS_B/derivatives/sub-*/conn/sub-*-MNI152_T1_2mm-tms_sphere_roi_Precom_T.nii.gz',
        'csv_path': '/Volumes/Expansion/datasets/VOSS_STUDIES_TMS/HERMILLER_TMS_B/metadata/master_list_updated_coords.csv',
        'subj_col': 'SUBJECT',
        'covariate_col': {'age': '', 'sex': '', 'diagnosis': '', 'overall_cognition': '', 'lesion_size': '', 'memory_target': '_change_memory_score__post_stim_vs__pre_stim_', 'percent_decline_memory': '', 'baseline_memory': '', '1yr_memory': ''}
    },
    'hebscher_tms_c': {
        'nifti_path': '/Volumes/Expansion/datasets/VOSS_STUDIES_TMS/Hermiller_TMS_C/derivatives/sub-*/conn/sub-*-MNI152_T1_2mm-tms_sphere_roi_Precom_T.nii.gz',
        'csv_path': '/Volumes/Expansion/datasets/VOSS_STUDIES_TMS/Hermiller_TMS_C/metadata/master_list_updated_coords.csv',
        'subj_col': 'SUBJECT',
        'covariate_col': {'age': '', 'sex': '', 'diagnosis': '', 'overall_cognition': '', 'lesion_size': '', 'memory_target': 'difference in percentage', 'percent_decline_memory': '', 'baseline_memory': '', '1yr_memory': ''}
    },
    'hebscher_tms_d': {
        'nifti_path': '/Volumes/Expansion/datasets/VOSS_STUDIES_TMS/HERMILLER_TMS_D/derivatives/sub-*/conn/sub-*-MNI152_T1_2mm-tms_sphere_roi_Precom_T.nii.gz',
        'csv_path': '/Volumes/Expansion/datasets/VOSS_STUDIES_TMS/HERMILLER_TMS_D/METADATA/master_list.csv',
        'subj_col': 'SUBJECT',
        'covariate_col': {'age': '', 'sex': '', 'diagnosis': '', 'overall_cognition': '', 'lesion_size': '', 'memory_target': 'difference in recollection proportion (LPAR TBS vs control stim)', 'percent_decline_memory': '', 'baseline_memory': '', '1yr_memory': ''}
    },
    'kim_tms': {
        'nifti_path': '/Volumes/Expansion/datasets/VOSS_STUDIES_TMS/KIM_TMS/derivatives/sub-*/conn/sub-*-MNI152_T1_2mm-tms_sphere_roi_Precom_T.nii.gz',
        'csv_path': '/Volumes/Expansion/datasets/VOSS_STUDIES_TMS/KIM_TMS/metadata/master_list_updated_coords.csv',
        'subj_col': 'SUBJECT',
        'covariate_col': {'age': '', 'sex': '', 'diagnosis': '', 'overall_cognition': '', 'lesion_size': '', 'memory_target': 'difference_in_accuracy_two', 'percent_decline_memory': '', 'baseline_memory': '', '1yr_memory': ''}
    },
    'wang_tms': {
        'nifti_path': '/Volumes/Expansion/datasets/VOSS_STUDIES_TMS/WANG_TMS/derivatives/sub-*/conn/sub-*-MNI152_T1_2mm-tms_sphere_roi_Precom_T.nii.gz',
        'csv_path': '/Volumes/Expansion/datasets/VOSS_STUDIES_TMS/WANG_TMS/metadata/master_list.csv',
        'subj_col': 'SUBJECT',
        'covariate_col': {'age': '', 'sex': '', 'diagnosis': '', 'overall_cognition': '', 'lesion_size': '', 'memory_target': 'perc_change memory score (post-stim vs. pre-stim)', 'percent_decline_memory': '', 'baseline_memory': '', '1yr_memory': ''}
    }
}

Generate the DataFrame

In [10]:
# CSVComposer is a project-local helper; its internals are not visible in this notebook.
from calvin_utils.file_utils.csv_prep import CSVComposer
# Hand the per-dataset configuration (data_dict, defined in the cell above) to the composer.
composer = CSVComposer(data_dict)
# Build the combined table; it is read back below through `composer.composed_df`.
composer.compose_df()
# Render the composed table for a visual check before it is saved.
display(composer.composed_df)

  csv_data = pd.read_csv(params['csv_path'])
Processing corbetta_memory: 100%|██████████| 155/155 [00:02<00:00, 57.61it/s]


Unnamed: 0,Dataset,Subject,Nifti_File_Path,age,sex,diagnosis,overall_cognition,lesion_size,memory_target,percent_decline_memory,baseline_memory,1yr_memory
0,corbetta_memory,50,/Volumes/Expansion/datasets/CORBETTA_STROKE_MU...,,,,,,0.0,,,
1,corbetta_memory,51,/Volumes/Expansion/datasets/CORBETTA_STROKE_MU...,,,,,,7.0,,,
2,corbetta_memory,52,/Volumes/Expansion/datasets/CORBETTA_STROKE_MU...,,,,,,4.0,,,
3,corbetta_memory,53,/Volumes/Expansion/datasets/CORBETTA_STROKE_MU...,,,,,,8.0,,,
4,corbetta_memory,54,/Volumes/Expansion/datasets/CORBETTA_STROKE_MU...,,,,,,6.0,,,
...,...,...,...,...,...,...,...,...,...,...,...,...
150,corbetta_memory,203,,,,,,,4.0,,,
151,corbetta_memory,204,,,,,,,1.0,,,
152,corbetta_memory,lot,,,,,,,7.0,,,
153,corbetta_memory,,,age,gender,,nihss_hospital,,hvlt_delay,,,


Save the CSV

In [11]:
# Destination file for the composed master list (absolute path on the author's machine).
output_csv_path = '/Users/cu135/Partners HealthCare Dropbox/Calvin Howard/studies/ccm_memory/results/notebook_00/master_list.csv'

In [12]:
# Persist the composed table to output_csv_path (presumably as CSV, going by the method name - confirm in CSVComposer).
composer.save_csv(output_csv_path)

Save the Data Dict 

In [5]:
# Destination file for a JSON copy of the dataset configuration (absolute path on the author's machine).
output_json_path = '/Users/cu135/Partners HealthCare Dropbox/Calvin Howard/studies/ccm_memory/results/notebook_00/data_dict.json'

In [6]:
# Write the composer's dataset dictionary to output_json_path (presumably as JSON, per the method name - confirm).
# NOTE(review): this cell's recorded execution count (6) is lower than the data_dict cell's (9);
# re-run the notebook top to bottom so the saved JSON reflects the current data_dict.
composer.save_dict_as_json(output_json_path)

Enjoy 
- Calvin