# Run FSL Palm

### Authors: Calvin Howard

#### Last updated: July 6, 2023

Use this to run/test a statistical model (e.g., regression or T-tests) on lesion network maps (or lesions alone!) using PALM, potentially taking into account specific covariates of interest and/or nuisance regressors.

Notes:
- To best use this notebook, you should be familiar with GLM design and Contrast Matrix design. See this webpage to get started:
[FSL's GLM page](https://fsl.fmrib.ox.ac.uk/fsl/fslwiki/GLM)
- This notebook is a combination of the old PALM Notebooks and Christopher's palm_analysis notebooks (does the same thing) and requires the NIMLAB Python 3 environment as a kernel. Directions are on the [NIMLAB software_env README.md](https://github.com/nimlab/software_env)
- You will need a csv file that provides the paths to your fcMaps, usually created from the [Preprocessing](https://github.com/nimlab/templates/blob/master/py3_notebooks/1_Preprocessing_LesionQA_fcSurfBcbLqtGen_nimtrack.ipynb) notebook.
- Christopher wrote up a really nice description of how to modify code to set up your design matrix within the notebook here: [PALM-Analysis](https://github.com/nimlab/documentation/wiki/PALM-experimental-designs)
- I would also recommend reviewing Fred's [slides](https://github.com/nimlab/documentation/blob/master/presentations/presentation_palm_multidataset_analyses_labmeeting_13.4.2020.pdf) from his May 2020 lab meeting for details regarding the difference between implementing a random effects and a fixed effects analysis, and the use of exchangeability blocks.

In [None]:
## Packages and environmental settings:

##Packages:
import os
import sys
import shutil
import numpy as np
import pandas as pd
from tqdm import tqdm
from matplotlib import pyplot as plt
from nimlab import software as sf
from nimlab import datasets as nimds
from nilearn import image, plotting

%matplotlib inline

# 01 - Create Dependent Variable

Option A - Provide a list of Nifti Files

In [None]:
# Specify the path to your CSV file containing NIFTI paths
nifti_path_csv = '/Users/cu135/Dropbox (Partners HealthCare)/studies/ccm_memory/metadata/master_file_nifti_paths_for_palm.csv'
# Directory handed to PalmPreparation(out_dir=...) in the next cell
output_directory = "/Users/cu135/Dropbox (Partners HealthCare)/studies/ccm_memory/neuroimaging/derivatives/palm"

In [None]:
from calvin_utils.permutation_analysis_utils.palm_utils import PalmPreparation

# Instantiate the PalmPreparation class, pointing it at output_directory
palm_prep = PalmPreparation(out_dir=output_directory)

# Call the process_nifti_paths method on the CSV of nifti paths.
# nifti_df is presumably a DataFrame with a 'subject_id' column (section 02 reads it) — verify against calvin_utils.
# ordered_nifti_list is later passed to calvins_call_palm as input_imgs.
nifti_df, ordered_nifti_list = palm_prep.process_nifti_paths(nifti_path_csv)

# Display nifti_df
print(nifti_df)


Option B - Manual Creation of Voxelwise Target

Under work. 

Preparation of Non-Voxelwise Explanatory Variables

In [None]:
from calvin_utils.file_utils.dataframe_utilities import preprocess_colnames_for_regression
# NOTE(review): out_dir is a separate variable from output_directory (Option A) — confirm which directory later cells should write to
out_dir = '/PHShome/cu135/permutation_tests/fsl_palm'
path_to_clinical_data = '/PHShome/cu135/datasets/ad_dns/grey_matter_damage_score_and_outcomes.csv'
# Load the clinical CSV and pass it through preprocess_colnames_for_regression
# (presumably rewrites column names so they are usable in formulas — verify against calvin_utils)
clinical_df = preprocess_colnames_for_regression(pd.read_csv(path_to_clinical_data))
# Show the resulting column names so they can be copied into the cells below
clinical_df.columns

Formula-based derivation of design matrix

In [None]:
# Define the explanatory variable formula
# Each variable must be defined as 'Q("var_1")'. Interactions are defined as 'Q("var_1"):Q("var_2")'
# NOTE(review): the example below uses a bare column name ('Age') rather than Q("Age") — confirm which form create_design_matrix expects
formula_vars = [
'Age'
]
# Define the column containing the subject id
subject_id_column = 'Patient___CDR__ADAS'


# Create the design matrix#----------------------------------------------------------------
# NOTE(review): create_design_matrix is called as a bare function but is not imported in the visible notebook;
# section 02 calls palm_prep.create_design_matrix instead — confirm which one is intended here
design_matrix = create_design_matrix(formula_vars, clinical_df, subject_id_column)
# Display the design matrix
design_matrix


Create Voxelwise Explanatory Variables

In [None]:
# Map each voxelwise explanatory variable name to the CSV listing its nifti paths
nifti_path_dictionary = {
    'connectivity': '/PHShome/cu135/memory/file_paths/paths_to__.csv'
}
#----------------------------------------------------------------DO NOT TOUCH----------------------------------------------------------------
# Extracting a dataframe of voxelwise nifti data
print('This is the updated design matrix, including voxelwise data as niftis')
updated_design_matrix = design_matrix.copy()
for ev_name, paths_csv in nifti_path_dictionary.items():
    # A dedicated name is used here on purpose: rebinding the notebook-level
    # `nifti_df` (created in section 01) would move 'subject_id' into the index
    # and break the later `nifti_df['subject_id']` lookup in section 02.
    # NOTE(review): process_nifti_paths is called as a bare function and its return
    # order (_, dataframe) differs from palm_prep.process_nifti_paths — confirm the helper.
    _, voxelwise_df = process_nifti_paths(paths_csv)
    voxelwise_df = (
        voxelwise_df
        .rename(columns={'nifti_path': ev_name})
        .set_index('subject_id', drop=True)
    )
    # Cast the subject ids to int before joining on the index
    voxelwise_df.index = voxelwise_df.index.astype(int)
    # merge() defaults to an inner join, so subjects absent from either side are dropped
    updated_design_matrix = updated_design_matrix.merge(voxelwise_df, left_index=True, right_index=True)
updated_design_matrix

Generate 4-dimensional Niftis for Explanatory variables

In [None]:
# Convert the design matrix (including the voxelwise nifti-path columns) into 4D niftis written under out_dir.
# NOTE(review): generate_4d_explanatory_variable_niftis and MNI_brain_mask are not defined or imported
# in the visible notebook — confirm where they come from before running this cell.
nifti_design_matrix = generate_4d_explanatory_variable_niftis(design_matrix=updated_design_matrix, mask_img=MNI_brain_mask, out_dir=out_dir)
nifti_design_matrix

Define Interactions In the Data

In [None]:
# Pairs of design-matrix columns whose interaction should be modelled
interaction_pairs = [
    ('Age', 'Age'),
    ('Age', 'connectivity')
]

#----------------------------------------------------------------Do NOT TOUCH_----------------------------------------------------------------
# An empty list or a leading None both mean "no interactions requested";
# checking truthiness first avoids an IndexError on an empty list.
# NOTE(review): generate_interaction_design_matrix and MNI_brain_mask are not defined
# in the visible notebook — confirm where they come from.
if interaction_pairs and interaction_pairs[0] is not None:
    interaction_design_matrix = generate_interaction_design_matrix(nifti_design_matrix, interaction_terms=interaction_pairs)
else:
    interaction_design_matrix = nifti_design_matrix

# Columns present only in the voxelwise/interaction matrix. They are kept in that
# matrix's own column order so the final design-matrix layout is reproducible
# from run to run (a set has no stable iteration order).
unique_columns = [
    column for column in interaction_design_matrix.columns
    if column not in design_matrix.columns
]
final_design_matrix = design_matrix.copy()

# The in-brain selector does not depend on the column, so compute it once.
brain_mask_data = np.nan_to_num(MNI_brain_mask.get_fdata(), nan=0, posinf=0, neginf=0)
in_brain = brain_mask_data.flatten() > 0

for column in unique_columns:
    # Each cell of the column holds the path of one 4D nifti; load it once per column.
    # nilearn's image module (already imported by the notebook) is used for loading.
    img = image.load_img(interaction_design_matrix[column].values[0])
    img_data = img.get_fdata()

    # Rows are voxels, columns index the 4th dimension of the image.
    voxel_rows = img_data.reshape(-1, img_data.shape[3])
    # Apply the mask BEFORE discarding NaN rows: the mask has one entry per voxel,
    # so it can only index the full-length array.
    voxel_rows = voxel_rows[in_brain]
    # Discard voxels with a NaN anywhere along the 4th dimension
    voxel_rows = voxel_rows[~np.isnan(voxel_rows).any(axis=1)]

    # Compute mean value for each i in 4th dimension
    mean_values = np.nanmean(voxel_rows, axis=0)

    # Create a new column in final_design_matrix with the mean values
    final_design_matrix[f'voxelwise_{column}'] = mean_values

# Column positions are shifted by one when the design matrix has no 'Intercept' column.
# NOTE(review): presumably an intercept column is prepended later in that case — confirm
# the indexing convention expected by the PALM call.
index_offset = 0 if 'Intercept' in final_design_matrix.columns else 1
voxelwise_evs = [
    (
        interaction_design_matrix[column].values[0],
        final_design_matrix.columns.get_loc(f'voxelwise_{column}') + index_offset,
    )
    for column in unique_columns
]
print('These are the files that will be passed as a voxelwise explanatory variable (a voxelwise_ev): \n')
for ev_file, ev_column in voxelwise_evs:
    print(f'file: {ev_file} at column {ev_column} in the design matrix \n')
final_design_matrix



In [None]:
# Report each voxelwise explanatory variable (file path, design-matrix column), then show the matrix
print('These are the files that will be passed as a voxelwise explanatory variable (a voxelwise_ev): \n')
for ev_file, ev_column in voxelwise_evs:
    print(f'file: {ev_file} at column {ev_column} in the design matrix \n')
final_design_matrix

In [None]:
# Each variable must be defined as 'Q("var_1")'. Interactions are defined as 'Q("var_1"):Q("var_2")'
dependent_variable = [
'__Change_from_baseline__ADAS_Cog11_'
]
# Define the column containing the subject id
subject_id_column = 'Patient___CDR__ADAS'


#----------------------------------------------------------------
# NOTE(review): create_design_matrix is called as a bare function but is not imported in the
# visible notebook; section 02 uses palm_prep.create_design_matrix — confirm which is intended.
dependent_df = create_design_matrix(dependent_variable, clinical_df, subject_id_column)

# The dependent variable must cover the same subjects as the design matrix, in the same row order.
# A pure ordering/subset difference is repaired by re-indexing; only subjects that are
# genuinely missing from dependent_df are an error.
if not dependent_df.index.equals(final_design_matrix.index):
    missing_subjects = final_design_matrix.index.difference(dependent_df.index)
    if len(missing_subjects) > 0:
        raise ValueError(
            "Indices of dependent_df and final_design_matrix do not match. "
            f"Subjects missing from dependent_df: {list(missing_subjects)}"
        )
    # Resample dependent_df based on final_design_matrix.index
    dependent_df = dependent_df.loc[final_design_matrix.index].copy()

# The intercept belongs to the design matrix, not the outcome; drop it if it was generated
dependent_df = dependent_df.drop(columns='Intercept', errors='ignore')
# Display the dependent variable dataframe
dependent_df

In [None]:
#---------------------------------------------------------DO NOT MODIFY--------------------------------------------------------
ordered_image_list = generate_dependent_variable_niftis(dependent_df, mask_img=MNI_brain_mask, out_dir=out_dir)
print('Below is the ordered list of niftis which will be passed as the dependent variable')
ordered_image_list
len(ordered_image_list)

# 02 - Generate Design Matrix

Method signature, for reference:

`create_design_matrix(self, formula_vars=None, data_df=None, subject_id_column=None, subject_ids=None)`

In [None]:
# Build a design matrix containing only an intercept, one row per subject found in nifti_df
subject_ids = nifti_df['subject_id'].tolist()
design_matrix = palm_prep.create_design_matrix(subject_ids=subject_ids)

# Show the result
print(design_matrix)

# 03 - Generate Contrast Matrix

Generate a Contrast Matrix
- This is different from the contrast matrices used in cell-means regressions such as in PALM, but it is much more powerful. 



For more information on contrast matrices, please refer to this: https://cran.r-project.org/web/packages/codingMatrices/vignettes/codingMatrices.pdf

Generally, these drastically affect the results of an ANOVA. However, they are merely a nuisance for a regression.
In essence, they assess the coefficients of a given model.

________________________________________________________________
A coding matrix (a contrast matrix if it sums to zero) is simply a way of defining what coefficients to evaluate and how to evaluate them. 
If a coefficient is set to 1 and everything else is set to zero, we are taking the mean of the coefficient's means and assessing if they significantly
deviate from zero, i.e., we are checking if it had a significant impact on the ability to predict the dependent variable.
If a coefficient is set to 1, another is -1, and others are 0, we are assessing how the means of the two coefficients deviate from each other. 
If several coefficients are 1 and several others are -1, we are assessing how the group-level means of the two sets of coefficients deviate from each other.
If a group of coefficients are 1, a group is -1, and a group is 0, we are only assessing how the groups +1 and -1 have differing means. 

1: This value indicates that the corresponding variable's coefficient in the model is included in the contrast. It means you are interested in estimating the effect of that variable.

0: This value indicates that the corresponding variable's coefficient in the model is not included in the contrast. It means you are not interested in estimating the effect of that variable.

-1: This value indicates that the corresponding variable's coefficient in the model is included in the contrast, but with an opposite sign. It means you are interested in estimating the negative effect of that variable.

----------------------------------------------------------------
The contrast matrix is typically a matrix with dimensions (number of contrasts) x (number of regression coefficients). Each row of the contrast matrix represents a contrast or comparison you want to test.

For example, let's say you have the following regression coefficients in your model:

Intercept, Age, connectivity, Age_interaction_connectivity
A contrast matrix has dimensions of [n_contrasts, n_predictors], where each row is one contrast (one experiment) and each column corresponds to one regression coefficient.

If you want to test the hypothesis that the effect of Age is significant, you can set up a contrast matrix with a row that specifies this contrast (actually an averaging vector):
```
[0,1,0,0]. This is an averaging vector because it sums to 1
```
This contrast will test the coefficient corresponding to the Age variable against zero.


If you want to test the hypothesis that the effect of Age is different from the effect of connectivity, you can set up a contrast matrix with a single row:
```
[0,1,-1,0]. This is a contrast because it sums to 0
```

Thus, if you want to see if any given effect is significant compared to the intercept (average), you can use the following contrast matrix:
```
[1,0,0,0]
[-1,1,0,0]
[-1,0,1,0]
[-1,0,0,1] actually a coding matrix of averaging vectors
```

The first row tests the intercept against zero. Each subsequent row compares one coefficient (Age, connectivity, and Age_interaction_connectivity, in that order) against the intercept, so the difference between each coefficient and the intercept can then be assessed.
_____
You can define any number of contrasts in the contrast matrix to test different hypotheses or comparisons of interest in your regression analysis.

It's important to note that the specific contrasts you choose depend on your research questions and hypotheses. You should carefully consider the comparisons you want to make and design the contrast matrix accordingly.

- Examples:
    - [Two Sample T-Test](https://fsl.fmrib.ox.ac.uk/fsl/fslwiki/GLM#Two-Group_Difference_.28Two-Sample_Unpaired_T-Test.29)
    - [One Sample with Covariate](https://fsl.fmrib.ox.ac.uk/fsl/fslwiki/GLM#Single-Group_Average_with_Additional_Covariate)

In [None]:
# Generate the basic contrast matrix from the current design_matrix.
# NOTE(review): the matrix is not displayed by this cell — add a print/display call if you want to inspect it.
contrast_matrix = palm_prep.generate_basic_contrast_matrix(design_matrix)

# If you want to modify the contrast_matrix, do it here (one row per contrast, one column per design-matrix column)
# contrast_matrix = 

In [None]:
# Save the contrast matrix to a CSV file
# NOTE(review): contrast_matrix is rebound to the method's return value, which is later passed to
# calvins_call_palm — confirm that save_contrast_matrix_to_csv returns the matrix itself.
contrast_matrix = palm_prep.save_contrast_matrix_to_csv(design_matrix=design_matrix, contrast_matrix=contrast_matrix, file_name="palm_contrast_matrix.csv")

# 04 - Exchangeability Blocks (optional)

Optional - Exchangeability Blocks
- This is optional and for when you are doing a 'meta-analysis' of multiple data types, e.g. strokes and DBS sites
- This is a column of integers that can usually be generated from the dataset names. Details on the [PALM website](https://fsl.fmrib.ox.ac.uk/fsl/fslwiki/PALM/ExchangeabilityBlocks)
- To use this, pass `eb=eb_matrix` to the `calvins_call_palm` call below (it is set to `eb=None` by default).

In [None]:
### This is just an example, you will have to edit to adapt to your data, 
### but it should be integers, starting with 1,2,3....
### The mapping below assigns each dataset name an integer code; the 'dataset' column is then replaced by those codes.
### NOTE(review): clean_df is not defined in the visible notebook — substitute the dataframe that holds your 'dataset' column.

# coding_key = {"Prosopagnosia_w_Yeo1000": 1,
#              "Corbetta_Lesions": 1,
#              "DBS_dataset": 2
#              }

# eb_matrix = pd.DataFrame()
# eb_matrix = clean_df['dataset'].replace(coding_key)
# display(eb_matrix)

# 05 - Submit to PALM

In [None]:
# Edit this according to documentation page
# These three values are forwarded to calvins_call_palm as username, cluster_email and iterations.
cluster_username = 'cu135'
cluster_email = 'choward12@bwh.harvard.edu'
number_of_permutations=10000

In [None]:
# Current best default settings:
# palm_prep = PalmPreparation(out_dir=output_directory)
# Submits the PALM job using the images, design matrix and contrast matrix prepared above.
# NOTE(review): argument meanings below are inferred from their names unless stated otherwise — verify against calvin_utils.
palm_prep.calvins_call_palm(input_imgs=ordered_nifti_list,  # ordered list returned by process_nifti_paths in section 01
             design_matrix=design_matrix,
             contrast_matrix=contrast_matrix,
             working_directory=None,
             output_directory=None,
             iterations=number_of_permutations,
             accel="tail",
             voxelwise_evs=None,  # NOTE: the voxelwise_evs list built in section 01 is NOT passed here
             eb=None,  # set to eb_matrix to use exchangeability blocks (section 04)
             mask="",
             save_1p=True,  # consistent with the Results note that p-values are given as 1-p
             logp=False,
             tfce=False,
             ise_flag=False,
             two_tailed_flag=True,
             corrcon_flag=False,
             fdr_flag=False,  # NOTE(review): the Results cell reads '_vox_tstat_fdrp.nii'; presumably that needs fdr_flag=True — confirm
             cluster_name="erisone",
             username=cluster_username,
             cluster_email=cluster_email,
             queue="normal",
             cores="1",
             memory="6000",
             dryrun=False,
             job_name="fsl_palm",
             job_time="",
             num_nodes="",
             num_tasks="",
             x11_forwarding="",
             service_class="",
             debug=False,
             extra=""
    )

Results

Notes: 
- I have intentionally left the function definition here so that you can modify it as needed. Note that the options for slices, cmap, and vmax are currently set inside the function to keep all the maps comparable. If this is clunky, we can change this.
- Note the p-value is given as 1-p (so put 0.95 for p<0.05...)

In [None]:
# Function that thresholds Tmaps using a specified p-value map and alpha level
def thresholded_PALM_Tmap(output_dir,tvalue_file='_vox_tstat.nii', pvalue_file='_vox_tstat_fwep.nii', threshold='0.95',showT=False):
    """Plot a PALM t-map restricted to voxels whose p-map value exceeds ``threshold``.

    Args:
        output_dir: Directory containing the PALM output files.
        tvalue_file: File name of the t-statistic image inside ``output_dir``.
        pvalue_file: File name of the p-value image inside ``output_dir``.
        threshold: Cut-off interpolated into the masking formula; voxels with
            ``MNI_brain_mask * p > threshold`` are kept.
        showT: When true, also plot the brain-masked (unthresholded) t-map.

    Relies on the notebook-level ``MNI_brain_mask`` image. Prints the number of
    voxels surviving the threshold and returns nothing.
    """
    # Plot settings shared by every map so that the figures stay comparable
    shared_plot_settings = dict(
        display_mode="z",
        cut_coords=[-42,-24,-12,0,12,24,42,54],
        cmap="cold_hot",
        colorbar=True,
        vmax=6,
    )

    t_path = os.path.join(output_dir,tvalue_file)
    masked_t = image.math_img("a*b",a=t_path,b=MNI_brain_mask)

    p_path = os.path.join(output_dir,pvalue_file)
    significant = image.math_img(f'MNI_brain_mask * img > {threshold}', img=p_path, MNI_brain_mask=MNI_brain_mask)

    # Keep t-values only where the p-map passed the threshold
    surviving_t = image.math_img("a*b",a=masked_t,b=significant)

    n_surviving = np.sum(significant.get_fdata())
    print("There are",n_surviving,"non-zero voxels in this maps")

    if showT:
        plotting.plot_stat_map(masked_t, title=tvalue_file, **shared_plot_settings)
    plotting.plot_stat_map(surviving_t, title=pvalue_file, **shared_plot_settings)

    
# NOTE(review): output_dir is not defined anywhere in the visible notebook (earlier cells define
# output_directory and out_dir) — set it to the directory holding the PALM results before running.
# The threshold is on the 1-p scale, so '0.95' corresponds to p < 0.05.
print("FWE")
# NOTE(review): this call reads the '_c2' files while the FDR/uncorrected calls below read
# files without that suffix — confirm which set of files is intended.
thresholded_PALM_Tmap(output_dir,tvalue_file='_vox_tstat_c2.nii', pvalue_file='_vox_tstat_fwep_c2.nii', threshold='0.95',showT=True)

print("FDR")
# NOTE(review): the PALM call above passes fdr_flag=False; presumably the fdrp file is only written when it is True — confirm.
thresholded_PALM_Tmap(output_dir,tvalue_file='_vox_tstat.nii', pvalue_file='_vox_tstat_fdrp.nii', threshold='0.95')

print("uncorrected")
thresholded_PALM_Tmap(output_dir,tvalue_file='_vox_tstat.nii', pvalue_file='_vox_tstat_uncp.nii', threshold='0.95')

# TFCE variant, disabled (the PALM call above passes tfce=False):
# print("TFCE FWE")
# thresholded_PALM_Tmap(output_dir,tvalue_file='_tfce_tstat.nii', pvalue_file='_tfce_tstat_fwep.nii', threshold='0.95')

Clean Up

In [None]:
# Clean up the big temporary file to keep result directories small:
try:
    os.remove(os.path.join(working_dir,'concat.nii'))
except:
    print('No concat file to delete.')