In [None]:
# Directory where results are saved; it is handed to CalvinStatsmodelsPalm as
# `output_dir` when the helper is instantiated further down.
out_dir = '/Users/cu135/Partners HealthCare Dropbox/Calvin Howard/studies/atrophy_seeds_2023/Figures/supplement_parametric_regression_to_adascog'

Import Data

In [None]:
# Path to the input spreadsheet. Despite the variable name, this points at an
# Excel workbook (.xlsx), not a CSV, which is why a sheet name is given as well.
input_csv_path = '/Users/cu135/Partners HealthCare Dropbox/Calvin Howard/studies/cognition_2023/metadata/master_list_proper_subjects.xlsx'
sheet = 'master_list_proper_subjects' # worksheet name, passed as `sheet` to CalvinStatsmodelsPalm

In [None]:
from calvin_utils.permutation_analysis_utils.statsmodels_palm import CalvinStatsmodelsPalm
# Instantiate CalvinStatsmodelsPalm with the input spreadsheet, output directory, and sheet name
cal_palm = CalvinStatsmodelsPalm(input_csv_path=input_csv_path, output_dir=out_dir, sheet=sheet)
# Load the data with read_and_display_data(); the returned object is kept as data_df
data_df = cal_palm.read_and_display_data()


# 01 - Preprocess Your Data

**Handle NANs**
- Set drop_nans=True if you would like to remove NaNs from data
- Provide a column name or a list of column names to remove NaNs from

In [None]:
# Show the available column names (useful when choosing which columns to check for NaNs)
data_df.columns

In [None]:
# Columns handed to cal_palm.drop_nans_from_columns() as `columns_to_drop_from`.
# NOTE(review): the model fitted at the end of this notebook uses
# 'Subiculum_Grey_Matter' and 'Z_Scored_Subiculum_Connectivity_T', neither of
# which is listed here -- confirm NaNs in those columns are handled as intended.
drop_list = ['Age', 'Z_Scored_Percent_Cognitive_Improvement', 'Hippocampus_GM_Vol', 'Z_Scored_Subiculum_T_By_Origin_Group_']

In [None]:
# Replace data_df with the result of the NaN-dropping helper for the columns in drop_list.
data_df = cal_palm.drop_nans_from_columns(columns_to_drop_from=drop_list)
# Trailing expression: renders the resulting frame as the cell output.
data_df

**Drop Row Based on Value of Column**

Define the column, condition, and value for dropping rows
- column = 'your_column_name'
- condition = 'above'  # Options: 'equal', 'above', 'below', 'not'

In [None]:
# Show the available column names (useful when choosing the column to filter rows on)
data_df.columns

Set the parameters for dropping rows

In [None]:
column = 'City'  # The column you'd like to evaluate
condition = 'equal'  # The condition to check ('equal', 'above', 'below', 'not')
value = 'Boston' # The value `column` is compared against; presumably rows meeting the condition are dropped -- confirm in drop_rows_based_on_value

In [None]:
# Apply the row filter. The helper returns two objects; presumably data_df holds
# the retained rows and other_df the removed ones -- TODO confirm against
# CalvinStatsmodelsPalm.drop_rows_based_on_value.
data_df, other_df = cal_palm.drop_rows_based_on_value(column, condition, value)
display(data_df)

**Standardize Data**
- Enter the columns you don't want to standardize into a list

In [None]:
# Show the available column names (useful when choosing which columns to exclude from standardization)
data_df.columns

In [None]:
# Remove anything you don't want to standardize
cols_not_to_standardize = ['Z_Scored_Subiculum_Connectivity_T', 'Z_Scored_Percent_Cognitive_Improvement']

In [None]:
# Replace data_df with the helper's output; cols_not_to_standardize names the columns to skip.
data_df = cal_palm.standardize_columns(cols_not_to_standardize)
# Trailing expression: renders the resulting frame as the cell output.
data_df

# Moderated Causal Steps Mediation
- Variable M is a mediator if X significantly accounts for variability in M, X significantly accounts for variability in Y, M significantly accounts for variability in Y when controlling for X, and the effect of X on Y decreases substantially when M is entered simultaneously with X as a predictor of Y.
- Some authors (Collins, Graham, & Flaherty, 1998; Judd & Kenny, 1981; Kenny et al., 1998; MacKinnon, 1994, 2000; MacKinnon, Krull, & Lockwood, 2000; Shrout & Bolger, 2002) have argued that a significant total effect of X on Y (quantified as c, the coefficient of X when Y is regressed on X alone) is not necessary for mediation to occur.

In [None]:
import numpy as np
import pandas as pd
from statsmodels.formula.api import ols

class ModeratedCausalStepsAnalysis:
    """
    Moderated causal-steps analysis built from three OLS regressions.

    Assesses whether mediation and/or moderated mediation exists in a causal
    relationship, using a series of regression models. This approach is akin
    to instrumental variable mediation analysis and follows causal steps to
    identify direct and indirect effects.

    TODO:
    - Make moderation conditional, so this can be applied as a plain mediation analysis.

    Parameters
    ----------
    dataframe : pandas.DataFrame
        The dataset containing all the variables.
    iv : str
        The name of the independent variable (X).
    mediator : str
        The name of the mediator variable (M).
    moderator : str
        The name of the moderator variable (Z).
    dv : str
        The name of the dependent variable (Y).

    Attributes
    ----------
    step_1_model, step_2_model, step_3_model
        The fitted results of each step, or None until the matching
        ``fit_step_N()`` method has been called.

    Notes
    -----
    - Control variables are not supported: every formula is built only from
      X, M, Z and Y.
    - The variable names are interpolated verbatim into the formula strings,
      so each must be usable as-is inside a formula (for example, no spaces).
    - NOTE(review): the statsmodels formula interface drops rows with missing
      values separately for each model by default, so the three steps can be
      fitted on different subsets of rows unless NaNs in X, M, Z and Y are
      removed beforehand. Confirm the sample is the same across steps before
      comparing coefficients.

    Analysis Process
    ----------------
    1. Step 1 (Baseline Model): ``Y ~ X``. Establishes the baseline
       relationship between the dependent variable and the independent
       variable. This serves as the reference for the final model.

    2. Step 2 (Mediator Model): ``M ~ X + Z + X:Z``. Tests whether X, Z and
       their interaction predict the mediator. This assesses the moderated
       Path a, i.e. whether the effect of X on M is moderated by Z.

    3. Step 3 (Final Outcome Model): ``Y ~ X + Z + M + M:Z + X:Z``. Tests
       whether X, Z, M and the M:Z and X:Z interactions together predict Y.
       This checks for mediation by M and for moderation on Path b (M -> Y).
       It is used to establish that M is related to Y, as evidenced by the
       coefficient on M, and to assess whether the X:Z interaction is no
       longer significant, which is taken as evidence that M mediates the
       effect of the X:Z interaction on Y. Some authors replace X:Z with
       another interaction or include several (Muller et al. 2005).

    Assessing Mediation
    -------------------
    - Direct Effect: If X remains a significant predictor of Y in Step 3,
      while controlling for M, this indicates a direct effect of X on Y
      (Path c').
    - Indirect Effect: If M is a significant predictor of Y in Step 3, and X
      was a significant predictor of M in Step 2, this indicates an indirect
      (mediated) effect of X on Y through M.
    - Full Mediation: Occurs when the direct effect of X on Y is no longer
      significant in Step 3, while M significantly predicts Y.
    - Partial Mediation: Occurs when both X and M significantly predict Y in
      Step 3, suggesting both direct and indirect effects of X on Y.

    Assessing Moderated Mediation
    -----------------------------
    - Moderation on Path a (X -> M): If the X:Z term is significant in Step 2,
      the effect of X on M is moderated by Z, i.e. the relationship between
      the independent variable and the mediator varies with the moderator.
    - Moderation on Path b (M -> Y): If the M:Z term is significant in Step 3,
      the effect of M on Y is moderated by Z, meaning the strength of the
      mediation depends on Z. A significant X:Z term in Step 3 instead points
      to moderation of the remaining direct effect of X on Y.

    Summary of Outcomes
    -------------------
    - No Direct Effect: If X is not significant in Step 1 and remains
      insignificant in Step 3.
    - No Mediation: If M does not significantly predict Y in Step 3.
    - Partial Mediation: If M and X both predict Y in Step 3.
    - Full Mediation: If M predicts Y and X no longer predicts Y in Step 3.
    - Moderated Mediation: Occurs when the effect of M on Y is moderated by Z
      (significant interaction terms).
    """

    def __init__(self, dataframe, iv: str, mediator: str, moderator: str, dv: str):
        """Store the data and variable names; no model is fitted here."""
        self.dataframe = dataframe
        self.iv = iv                # Independent Variable (X)
        self.mediator = mediator    # Mediator Variable (M)
        self.moderator = moderator  # Moderator Variable (Z)
        self.dv = dv                # Dependent Variable (Y)

        # Placeholders for the fitted models; None means "not fitted yet".
        self.step_1_model = None
        self.step_2_model = None
        self.step_3_model = None

    def _fit(self, formula: str):
        """Fit an OLS model for ``formula`` on the stored dataframe and return the results."""
        return ols(formula, data=self.dataframe).fit()

    def fit_step_1(self):
        """
        Fit the baseline model of Y on the independent variable alone.

        Formula: Y ~ X

        Returns
        -------
        The fitted results object, also stored in ``self.step_1_model``.
        """
        formula = f"{self.dv} ~ {self.iv}"
        self.step_1_model = self._fit(formula)
        return self.step_1_model

    def fit_step_2(self):
        """
        Fit the model for M as a function of X, Z, and the interaction X:Z.

        Formula: M ~ X + Z + X:Z

        Returns
        -------
        The fitted results object, also stored in ``self.step_2_model``.
        """
        formula = f"{self.mediator} ~ {self.iv} + {self.moderator} + {self.iv}:{self.moderator}"
        self.step_2_model = self._fit(formula)
        return self.step_2_model

    def fit_step_3(self):
        """
        Fit the model for Y as a function of X, Z, M, M:Z, and X:Z.

        Formula: Y ~ X + Z + M + M:Z + X:Z
        - Based on Edward and Lambert 2007, and Muller et al. 2005, combining
          the X:Z interaction with the M:Z interaction.

        Returns
        -------
        The fitted results object, also stored in ``self.step_3_model``.
        """
        formula = (
            f"{self.dv} ~ {self.iv} + {self.moderator} + {self.mediator}"
            f" + {self.mediator}:{self.moderator} + {self.iv}:{self.moderator}"
        )
        self.step_3_model = self._fit(formula)
        return self.step_3_model

    def summary(self):
        """
        Print a summary of all models (Step 1, Step 2, and Step 3).

        For each step, prints a header followed by the model's ``summary2()``
        output, or a "not fitted yet" message when the step has not been run.
        """
        steps = (
            ("Step 1", "Y ~ X", self.step_1_model),
            ("Step 2", "M ~ X + Z + X:Z", self.step_2_model),
            ("Step 3", "Y ~ X + Z + M + M:Z + X:Z", self.step_3_model),
        )
        for label, formula, model in steps:
            print(f"\n--- {label} ({formula}) ---")
            # Compare against None explicitly: "fitted" must not depend on
            # whether the results object happens to evaluate as truthy.
            if model is not None:
                print(model.summary2())
            else:
                print(f"{label} model not fitted yet.")



In [None]:
# Show the available column names (useful when choosing the iv, mediator, moderator and dv)
data_df.columns

In [None]:
# Initialize the class for the causal steps analysis
medmod = ModeratedCausalStepsAnalysis(
    dataframe=data_df, 
    iv='Subiculum_Grey_Matter', 
    mediator='Age', 
    moderator='Z_Scored_Subiculum_Connectivity_T', 
    dv='Z_Scored_Percent_Cognitive_Improvement'
)

# Run each step of the causal steps analysis
medmod.fit_step_1()  # Step 1: Y ~ X (baseline effect of the IV on the DV)
medmod.fit_step_2()  # Step 2: M ~ X + Z + X:Z (moderation on path a)
medmod.fit_step_3()  # Step 3: Y ~ X + Z + M + M:Z + X:Z (mediation via M, moderation on path b)

# Display the results
medmod.summary()
