## Prep

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
import seaborn as sns

import statsmodels.api as sm
from scipy import stats
import statsmodels.tools as tools

import warnings
# Silence library warnings so the model summaries stay readable.
# NOTE(review): this blanket filter also hides statsmodels convergence
# warnings - consider narrowing it to specific warning categories.
warnings.filterwarnings('ignore')

# Use fully qualified option names. The bare 'max_columns' / 'max_rows'
# patterns are ambiguous on pandas >= 1.4 (they also match
# 'styler.render.max_columns' / 'styler.render.max_rows') and raise OptionError.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)

### Raw data

In [None]:
# Load the raw MESA baseline extract and rename its columns to the names
# used by the modelling cells further down.
mesa_column_names = {
    'cvda': 'Y_tot',
    'F1_PC2': 'nSES',
    'S1FAV': 'nFavFood',
    'S1PAI': 'nPhysFac',
    'G_bla_rk': 'nRS',
    'chdiet': 'nutrition',
    'chphysact': 'PhysAct',
    'income': 'FamIncome',
    'cig': 'currentSmoker',
    'cural': 'alc',
    'diabet': 'Diabetes',
    'chol': 'totchol',
}
mesa_raw = pd.read_csv('../mesa/data_processed/Y_BaselineX_raw_full.csv').rename(
    columns=mesa_column_names
)

# Recode the diet and physical-activity scores from 0/1/2 to 1/2/3.
diet_phys_map = {0: 1, 1: 2, 2: 3}
for score_col in ('nutrition', 'PhysAct'):
    mesa_raw[score_col] = mesa_raw[score_col].replace(diet_phys_map)

# Complete-case frame: rows with any missing value are removed,
# while mesa_raw itself is left untouched.
mesa = mesa_raw.dropna()

# Rows with race == 3 (the subset analysed under "MESA BLACK" below).
mesa_bla = mesa[mesa['race'] == 3]


In [None]:
# Outcome table: keep only the 'subjid', 'event' and 'time' columns.
jhs_outcome_full = pd.read_csv('../jhs/data/processed/jhs_cox_base.csv')
jhs_outcome = jhs_outcome_full[['subjid','event','time']].copy()

# Covariate table: keep only the rows where visit == 1.
jhs_covar = pd.read_csv('../jhs/data/processed/jhs_raw_full.csv')
jhs_covar = jhs_covar[jhs_covar['visit'] == 1]

# Left join on subjid: every outcome row is kept, and subjects without a
# matching covariate row get NaN covariates.
jhs_raw = pd.merge(jhs_outcome, jhs_covar, on=['subjid'], how='left')

# Drops the fifth column of the merged frame by position.
# NOTE(review): which column this is depends on the column order of
# jhs_raw_full.csv - confirm it is the intended one and prefer dropping by name.
jhs_raw = jhs_raw.drop(jhs_raw.columns[4], axis=1)

# Rename to the column names used by the modelling cells further down.
jhs_raw = jhs_raw.rename(columns = {'event': 'Y_tot', 
                             'nbSESpc2score': 'nSES',
                            'S1FAV': 'nFavFood',
                            'S1PAI': 'nPhysFac', 
                            'G_bla_rk': 'nRS', 
                            'nutrition3cat': 'nutrition', 
                            'PA3cat': 'PhysAct',
                            'fmlyinc': 'FamIncome'})
# Recode the diet and physical-activity scores with diet_phys_map, which is
# defined in the MESA raw-data cell (that cell must have been run first).
jhs_raw['nutrition'] = jhs_raw['nutrition'].replace(diet_phys_map)
jhs_raw['PhysAct'] = jhs_raw['PhysAct'].replace(diet_phys_map)

# Complete-case frame: rows with any missing value are removed.
jhs = jhs_raw.copy()
jhs = jhs.dropna()

### Raw data with missing values imputed

In [None]:
def fillna_cat(df,cat_feat):
    """Fill missing values in categorical columns with each column's mode.

    The frame is modified in place and also returned, so callers may use
    either the return value or the object they passed in.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame containing the columns listed in ``cat_feat``.
    cat_feat : iterable of str
        Names of the columns to impute.

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, with missing values in ``cat_feat`` filled.
        A column that is entirely missing has no mode and is left unchanged.
    """
    for feat in cat_feat:
        modes = df[feat].mode()
        if modes.empty:
            # Every value is missing, so there is nothing to impute with.
            continue
        # Assign the filled column back instead of calling
        # df[feat].fillna(..., inplace=True): that chained form operates on a
        # temporary and is a silent no-op under pandas Copy-on-Write.
        df[feat] = df[feat].fillna(modes.iloc[0])
    return df

def fillna_cont(df,cont_feat):
    """Return a copy of ``df`` with missing values in ``cont_feat`` mean-imputed.

    Only the columns named in ``cont_feat`` are filled, each with its own
    column mean; other columns keep their missing values. The input frame is
    not modified.
    """
    column_means = df[cont_feat].mean()
    return df.fillna(column_means)

# Columns treated as categorical when imputing missing values.
cat_feat = ['FamIncome', 'nutrition', 'PhysAct',
           'currentSmoker','alc','Diabetes']
# Columns treated as continuous when imputing missing values.
cont_feat = ['nSES','nFavFood','nPhysFac', 'nRS',
           'hdl','totchol','sbp']

In [None]:
# Impute on a copy so that mesa_raw keeps its original missing values and this
# cell gives the same result every time it is re-run.
mesa = fillna_cat(mesa_raw.copy(), cat_feat)
# Continuous covariates are mean-imputed with fillna_cont; they were
# previously sent through fillna_cat, which filled them with the mode.
mesa = fillna_cont(mesa, cont_feat)

# Rows with race == 3 (the subset analysed under "MESA BLACK" below).
mesa_bla = mesa[mesa['race'] == 3]

In [None]:
# Impute on a copy so that jhs_raw keeps its original missing values and this
# cell gives the same result every time it is re-run.
jhs = fillna_cat(jhs_raw.copy(), cat_feat)
# Continuous covariates are mean-imputed with fillna_cont; they were
# previously sent through fillna_cat, which filled them with the mode.
jhs = fillna_cont(jhs, cont_feat)

### Processed data

In [None]:
# Load the processed MESA baseline file and rename its columns to the names
# used by the modelling cells further down.
mesa_processed_names = {
    'cvda': 'Y_tot',
    'F1_PC2': 'nSES',
    'S1FAV': 'nFavFood',
    'S1PAI': 'nPhysFac',
    'G_bla_rk': 'nRS',
    'chdiet': 'nutrition',
    'chphysact': 'PhysAct',
    'income': 'FamIncome',
    'cig': 'currentSmoker',
    'cural': 'alc',
    'diabet': 'Diabetes',
    'chol': 'totchol',
}
mesa = pd.read_csv('../mesa/data_processed/Y_BaselineX_processed_full.csv').rename(
    columns=mesa_processed_names
)

# Recode the diet and physical-activity scores from 0/1/2 to 1/2/3.
diet_phys_map = {0: 1, 1: 2, 2: 3}
for score_col in ('nutrition', 'PhysAct'):
    mesa[score_col] = mesa[score_col].replace(diet_phys_map)

# Rows with race == 3 (the subset analysed under "MESA BLACK" below).
mesa_bla = mesa[mesa['race'] == 3]

In [None]:
# Load the processed JHS file. Its existing 'nSES' column is discarded first
# so that 'nbSESpc2score' can take over the 'nSES' name in the rename below.
jhs_processed_names = {
    'event': 'Y_tot',
    'nbSESpc2score': 'nSES',
    'S1FAV': 'nFavFood',
    'S1PAI': 'nPhysFac',
    'G_bla_rk': 'nRS',
    'nutrition3cat': 'nutrition',
    'PA3cat': 'PhysAct',
    'fmlyinc': 'FamIncome',
}
jhs = (
    pd.read_csv('../jhs/data/processed/jhs_cox_base.csv')
    .drop(columns="nSES")
    .rename(columns=jhs_processed_names)
)

# Recode the diet and physical-activity scores with the diet_phys_map defined
# in the MESA cell above.
for score_col in ('nutrition', 'PhysAct'):
    jhs[score_col] = jhs[score_col].replace(diet_phys_map)

## Mesa

In [None]:
# Outcome and covariates for the moderation models on the full MESA frame.
y = mesa['Y_tot']

X = mesa[['nSES','nFavFood','nPhysFac', 'nRS',
          'FamIncome','nutrition', 'PhysAct',
          'age','gender','race','currentSmoker','alc','Diabetes','hdl','totchol','sbp']]

# dtype=float keeps the design matrix fully numeric. On pandas >= 2.0
# get_dummies returns bool columns by default, and a mixed float/bool frame
# becomes an object array that sm.Logit refuses to fit.
X = pd.get_dummies(X, columns=['gender','race','currentSmoker','alc','Diabetes'],
                   drop_first=True, dtype=float)

# One design matrix per moderation hypothesis: the shared covariates plus a
# single interaction term, appended as the last column.
X1 = X.assign(nSES_FamIncome=X['nSES'] * X['FamIncome'])
X2 = X.assign(nFavFood_FamIncome=X['nFavFood'] * X['FamIncome'])
X3 = X.assign(nPhysFac_FamIncome=X['nPhysFac'] * X['FamIncome'])
X4 = X.assign(nRS_FamIncome=X['nRS'] * X['FamIncome'])
X5 = X.assign(nFavFood_nutrition=X['nFavFood'] * X['nutrition'])
X6 = X.assign(nPhysFac_PhysAct=X['nPhysFac'] * X['PhysAct'])

In [None]:
# Fit every moderation design built above, not only X6, so that each
# interaction estimate is reproducible from a clean run of the notebook.
for design in (X1, X2, X3, X4, X5, X6):
    # The interaction term is the last column of each design matrix.
    print(f"\n=== MESA: moderation model with {design.columns[-1]} ===")
    # After the loop the name is bound to the X6 fit, as it was before.
    logistic_model_with_moderation = sm.Logit(y, sm.add_constant(design)).fit()
    print(logistic_model_with_moderation.summary())

## MESA BLACK

In [None]:
# Outcome and covariates for the moderation models on the mesa_bla subset.
y = mesa_bla['Y_tot']

X = mesa_bla[['nSES','nFavFood','nPhysFac', 'nRS',
          'FamIncome','nutrition', 'PhysAct',
          'age','gender','race','currentSmoker','alc','Diabetes','hdl','totchol','sbp']]

# dtype=float keeps the design matrix fully numeric. On pandas >= 2.0
# get_dummies returns bool columns by default, and a mixed float/bool frame
# becomes an object array that sm.Logit refuses to fit.
# 'race' has a single level in this subset, so drop_first removes it entirely.
X = pd.get_dummies(X, columns=['gender','race','currentSmoker','alc','Diabetes'],
                   drop_first=True, dtype=float)

# One design matrix per moderation hypothesis: the shared covariates plus a
# single interaction term, appended as the last column.
X1 = X.assign(nSES_FamIncome=X['nSES'] * X['FamIncome'])
X2 = X.assign(nFavFood_FamIncome=X['nFavFood'] * X['FamIncome'])
X3 = X.assign(nPhysFac_FamIncome=X['nPhysFac'] * X['FamIncome'])
X4 = X.assign(nRS_FamIncome=X['nRS'] * X['FamIncome'])
X5 = X.assign(nFavFood_nutrition=X['nFavFood'] * X['nutrition'])
X6 = X.assign(nPhysFac_PhysAct=X['nPhysFac'] * X['PhysAct'])

In [None]:
# Fit every moderation design built above, not only X6, so that each
# interaction estimate is reproducible from a clean run of the notebook.
for design in (X1, X2, X3, X4, X5, X6):
    # The interaction term is the last column of each design matrix.
    print(f"\n=== MESA Black: moderation model with {design.columns[-1]} ===")
    # After the loop the name is bound to the X6 fit, as it was before.
    logistic_model_with_moderation = sm.Logit(y, sm.add_constant(design)).fit()
    print(logistic_model_with_moderation.summary())

## JHS

In [None]:
# Outcome and covariates for the moderation models on the JHS frame
# (there is no 'race' covariate in this design).
y = jhs['Y_tot']

X = jhs[['nSES','nFavFood','nPhysFac', 'nRS',
          'FamIncome','nutrition', 'PhysAct',
          'age','gender','currentSmoker','alc','Diabetes','hdl','totchol','sbp']]

# dtype=float keeps the design matrix fully numeric. On pandas >= 2.0
# get_dummies returns bool columns by default, and a mixed float/bool frame
# becomes an object array that sm.Logit refuses to fit.
X = pd.get_dummies(X, columns=['gender','currentSmoker','alc','Diabetes'],
                   drop_first=True, dtype=float)

# One design matrix per moderation hypothesis: the shared covariates plus a
# single interaction term, appended as the last column.
X1 = X.assign(nSES_FamIncome=X['nSES'] * X['FamIncome'])
X2 = X.assign(nFavFood_FamIncome=X['nFavFood'] * X['FamIncome'])
X3 = X.assign(nPhysFac_FamIncome=X['nPhysFac'] * X['FamIncome'])
X4 = X.assign(nRS_FamIncome=X['nRS'] * X['FamIncome'])
X5 = X.assign(nFavFood_nutrition=X['nFavFood'] * X['nutrition'])
X6 = X.assign(nPhysFac_PhysAct=X['nPhysFac'] * X['PhysAct'])

In [None]:
# Fit every moderation design built above, not only X6, so that each
# interaction estimate is reproducible from a clean run of the notebook.
for design in (X1, X2, X3, X4, X5, X6):
    # The interaction term is the last column of each design matrix.
    print(f"\n=== JHS: moderation model with {design.columns[-1]} ===")
    # After the loop the name is bound to the X6 fit, as it was before.
    logistic_model_with_moderation = sm.Logit(y, sm.add_constant(design)).fit()
    print(logistic_model_with_moderation.summary())

## Mediation Test

Mediation is tested for the interactions that were significant in the moderation models above:

- MESA Black: `nPhysFac × FamIncome`, `nRS × FamIncome`, `nFavFood × nutrition`
- JHS: `nPhysFac × PhysAct`

In [None]:
# Product-of-coefficients mediation test on JHS:
#   nPhysFac (exposure) -> PhysAct (mediator) -> Y_tot (binary outcome)

# Step 1: Total effect (path c) - logistic regression of Y_tot on the exposure alone.
# Kept for reference; it is not part of the indirect effect.
model_Y_tot = sm.Logit(jhs['Y_tot'], sm.add_constant(jhs['nPhysFac'])).fit()

# Step 2: Path a - linear regression of the mediator on the exposure.
# NOTE(review): PhysAct is a 3-level score that is treated as continuous here.
model_PhysAct = sm.OLS(jhs['PhysAct'], sm.add_constant(jhs['nPhysFac'])).fit()

# Step 3: Path b (mediator -> outcome, adjusted for the exposure) and the direct effect c'.
model_combined = sm.Logit(jhs['Y_tot'], sm.add_constant(jhs[['nPhysFac', 'PhysAct']])).fit()

# Step 4: Indirect effect = a * b. The previous version multiplied b by the
# total effect c, which is not the mediated pathway.
path_a = model_PhysAct.params['nPhysFac']
path_a_se = model_PhysAct.HC0_se['nPhysFac']   # heteroskedasticity-robust SE, as before
path_b = model_combined.params['PhysAct']
path_b_se = model_combined.bse['PhysAct']

indirect_effect = path_a * path_b
# Sobel (first-order delta method) standard error. It uses the uncertainty of
# both paths and is never negative, so the interval bounds cannot come out reversed.
indirect_effect_se = np.sqrt(path_b ** 2 * path_a_se ** 2 + path_a ** 2 * path_b_se ** 2)

# Step 5: Summarize results (path b is on the log-odds scale, so the product is too).
print("Indirect Effect:", round(indirect_effect, 4))
print("95% Confidence Interval for Indirect Effect:", 
      (round(indirect_effect - 1.96 * indirect_effect_se, 4), round(indirect_effect + 1.96 * indirect_effect_se,4)))