## Prep

In [2]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import random
import seaborn as sns

import statsmodels.api as sm
from scipy import stats
import statsmodels.tools as tools

import warnings
warnings.filterwarnings('ignore')

# Show every column and up to 100 rows when a DataFrame is displayed.
# Use the fully qualified option names: the bare 'max_columns' / 'max_rows'
# keys are resolved by pattern matching and become ambiguous (OptionError)
# on pandas versions that register other options ending in the same name.
pd.set_option('display.max_columns', None)
pd.set_option('display.max_rows', 100)

In [2]:
# Load the MESA baseline extract and switch its raw column names to the
# analysis names used in the rest of the notebook.
mesa = pd.read_csv('../mesa/data_processed/Y_BaselineX_raw_full.csv').rename(
    columns={
        'cvda': 'Y_tot',
        'F1_PC2': 'nSES',
        'S1FAV': 'nFavFood',
        'S1PAI': 'nPhysFac',
        'G_bla_rk': 'nRS',
        'chdiet': 'nutrition',
        'chphysact': 'PhysAct',
        'income': 'FamIncome',
    }
)

# Shift the 0/1/2 codes of the diet and physical-activity columns to 1/2/3.
# The mapping is kept as a module-level name because the JHS cell reuses it.
diet_phys_map = {0: 1, 1: 2, 2: 3}
mesa[['nutrition', 'PhysAct']] = mesa[['nutrition', 'PhysAct']].replace(diet_phys_map)

# Subset of participants whose `race` code equals 3.
mesa_bla = mesa.loc[mesa['race'].eq(3)]

In [5]:
# Outcome table: keep only the subject id, the event indicator and the
# follow-up time.
jhs_outcome_full = pd.read_csv('../jhs/data/processed/jhs_cox_base.csv')
jhs_outcome = jhs_outcome_full[['subjid', 'event', 'time']].copy()

# Covariate table: restrict to rows recorded at visit 1.
jhs_covar = pd.read_csv('../jhs/data/processed/jhs_raw_full.csv')
jhs_covar = jhs_covar[jhs_covar['visit'] == 1]

# Left join so that every subject in the outcome table is kept.
jhs = pd.merge(jhs_outcome, jhs_covar, on=['subjid'], how='left')

# Harmonise column names with the MESA frame.
# The covariate file already carries a column called `nSES` next to
# `nbSESpc2score`; renaming the score to `nSES` without moving the existing
# column aside would leave two columns with the same label, and
# `jhs['nSES']` would then return a two-column DataFrame. The pre-existing
# column is therefore preserved under `nSES_orig` in the same rename call
# (labels absent from the frame are ignored by `rename`).
jhs = jhs.rename(columns={'nSES': 'nSES_orig',
                          'event': 'Y_tot',
                          'nbSESpc2score': 'nSES',
                          'S1FAV': 'nFavFood',
                          'S1PAI': 'nPhysFac',
                          'G_bla_rk': 'nRS',
                          'nutrition3cat': 'nutrition',
                          'PA3cat': 'PhysAct',
                          'fmlyinc': 'FamIncome'})

# Shift the 0/1/2 diet and physical-activity codes to 1/2/3 using the
# mapping defined in the MESA cell; missing values are left untouched.
jhs['nutrition'] = jhs['nutrition'].replace(diet_phys_map)
jhs['PhysAct'] = jhs['PhysAct'].replace(diet_phys_map)

Unnamed: 0,subjid,event,time,visit,nSES,nbSESpc2score,currentSmoker,Diabetes,sex,age,sbp,hdl,totchol,MIHx,strokeHx,CHDHx,CVDHx,S1FAV,S1PAI,G_bla_rk,VisitDate,nutrition3cat,PA3cat,fmlyinc,alc,gender
0,J100079,0,2975,1,0.0,0.55,0.0,1.0,Female,62,102.74,65.0,282.0,0.0,0.0,0.0,0.0,0.318447,0.636895,1.536694,01/22/2001,1.0,1.0,1.0,1.0,0
1,J100180,0,4220,1,1.0,1.62,0.0,0.0,Female,75,122.91,71.0,245.0,0.0,0.0,0.0,0.0,0.318447,0.318447,3.196787,02/24/2001,1.0,1.0,2.0,0.0,0
2,J100953,0,2932,1,0.0,0.75,1.0,0.0,Female,60,110.08,44.0,153.0,0.0,0.0,0.0,0.0,0.636895,0.636895,3.768009,04/20/2001,0.0,0.0,2.0,1.0,0
3,J101030,0,4184,1,1.0,0.83,0.0,1.0,Male,60,121.08,51.0,189.0,0.0,0.0,0.0,0.0,0.318447,0.318447,2.772677,04/22/2001,0.0,0.0,3.0,0.0,1
4,J101134,0,3108,1,0.0,-0.05,0.0,0.0,Female,65,116.49,55.0,257.0,0.0,0.0,0.0,0.0,0.318447,0.318447,2.123658,10/11/2000,,1.0,1.0,1.0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
3266,J599265,0,2576,1,0.0,0.30,1.0,1.0,Female,44,126.58,45.0,200.0,0.0,0.0,0.0,0.0,0.000000,0.000000,-2.707370,03/28/2004,0.0,0.0,3.0,1.0,0
3267,J599276,0,2576,1,0.0,0.75,0.0,0.0,Male,43,116.49,74.0,215.0,0.0,0.0,0.0,0.0,0.318447,0.318447,3.768009,03/29/2004,0.0,1.0,4.0,1.0,1
3268,J599282,0,2565,1,0.0,0.24,0.0,0.0,Male,58,118.33,59.0,277.0,0.0,0.0,0.0,0.0,1.273789,0.318447,3.311407,03/25/2004,1.0,0.0,4.0,0.0,1
3269,J599323,0,2582,1,0.0,0.72,0.0,0.0,Female,48,164.17,48.0,172.0,0.0,0.0,0.0,0.0,0.318447,0.000000,-0.108509,03/30/2004,0.0,0.0,3.0,0.0,0


## Mesa

In [7]:
# Outcome and baseline covariates for the MESA moderation models.
y = mesa['Y_tot']

X = mesa[['nSES','nFavFood','nPhysFac', 'nRS',
          'FamIncome','nutrition', 'PhysAct',
          'age','gender','race','cig','cural','diabet','hdl','chol','sbp']]

# One independent copy of the covariates per candidate interaction term.
X1 = X.copy()
X2 = X.copy()
X3 = X.copy()
X4 = X.copy()
X5 = X.copy()
X6 = X.copy()

# Model 1: neighbourhood SES x family income.
# NOTE(review): the interaction is the product of the two category codes and
# is then dummy-encoded, so different code pairs with the same product
# (e.g. 2*3 and 3*2) end up in the same dummy level.
X1['nSES_FamIncome'] = X['nSES'] * X['FamIncome']
# dtype=float keeps the design matrix purely numeric: on pandas >= 2.0
# get_dummies emits bool columns by default, and a frame mixing bool and
# float columns is converted to an object array, which statsmodels rejects.
X1 = pd.get_dummies(X1, columns=['nSES','nFavFood','nPhysFac', 'nRS',
                                 'FamIncome','nutrition', 'PhysAct',
                                 'gender','race','cig','cural','diabet','nSES_FamIncome'],
                    drop_first=True, dtype=float)

# Remaining interaction terms (not dummy-encoded in this cell).
X2['nFavFood_FamIncome'] = X['nFavFood'] * X['FamIncome']
X3['nPhysFac_FamIncome'] = X['nPhysFac'] * X['FamIncome']
X4['nRS_FamIncome'] = X['nRS'] * X['FamIncome']
X5['nFavFood_nutrition'] = X['nFavFood'] * X['nutrition']
X6['nPhysFac_PhysAct'] = X['nPhysFac'] * X['PhysAct']

In [8]:
# Fit a logistic regression of the outcome on the model-1 design matrix
# (with an added intercept column) and print the coefficient table.
logistic_model_with_moderation = sm.Logit(
    endog=y,
    exog=sm.add_constant(X1),
).fit()
print(logistic_model_with_moderation.summary())

Optimization terminated successfully.
         Current function value: 0.409347
         Iterations 7
                           Logit Regression Results                           
Dep. Variable:                  Y_tot   No. Observations:                 6787
Model:                          Logit   Df Residuals:                     6749
Method:                           MLE   Df Model:                           37
Date:                Sat, 04 Nov 2023   Pseudo R-squ.:                 0.09941
Time:                        17:16:53   Log-Likelihood:                -2778.2
converged:                       True   LL-Null:                       -3084.9
Covariance Type:            nonrobust   LLR p-value:                1.523e-105
                          coef    std err          z      P>|z|      [0.025      0.975]
---------------------------------------------------------------------------------------
const                  -5.2108      0.409    -12.744      0.000      -6.012      -4.409
ag

In [None]:
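## Issues:
## - The interaction term is a product of category codes, so different
##   combinations collide (e.g. 2*3 == 3*2) and share one dummy level.
## - Significance is inconsistent across different subgroups.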