In [33]:
import warnings

import numpy as np
import pandas as pd
import statsmodels.api as sm
from sklearn.preprocessing import StandardScaler
from statsmodels.formula.api import ols
from statsmodels.stats.mediation import Mediation


In [37]:
# Columns removed right after loading; any that are absent from the CSV are
# skipped silently (errors='ignore').
unused_columns = [
    'eid', 'Date_of_attending_assessment_centre', 'X34.0.0', 'Gender',
    'Ethnicity', 'Migrant.Status', 'TDI.Tertiles', 'Highest.Qualification',
    'House.Ownership', 'Income', 'Cohabiting', 'Living.Alone', 'AUDIT.Score',
    'Smoker', 'Moderate.Physical.Activity', 'Longstanding.Illness',
    'Diabetes', 'Cancer',
]
data = pd.read_csv(
    'cleaning data/participant_combined.csv', delimiter=','
).drop(columns=unused_columns, errors='ignore')

# Rewrite the column names so they contain no '.', '(', ')' or '-':
# dots become underscores, the other three characters are simply removed.
sanitized = data.columns.str.replace('.', '_', regex=False)
for unwanted in ('(', ')', '-'):
    sanitized = sanitized.str.replace(unwanted, '', regex=False)

# 'Remnant_Cholesterol_(Non-HDL,_Non-LDL_-Cholesterol)' still carries a comma
# after the removals above; replace it with the comma-free spelling.
sanitized = sanitized.str.replace('Remnant_Cholesterol_NonHDL,_NonLDL_Cholesterol', 'Remnant_Cholesterol_NonHDL_NonLDL_Cholesterol')
data.columns = sanitized

# Predictor columns used on the right-hand side of every regression below.
depression_atr = [
    'Depressed_At_Baseline',
    'Loneliness',
    'Social_Isolation',
    'PHQ9_No_Info',
    'PHQ9_Screen',
    'PHQ9_Items',
    'PHQ9_Severity',
    'CIDI_MDD_No_Info',
    'CIDI_MDD_Screen',
    'CIDI_MDD_Response',
    'CIDI_MDD_Severity',
    'GAD_CIDI_Somatic',
]

# Name of the outcome column.
CVD = 'CVD'

# Candidate mediator columns, kept in named groups purely for readability.
# The groups (and the order inside each group) follow the original row layout,
# so flattening them reproduces the original list exactly.
nmr_groups = {
    'cholesterol': ['Cholesterol_in_Chylomicrons_and_Extremely_Large_VLDL', 'Cholesterol_in_IDL', 'Cholesterol_in_Large_HDL', 'Cholesterol_in_Large_LDL', 'Cholesterol_in_Large_VLDL', 'Cholesterol_in_Medium_HDL', 'Cholesterol_in_Medium_LDL', 'Cholesterol_in_Medium_VLDL', 'Cholesterol_in_Small_HDL', 'Cholesterol_in_Small_LDL', 'Cholesterol_in_Small_VLDL', 'Cholesterol_in_Very_Large_HDL', 'Cholesterol_in_Very_Large_VLDL', 'Cholesterol_in_Very_Small_VLDL', 'Clinical_LDL_Cholesterol', 'HDL_Cholesterol', 'LDL_Cholesterol', 'Remnant_Cholesterol_NonHDL_NonLDL_Cholesterol', 'Total_Cholesterol', 'Total_Cholesterol_Minus_HDLC', 'Total_Concentration_of_Lipoprotein_Particles', 'Total_Esterified_Cholesterol', 'VLDL_Cholesterol'],
    'cholesteryl_esters': ['Cholesteryl_Esters_in_Chylomicrons_and_Extremely_Large_VLDL', 'Cholesteryl_Esters_in_HDL', 'Cholesteryl_Esters_in_IDL', 'Cholesteryl_Esters_in_Large_HDL', 'Cholesteryl_Esters_in_Large_LDL', 'Cholesteryl_Esters_in_Large_VLDL', 'Cholesteryl_Esters_in_LDL', 'Cholesteryl_Esters_in_Medium_HDL', 'Cholesteryl_Esters_in_Medium_LDL', 'Cholesteryl_Esters_in_Medium_VLDL', 'Cholesteryl_Esters_in_Small_HDL', 'Cholesteryl_Esters_in_Small_LDL', 'Cholesteryl_Esters_in_Small_VLDL', 'Cholesteryl_Esters_in_Very_Large_HDL', 'Cholesteryl_Esters_in_Very_Large_VLDL', 'Cholesteryl_Esters_in_Very_Small_VLDL', 'Cholesteryl_Esters_in_VLDL'],
    'particle_concentration': ['Concentration_of_Chylomicrons_and_Extremely_Large_VLDL_Particles', 'Concentration_of_HDL_Particles', 'Concentration_of_IDL_Particles', 'Concentration_of_Large_HDL_Particles', 'Concentration_of_Large_LDL_Particles', 'Concentration_of_Large_VLDL_Particles', 'Concentration_of_LDL_Particles', 'Concentration_of_Medium_HDL_Particles', 'Concentration_of_Medium_LDL_Particles', 'Concentration_of_Medium_VLDL_Particles', 'Concentration_of_Small_HDL_Particles', 'Concentration_of_Small_LDL_Particles', 'Concentration_of_Small_VLDL_Particles', 'Concentration_of_Very_Large_HDL_Particles', 'Concentration_of_Very_Large_VLDL_Particles', 'Concentration_of_Very_Small_VLDL_Particles', 'Concentration_of_VLDL_Particles'],
    'free_cholesterol': ['Free_Cholesterol_in_Chylomicrons_and_Extremely_Large_VLDL', 'Free_Cholesterol_in_HDL', 'Free_Cholesterol_in_IDL', 'Free_Cholesterol_in_Large_HDL', 'Free_Cholesterol_in_Large_LDL', 'Free_Cholesterol_in_Large_VLDL', 'Free_Cholesterol_in_LDL', 'Free_Cholesterol_in_Medium_HDL', 'Free_Cholesterol_in_Medium_LDL', 'Free_Cholesterol_in_Medium_VLDL', 'Free_Cholesterol_in_Small_HDL', 'Free_Cholesterol_in_Small_LDL', 'Free_Cholesterol_in_Small_VLDL', 'Free_Cholesterol_in_Very_Large_HDL', 'Free_Cholesterol_in_Very_Large_VLDL', 'Free_Cholesterol_in_Very_Small_VLDL', 'Free_Cholesterol_in_VLDL', 'Total_Free_Cholesterol'],
    'phospholipids': ['Phospholipids_in_Chylomicrons_and_Extremely_Large_VLDL', 'Phospholipids_in_HDL', 'Phospholipids_in_IDL', 'Phospholipids_in_Large_HDL', 'Phospholipids_in_Large_LDL', 'Phospholipids_in_Large_VLDL', 'Phospholipids_in_LDL', 'Phospholipids_in_Medium_HDL', 'Phospholipids_in_Medium_LDL', 'Phospholipids_in_Medium_VLDL', 'Phospholipids_in_Small_HDL', 'Phospholipids_in_Small_LDL', 'Phospholipids_in_Small_VLDL', 'Phospholipids_in_Very_Large_HDL', 'Phospholipids_in_Very_Large_VLDL', 'Phospholipids_in_Very_Small_VLDL', 'Phospholipids_in_VLDL', 'Total_Phospholipids_in_Lipoprotein_Particles'],
    'total_lipids': ['Total_Lipids_in_Chylomicrons_and_Extremely_Large_VLDL', 'Total_Lipids_in_HDL', 'Total_Lipids_in_IDL', 'Total_Lipids_in_Large_HDL', 'Total_Lipids_in_Large_LDL', 'Total_Lipids_in_Large_VLDL', 'Total_Lipids_in_LDL', 'Total_Lipids_in_Lipoprotein_Particles', 'Total_Lipids_in_Medium_HDL', 'Total_Lipids_in_Medium_LDL', 'Total_Lipids_in_Medium_VLDL', 'Total_Lipids_in_Small_HDL', 'Total_Lipids_in_Small_LDL', 'Total_Lipids_in_Small_VLDL', 'Total_Lipids_in_Very_Large_HDL', 'Total_Lipids_in_Very_Large_VLDL', 'Total_Lipids_in_Very_Small_VLDL', 'Total_Lipids_in_VLDL'],
    'triglycerides': ['Total_Triglycerides', 'Triglycerides_in_Chylomicrons_and_Extremely_Large_VLDL', 'Triglycerides_in_HDL', 'Triglycerides_in_IDL', 'Triglycerides_in_Large_HDL', 'Triglycerides_in_Large_LDL', 'Triglycerides_in_Large_VLDL', 'Triglycerides_in_LDL', 'Triglycerides_in_Medium_HDL', 'Triglycerides_in_Medium_LDL', 'Triglycerides_in_Medium_VLDL', 'Triglycerides_in_Small_HDL', 'Triglycerides_in_Small_LDL', 'Triglycerides_in_Small_VLDL', 'Triglycerides_in_Very_Large_HDL', 'Triglycerides_in_Very_Large_VLDL', 'Triglycerides_in_Very_Small_VLDL', 'Triglycerides_in_VLDL'],
    'other': ['Apolipoprotein_A1', 'Apolipoprotein_B', 'Average_Diameter_for_HDL_Particles', 'Average_Diameter_for_LDL_Particles', 'Average_Diameter_for_VLDL_Particles', 'Glycoprotein_Acetyls'],
}

# Flat list of all mediator columns (dicts preserve insertion order).
NMR_atr = [column for group in nmr_groups.values() for column in group]

# Standardise every column that enters the regressions (predictors, candidate
# mediators and the outcome), overwriting the raw values in `data`.
# `scaler` stays bound afterwards and holds the fitted scaling parameters.
scaler = StandardScaler()
analysis_columns = [*depression_atr, *NMR_atr, CVD]
data[analysis_columns] = scaler.fit_transform(data[analysis_columns])

# Mediation analysis (product of coefficients), run once per candidate
# mediator in NMR_atr. Three OLS models are involved:
#   total model    : CVD      ~ depression measures             -> c
#   mediator model : mediator ~ depression measures             -> a
#   direct model   : CVD      ~ depression measures + mediator  -> c', b
# The indirect effect of each depression measure through the mediator is a * b.
#
# mediation_results maps each mediator name either to a dict with the keys
# 'indirect_effect', 'direct_effect' and 'total_effect' (each a Series indexed
# by depression measure) or, if anything failed, to the string "Error: <msg>".
#
# NOTE(review): the formula interface drops rows with missing values per
# model, so if the mediator columns contain NaNs the total model and the
# mediator/direct models may be fitted on different samples - TODO confirm.

# Condition number above which the predictor set is reported as
# (near-)collinear; statsmodels' summary() flags the same cut-off.
COLLINEARITY_CONDITION_THRESHOLD = 1000

depression_terms = " + ".join(depression_atr)

mediation_results = {}
model_1 = None  # total-effect model; independent of the mediator, fitted once
for mediator in NMR_atr:
    try:
        if model_1 is None:
            # Fitted lazily inside the try so that a failure is still recorded
            # against each mediator, as it was when refitted on every pass.
            model_1 = ols(f'{CVD} ~ {depression_terms}', data=data).fit()
            if model_1.condition_number > COLLINEARITY_CONDITION_THRESHOLD:
                # Near-duplicate predictors yield huge, mutually cancelling
                # coefficients, which makes the per-predictor effects
                # computed below unreliable.
                warnings.warn(
                    "Depression predictors are (near-)collinear "
                    f"(condition number {model_1.condition_number:.3g}); "
                    "per-predictor mediation effects may be numerically unstable."
                )

        # mediator as dep
        mediator_model = ols(f'{mediator} ~ {depression_terms}', data=data).fit()

        # both ind and mediator
        direct_model = ols(f'{CVD} ~ {depression_terms} + {mediator}', data=data).fit()

        # Coefficients are selected by label rather than by position so they
        # stay correct regardless of where the intercept/mediator terms sit.
        a = mediator_model.params.loc[depression_atr]  # depression -> mediator
        b = direct_model.params[mediator]  # mediator -> outcome
        c_prime = direct_model.params.loc[depression_atr]  # depression -> outcome, mediator present
        c = model_1.params.loc[depression_atr]  # depression -> outcome, no mediator

        indirect_effect = a * b
        total_effect = c
        direct_effect = c_prime

        mediation_results[mediator] = {
            'indirect_effect': indirect_effect,
            'direct_effect': direct_effect,
            'total_effect': total_effect
        }
    except Exception as e:
        # Best effort: keep going and record the failure for this mediator.
        mediation_results[mediator] = f"Error: {e}"

# Print each mediator's name followed by its stored outcome (either the dict
# of effects or the recorded error string).
for mediator_name, outcome in mediation_results.items():
    print(f'Mediator: {mediator_name}', outcome, sep='\n')

Mediator: Cholesterol_in_Chylomicrons_and_Extremely_Large_VLDL
{'indirect_effect': Depressed_At_Baseline   -1.206790e-04
Loneliness               5.497884e-04
Social_Isolation         2.875373e-04
PHQ9_No_Info            -2.872964e-04
PHQ9_Screen              2.117921e-04
PHQ9_Items               1.433289e-06
PHQ9_Severity            4.979392e-04
CIDI_MDD_No_Info         1.134001e-03
CIDI_MDD_Screen          2.195985e-04
CIDI_MDD_Response        6.081295e+07
CIDI_MDD_Severity       -6.081295e+07
GAD_CIDI_Somatic        -9.017242e-05
dtype: float64, 'direct_effect': Depressed_At_Baseline    4.140900e-03
Loneliness               2.535409e-02
Social_Isolation         3.000875e-02
PHQ9_No_Info             4.504399e-02
PHQ9_Screen             -9.322185e-03
PHQ9_Items               2.881523e-03
PHQ9_Severity            3.049470e-02
CIDI_MDD_No_Info         4.152102e-02
CIDI_MDD_Screen          9.567436e-03
CIDI_MDD_Response        1.869336e+09
CIDI_MDD_Severity       -1.869336e+09
GAD_CIDI_S