In [1]:
import pandas as pd
from sklearn.preprocessing import MinMaxScaler
from semopy import Model

  from pandas.core import (


# Rheumatoid Arthritis

## Smoking Score

In [None]:
# =================================================================
# Data Loading
# =================================================================
# Read the combined table (lifestyle scores, aging rate, disease status)
# for the Rheumatoid_arthritis cohort.
data = pd.read_csv('../../../data/Dieases/9.Rheumatoid_arthritis/Score_all+AR+Dieases.csv')

# =================================================================
# Min-max normalization (every column except 'eid')
# =================================================================
# MinMaxScaler rescales each feature column to the [0, 1] range.
scaler = MinMaxScaler()

# Column labels are taken from the frame that is actually scaled instead of
# data.columns[1:], so labels stay aligned with their values even when 'eid'
# is not the first column of the CSV. Reusing data.index keeps the 'eid'
# assignment below row-aligned.
features = data.drop(columns='eid')
data_standardized = pd.DataFrame(
    scaler.fit_transform(features),
    columns=features.columns,
    index=data.index,
)
data_standardized['eid'] = data['eid']  # identifier is carried over unscaled

# =================================================================
# Model 1: Direct Effect Only (No Mediation)
# =================================================================
model_direct = Model('''
    # Direct effect model specification
    # Tests only the direct path from smoking to disease
    Disease ~ Smoking_Score
''')

# NOTE(review): the value returned by fit() is stored but never inspected in
# this cell -- confirm the optimizer converged before trusting the estimates.
fit_direct = model_direct.fit(data_standardized)
params_direct = model_direct.inspect()  # parameter table of the fitted model

# =================================================================
# Model 2: Full Model with Mediation Path
# =================================================================
model_full = Model('''
    # Full mediation model specification
    # Includes both direct and indirect (mediated) effects
    
    # Direct effect path
    Disease ~ Smoking_Score
    
    # Mediation paths:
    # 1. Smoking -> Aging Rate
    # 2. Aging Rate -> Disease
    aging_rate ~ Smoking_Score
    Disease ~ aging_rate
''')

# NOTE(review): same as above -- fit_full is not checked for convergence.
fit_full = model_full.fit(data_standardized)
params_full = model_full.inspect()  # parameter table of the fitted model

# =================================================================
# Extract Key Path Coefficients and P-values
# =================================================================
def get_coeff_and_pvalue(params, from_var, to_var):
    """
    Look up the estimate and p-value of the path from_var -> to_var.

    Parameters:
    -----------
    params : DataFrame
        Parameter table with 'lval', 'rval', 'Estimate' and 'p-value'
        columns (here: the result of model.inspect())
    from_var : str
        Name matched against the 'rval' column (predictor)
    to_var : str
        Name matched against the 'lval' column (outcome)

    Returns:
    --------
    tuple
        (Estimate, p-value) of the first matching row, or (None, None)
        when no row matches
    """
    is_path = (params['lval'] == to_var) & (params['rval'] == from_var)

    # Guard clause: nothing to report when the path is absent
    if not is_path.any():
        return None, None

    estimate = params.loc[is_path, 'Estimate'].values[0]
    p_value = params.loc[is_path, 'p-value'].values[0]
    return estimate, p_value

# --- Model 1 (direct only): Smoking_Score -> Disease ---
direct_effect_without_mediator, p_direct_without_mediator = get_coeff_and_pvalue(
    params_direct, from_var='Smoking_Score', to_var='Disease')

# --- Model 2 (with mediator): one lookup per estimated path ---
direct_effect_with_mediator, p_direct_with_mediator = get_coeff_and_pvalue(
    params_full, from_var='Smoking_Score', to_var='Disease')
# Despite its name, effect_total_to_aging holds the Smoking_Score -> aging_rate
# path coefficient of the full model.
effect_total_to_aging, p_total_to_aging = get_coeff_and_pvalue(
    params_full, from_var='Smoking_Score', to_var='aging_rate')
effect_aging_to_disease, p_aging_to_disease = get_coeff_and_pvalue(
    params_full, from_var='aging_rate', to_var='Disease')

# =================================================================
# Report
# =================================================================
# get_coeff_and_pvalue yields (None, None) for a path it cannot find; the
# ':.5f' format specs below cannot render None and raise TypeError then.
print("\n[Model 1] Direct Effect Only (No Mediator)")
print(f"Smoking_Score → Disease path coefficient: {direct_effect_without_mediator:.5f}, p-value: {p_direct_without_mediator:.5f}")

print("\n[Model 2] Full Model (With Mediation Path)")
print(f"Smoking_Score → Disease direct path coefficient: {direct_effect_with_mediator:.5f}, p-value: {p_direct_with_mediator:.5f}")
print(f"Smoking_Score → aging_rate path coefficient: {effect_total_to_aging:.5f}, p-value: {p_total_to_aging:.5f}")
print(f"aging_rate → Disease path coefficient: {effect_aging_to_disease:.5f}, p-value: {p_aging_to_disease:.5f}")


[Model 1] Direct Effect Only (No Mediator)
Smoking_Score → Disease path coefficient: 0.00009, p-value: 0.95515

[Model 2] Full Model (With Mediation Path)
Smoking_Score → Disease direct path coefficient: 0.00007, p-value: 0.96834
Smoking_Score → aging_rate path coefficient: -0.00152, p-value: 0.57013
aging_rate → Disease path coefficient: -0.01068, p-value: 0.00421


## Drinking Score

In [None]:
# =================================================================
# Data Loading
# =================================================================
# Read the combined table (lifestyle scores, aging rate, disease status)
# for the Rheumatoid_arthritis cohort.
data = pd.read_csv('../../../data/Dieases/9.Rheumatoid_arthritis/Score_all+AR+Dieases.csv')

# =================================================================
# Min-max normalization (every column except 'eid')
# =================================================================
# MinMaxScaler rescales each feature column to the [0, 1] range.
scaler = MinMaxScaler()

# Column labels are taken from the frame that is actually scaled instead of
# data.columns[1:], so labels stay aligned with their values even when 'eid'
# is not the first column of the CSV. Reusing data.index keeps the 'eid'
# assignment below row-aligned.
features = data.drop(columns='eid')
data_standardized = pd.DataFrame(
    scaler.fit_transform(features),
    columns=features.columns,
    index=data.index,
)

# Identifier is carried over unscaled
data_standardized['eid'] = data['eid']

# =================================================================
# Model 1: Direct Effect Only (No Mediation)
# =================================================================
# Single regression of Disease on Drink_Score (aging_rate is not in the model)
model_direct = Model('''
    # Direct effect model specification
    # Tests if drinking score directly predicts disease status
    Disease ~ Drink_Score
''')

# NOTE(review): the value returned by fit() is stored but never inspected in
# this cell -- confirm the optimizer converged before trusting the estimates.
fit_direct = model_direct.fit(data_standardized)

# Parameter table of the fitted model
params_direct = model_direct.inspect()

# =================================================================
# Model 2: Full Mediation Model (Direct + Indirect Effects)
# =================================================================
# Paths estimated together:
# 1. Direct: Drink_Score -> Disease
# 2. Indirect: Drink_Score -> aging_rate -> Disease
model_full = Model('''
    # Direct effect path (same as Model 1)
    Disease ~ Drink_Score
    
    # Mediation paths:
    # 1. Drinking -> Aging Rate
    # 2. Aging Rate -> Disease
    aging_rate ~ Drink_Score
    Disease ~ aging_rate
''')

# NOTE(review): same as above -- fit_full is not checked for convergence.
fit_full = model_full.fit(data_standardized)

# Parameter table of the fitted model
params_full = model_full.inspect()

# =================================================================
# Extract Key Path Coefficients and P-values
# =================================================================
def get_coeff_and_pvalue(params, from_var, to_var):
    """
    Look up the estimate and p-value of the path from_var -> to_var.

    Parameters:
    -----------
    params : DataFrame
        Parameter table with 'lval', 'rval', 'Estimate' and 'p-value'
        columns (here: the result of model.inspect())
    from_var : str
        Name matched against the 'rval' column (predictor)
    to_var : str
        Name matched against the 'lval' column (outcome)

    Returns:
    --------
    tuple
        (Estimate, p-value) of the first matching row, or (None, None)
        when no row matches
    """
    is_path = (params['lval'] == to_var) & (params['rval'] == from_var)

    # Guard clause: nothing to report when the path is absent
    if not is_path.any():
        return None, None

    estimate = params.loc[is_path, 'Estimate'].values[0]
    p_value = params.loc[is_path, 'p-value'].values[0]
    return estimate, p_value

# --- Model 1 (direct only): Drink_Score -> Disease ---
direct_effect_without_mediator, p_direct_without_mediator = get_coeff_and_pvalue(
    params_direct, from_var='Drink_Score', to_var='Disease')

# --- Model 2 (with mediator): one lookup per estimated path ---
direct_effect_with_mediator, p_direct_with_mediator = get_coeff_and_pvalue(
    params_full, from_var='Drink_Score', to_var='Disease')
# Despite its name, effect_total_to_aging holds the Drink_Score -> aging_rate
# path coefficient of the full model.
effect_total_to_aging, p_total_to_aging = get_coeff_and_pvalue(
    params_full, from_var='Drink_Score', to_var='aging_rate')
effect_aging_to_disease, p_aging_to_disease = get_coeff_and_pvalue(
    params_full, from_var='aging_rate', to_var='Disease')

# =================================================================
# Report
# =================================================================
# get_coeff_and_pvalue yields (None, None) for a path it cannot find; the
# ':.5f' format specs below cannot render None and raise TypeError then.
print("\n[Model 1] Direct Effects Only (No Mediator)")
print(f"Drink_Score → Disease path coefficient: {direct_effect_without_mediator:.5f}, p-value: {p_direct_without_mediator:.5f}")

print("\n[Model 2] Full Model (With Mediation Paths)")
print(f"Direct path: Drink_Score → Disease coefficient: {direct_effect_with_mediator:.5f}, p-value: {p_direct_with_mediator:.5f}")
print(f"Mediation path 1: Drink_Score → aging_rate coefficient: {effect_total_to_aging:.5f}, p-value: {p_total_to_aging:.5f}")
print(f"Mediation path 2: aging_rate → Disease coefficient: {effect_aging_to_disease:.5f}, p-value: {p_aging_to_disease:.5f}")


[Model 1] Direct Effects Only (No Mediator)
Drink_Score → Disease path coefficient: 0.00061, p-value: 0.78168

[Model 2] Full Model (With Mediation Paths)
Direct path: Drink_Score → Disease coefficient: 0.00043, p-value: 0.84584
Mediation path 1: Drink_Score → aging_rate coefficient: -0.00222, p-value: 0.04708
Mediation path 2: aging_rate → Disease coefficient: -0.01648, p-value: 0.14308


## Behavior Score

In [None]:
# =================================================================
# Data Loading
# =================================================================
# Read the combined table (lifestyle scores, aging rate, disease status)
# for the Rheumatoid_arthritis cohort.
data = pd.read_csv('../../../data/Dieases/9.Rheumatoid_arthritis/Score_all+AR+Dieases.csv')

# =================================================================
# Min-max normalization (every column except 'eid')
# =================================================================
# MinMaxScaler rescales each feature column to the [0, 1] range.
scaler = MinMaxScaler()

# Column labels are taken from the frame that is actually scaled instead of
# data.columns[1:], so labels stay aligned with their values even when 'eid'
# is not the first column of the CSV. Reusing data.index keeps the 'eid'
# assignment below row-aligned.
features = data.drop(columns='eid')
data_standardized = pd.DataFrame(
    scaler.fit_transform(features),
    columns=features.columns,
    index=data.index,
)

# Identifier is carried over unscaled
data_standardized['eid'] = data['eid']

# =================================================================
# Model 1: Direct Effect Only (No Mediation)
# =================================================================
# Single regression of Disease on Behavior_Score (aging_rate is not in the model)
model_direct = Model('''
    # Direct effect model specification
    # Tests if behavior score directly predicts disease status
    Disease ~ Behavior_Score
''')

# NOTE(review): the value returned by fit() is stored but never inspected in
# this cell -- confirm the optimizer converged before trusting the estimates.
fit_direct = model_direct.fit(data_standardized)

# Parameter table of the fitted model
params_direct = model_direct.inspect()

# =================================================================
# Model 2: Full Mediation Model (Direct + Indirect Effects)
# =================================================================
# Paths estimated together:
# 1. Direct: Behavior_Score -> Disease
# 2. Indirect: Behavior_Score -> aging_rate -> Disease
model_full = Model('''
    # Direct effect path (same as Model 1)
    Disease ~ Behavior_Score
    
    # Mediation paths:
    # 1. Behavior -> Aging Rate
    # 2. Aging Rate -> Disease
    aging_rate ~ Behavior_Score
    Disease ~ aging_rate
''')

# NOTE(review): same as above -- fit_full is not checked for convergence.
fit_full = model_full.fit(data_standardized)

# Parameter table of the fitted model
params_full = model_full.inspect()

# =================================================================
# Extract Key Path Coefficients and P-values
# =================================================================
def get_coeff_and_pvalue(params, from_var, to_var):
    """
    Look up the estimate and p-value of the path from_var -> to_var.

    Parameters:
    -----------
    params : DataFrame
        Parameter table with 'lval', 'rval', 'Estimate' and 'p-value'
        columns (here: the result of model.inspect())
    from_var : str
        Name matched against the 'rval' column (predictor)
    to_var : str
        Name matched against the 'lval' column (outcome)

    Returns:
    --------
    tuple
        (Estimate, p-value) of the first matching row, or (None, None)
        when no row matches
    """
    is_path = (params['lval'] == to_var) & (params['rval'] == from_var)

    # Guard clause: nothing to report when the path is absent
    if not is_path.any():
        return None, None

    estimate = params.loc[is_path, 'Estimate'].values[0]
    p_value = params.loc[is_path, 'p-value'].values[0]
    return estimate, p_value

# --- Model 1 (direct only): Behavior_Score -> Disease ---
direct_effect_without_mediator, p_direct_without_mediator = get_coeff_and_pvalue(
    params_direct, from_var='Behavior_Score', to_var='Disease')

# --- Model 2 (with mediator): one lookup per estimated path ---
direct_effect_with_mediator, p_direct_with_mediator = get_coeff_and_pvalue(
    params_full, from_var='Behavior_Score', to_var='Disease')
# Despite its name, effect_total_to_aging holds the Behavior_Score -> aging_rate
# path coefficient of the full model.
effect_total_to_aging, p_total_to_aging = get_coeff_and_pvalue(
    params_full, from_var='Behavior_Score', to_var='aging_rate')
effect_aging_to_disease, p_aging_to_disease = get_coeff_and_pvalue(
    params_full, from_var='aging_rate', to_var='Disease')

# =================================================================
# Report
# =================================================================
# get_coeff_and_pvalue yields (None, None) for a path it cannot find; the
# ':.5f' format specs below cannot render None and raise TypeError then.
print("\n[Model 1] Direct Effects Only (No Mediator)")
print(f"Behavior_Score → Disease path coefficient: {direct_effect_without_mediator:.5f}, p-value: {p_direct_without_mediator:.5f}")

print("\n[Model 2] Full Model (With Mediation Paths)")
print(f"Direct path: Behavior_Score → Disease coefficient: {direct_effect_with_mediator:.5f}, p-value: {p_direct_with_mediator:.5f}")
print(f"Mediation path 1: Behavior_Score → aging_rate coefficient: {effect_total_to_aging:.5f}, p-value: {p_total_to_aging:.5f}")
print(f"Mediation path 2: aging_rate → Disease coefficient: {effect_aging_to_disease:.5f}, p-value: {p_aging_to_disease:.5f}")


[Model 1] Direct Effects Only (No Mediator)
Behavior_Score → Disease path coefficient: -0.00117, p-value: 0.56228

[Model 2] Full Model (With Mediation Paths)
Direct path: Behavior_Score → Disease coefficient: -0.00109, p-value: 0.59067
Mediation path 1: Behavior_Score → aging_rate coefficient: 0.00025, p-value: 0.81154
Mediation path 2: aging_rate → Disease coefficient: -0.01663, p-value: 0.13951


## Sport Score

In [None]:
# =================================================================
# Data Loading
# =================================================================
# Read the combined table (lifestyle scores, aging rate, disease status)
# for the Rheumatoid_arthritis cohort.
data = pd.read_csv('../../../data/Dieases/9.Rheumatoid_arthritis/Score_all+AR+Dieases.csv')

# =================================================================
# Min-max normalization (every column except 'eid')
# =================================================================
# MinMaxScaler rescales each feature column to the [0, 1] range.
scaler = MinMaxScaler()

# Column labels are taken from the frame that is actually scaled instead of
# data.columns[1:], so labels stay aligned with their values even when 'eid'
# is not the first column of the CSV. Reusing data.index keeps the 'eid'
# assignment below row-aligned.
features = data.drop(columns='eid')
data_standardized = pd.DataFrame(
    scaler.fit_transform(features),
    columns=features.columns,
    index=data.index,
)

# Identifier is carried over unscaled
data_standardized['eid'] = data['eid']

# =================================================================
# Model 1: Direct Effect Only (No Mediation)
# =================================================================
# Single regression of Disease on Sport_Score (aging_rate is not in the model)
model_direct = Model('''
    # Direct effect model specification
    # Examines if sports score directly affects disease status
    Disease ~ Sport_Score
''')

# NOTE(review): the value returned by fit() is stored but never inspected in
# this cell -- confirm the optimizer converged before trusting the estimates.
fit_direct = model_direct.fit(data_standardized)

# Parameter table of the fitted model
params_direct = model_direct.inspect()

# =================================================================
# Model 2: Full Mediation Model (Direct + Indirect Effects)
# =================================================================
# Paths estimated together:
# 1. Direct: Sport_Score -> Disease
# 2. Indirect: Sport_Score -> aging_rate -> Disease
model_full = Model('''
    # Direct effect path (same as Model 1)
    Disease ~ Sport_Score
    
    # Mediation pathways:
    # 1. Sports participation -> Biological aging rate
    # 2. Biological aging rate -> Disease status
    aging_rate ~ Sport_Score
    Disease ~ aging_rate
''')

# NOTE(review): same as above -- fit_full is not checked for convergence.
fit_full = model_full.fit(data_standardized)

# Parameter table of the fitted model
params_full = model_full.inspect()

# =================================================================
# Extract Key Path Coefficients and P-values
# =================================================================
def get_coeff_and_pvalue(params, from_var, to_var):
    """
    Look up the estimate and p-value of the path from_var -> to_var.

    Parameters:
    -----------
    params : DataFrame
        Parameter table with 'lval', 'rval', 'Estimate' and 'p-value'
        columns (here: the result of model.inspect())
    from_var : str
        Name matched against the 'rval' column (predictor)
    to_var : str
        Name matched against the 'lval' column (outcome)

    Returns:
    --------
    tuple
        (Estimate, p-value) of the first matching row, or (None, None)
        when no row matches
    """
    is_path = (params['lval'] == to_var) & (params['rval'] == from_var)

    # Absent path: signal it with a pair of None values
    if not is_path.any():
        return None, None

    estimate = params.loc[is_path, 'Estimate'].values[0]
    p_value = params.loc[is_path, 'p-value'].values[0]
    return estimate, p_value

# --- Model 1 (direct only): Sport_Score -> Disease ---
direct_effect_without_mediator, p_direct_without_mediator = get_coeff_and_pvalue(
    params_direct, from_var='Sport_Score', to_var='Disease')

# --- Model 2 (with mediator): one lookup per estimated path ---
direct_effect_with_mediator, p_direct_with_mediator = get_coeff_and_pvalue(
    params_full, from_var='Sport_Score', to_var='Disease')
# Despite its name, effect_total_to_aging holds the Sport_Score -> aging_rate
# path coefficient of the full model.
effect_total_to_aging, p_total_to_aging = get_coeff_and_pvalue(
    params_full, from_var='Sport_Score', to_var='aging_rate')
effect_aging_to_disease, p_aging_to_disease = get_coeff_and_pvalue(
    params_full, from_var='aging_rate', to_var='Disease')

# =================================================================
# Report
# =================================================================
# get_coeff_and_pvalue yields (None, None) for a path it cannot find; the
# ':.5f' format specs below cannot render None and raise TypeError then.
print("\n[Model 1] Direct Effects Only (No Mediation)")
print(f"Sport_Score → Disease path coefficient: {direct_effect_without_mediator:.5f}, p-value: {p_direct_without_mediator:.5f}")

print("\n[Model 2] Full Model (With Mediation Pathways)")
print(f"Direct path: Sport_Score → Disease coefficient: {direct_effect_with_mediator:.5f}, p-value: {p_direct_with_mediator:.5f}")
print(f"Mediation path 1: Sport_Score → aging_rate coefficient: {effect_total_to_aging:.5f}, p-value: {p_total_to_aging:.5f}")
print(f"Mediation path 2: aging_rate → Disease coefficient: {effect_aging_to_disease:.5f}, p-value: {p_aging_to_disease:.5f}")


[Model 1] Direct Effects Only (No Mediation)
Sport_Score → Disease path coefficient: 0.00185, p-value: 0.28481

[Model 2] Full Model (With Mediation Pathways)
Direct path: Sport_Score → Disease coefficient: 0.00350, p-value: 0.02371
Mediation path 1: Sport_Score → aging_rate coefficient: -0.00400, p-value: 0.09759
Mediation path 2: aging_rate → Disease coefficient: -0.01042, p-value: 0.00464


## Sleep Score

In [None]:
# =================================================================
# Data Loading
# =================================================================
# Read the combined table (lifestyle scores, aging rate, disease status)
# for the Rheumatoid_arthritis cohort.
data = pd.read_csv('../../../data/Dieases/9.Rheumatoid_arthritis/Score_all+AR+Dieases.csv')

# =================================================================
# Min-max normalization (every column except 'eid')
# =================================================================
# MinMaxScaler rescales each feature column to the [0, 1] range.
scaler = MinMaxScaler()

# Column labels are taken from the frame that is actually scaled instead of
# data.columns[1:], so labels stay aligned with their values even when 'eid'
# is not the first column of the CSV. Reusing data.index keeps the 'eid'
# assignment below row-aligned.
features = data.drop(columns='eid')
data_standardized = pd.DataFrame(
    scaler.fit_transform(features),
    columns=features.columns,
    index=data.index,
)

# Identifier is carried over unscaled
data_standardized['eid'] = data['eid']

# =================================================================
# Model 1: Direct Effect Only (No Mediation)
# =================================================================
# Single regression of Disease on Sleep_Score (aging_rate is not in the model)
model_direct = Model('''
    # Direct effect model specification
    # Tests if sleep score directly predicts disease status
    Disease ~ Sleep_Score
''')

# NOTE(review): the value returned by fit() is stored but never inspected in
# this cell -- confirm the optimizer converged before trusting the estimates.
fit_direct = model_direct.fit(data_standardized)

# Parameter table of the fitted model
params_direct = model_direct.inspect()

# =================================================================
# Model 2: Full Mediation Model (Direct + Indirect Effects)
# =================================================================
# Paths estimated together:
# 1. Direct: Sleep_Score -> Disease
# 2. Indirect: Sleep_Score -> aging_rate -> Disease
model_full = Model('''
    # Direct effect path (same as Model 1)
    Disease ~ Sleep_Score
    
    # Mediation paths:
    # 1. Sleep -> Aging Rate
    # 2. Aging Rate -> Disease
    aging_rate ~ Sleep_Score
    Disease ~ aging_rate
''')

# NOTE(review): same as above -- fit_full is not checked for convergence.
fit_full = model_full.fit(data_standardized)

# Parameter table of the fitted model
params_full = model_full.inspect()

# =================================================================
# Extract Key Path Coefficients and P-values
# =================================================================
def get_coeff_and_pvalue(params, from_var, to_var):
    """
    Look up the estimate and p-value of the path from_var -> to_var.

    Parameters:
    -----------
    params : DataFrame
        Parameter table with 'lval', 'rval', 'Estimate' and 'p-value'
        columns (here: the result of model.inspect())
    from_var : str
        Name matched against the 'rval' column (predictor)
    to_var : str
        Name matched against the 'lval' column (outcome)

    Returns:
    --------
    tuple
        (Estimate, p-value) of the first matching row, or (None, None)
        when no row matches
    """
    is_path = (params['lval'] == to_var) & (params['rval'] == from_var)

    # Guard clause: nothing to report when the path is absent
    if not is_path.any():
        return None, None

    estimate = params.loc[is_path, 'Estimate'].values[0]
    p_value = params.loc[is_path, 'p-value'].values[0]
    return estimate, p_value

# --- Model 1 (direct only): Sleep_Score -> Disease ---
direct_effect_without_mediator, p_direct_without_mediator = get_coeff_and_pvalue(
    params_direct, from_var='Sleep_Score', to_var='Disease')

# --- Model 2 (with mediator): one lookup per estimated path ---
direct_effect_with_mediator, p_direct_with_mediator = get_coeff_and_pvalue(
    params_full, from_var='Sleep_Score', to_var='Disease')
# Despite its name, effect_total_to_aging holds the Sleep_Score -> aging_rate
# path coefficient of the full model.
effect_total_to_aging, p_total_to_aging = get_coeff_and_pvalue(
    params_full, from_var='Sleep_Score', to_var='aging_rate')
effect_aging_to_disease, p_aging_to_disease = get_coeff_and_pvalue(
    params_full, from_var='aging_rate', to_var='Disease')

# =================================================================
# Report
# =================================================================
# get_coeff_and_pvalue yields (None, None) for a path it cannot find; the
# float format specs below cannot render None and raise TypeError then.
# Model 2 p-values are printed in scientific notation ('.5e'), Model 1 in
# fixed-point ('.5f').
print("\n[Model 1] Direct Effects Only (No Mediator)")
print(f"Sleep_Score → Disease path coefficient: {direct_effect_without_mediator:.5f}, p-value: {p_direct_without_mediator:.5f}")

print("\n[Model 2] Full Model (With Mediation Paths)")
print(f"Direct path: Sleep_Score → Disease coefficient: {direct_effect_with_mediator:.5f}, p-value: {p_direct_with_mediator:.5e}")
print(f"Mediation path 1: Sleep_Score → aging_rate coefficient: {effect_total_to_aging:.5f}, p-value: {p_total_to_aging:.5e}")
print(f"Mediation path 2: aging_rate → Disease coefficient: {effect_aging_to_disease:.5f}, p-value: {p_aging_to_disease:.5e}")


[Model 1] Direct Effects Only (No Mediator)
Sleep_Score → Disease path coefficient: 0.00204, p-value: 0.27558

[Model 2] Full Model (With Mediation Paths)
Direct path: Sleep_Score → Disease coefficient: -0.00947, p-value: 6.21792e-12
Mediation path 1: Sleep_Score → aging_rate coefficient: 0.01923, p-value: 0.00000e+00
Mediation path 2: aging_rate → Disease coefficient: 0.41375, p-value: 0.00000e+00


## Weight Score

In [None]:
# =================================================================
# Data Loading
# =================================================================
# Read the combined table (lifestyle scores, aging rate, disease status)
# for the Rheumatoid_arthritis cohort.
data = pd.read_csv('../../../data/Dieases/9.Rheumatoid_arthritis/Score_all+AR+Dieases.csv')

# =================================================================
# Min-max normalization (every column except 'eid')
# =================================================================
# MinMaxScaler rescales each feature column to the [0, 1] range.
scaler = MinMaxScaler()

# Column labels are taken from the frame that is actually scaled instead of
# data.columns[1:], so labels stay aligned with their values even when 'eid'
# is not the first column of the CSV. Reusing data.index keeps the 'eid'
# assignment below row-aligned.
features = data.drop(columns='eid')
data_standardized = pd.DataFrame(
    scaler.fit_transform(features),
    columns=features.columns,
    index=data.index,
)

# Identifier is carried over unscaled
data_standardized['eid'] = data['eid']

# =================================================================
# Model 1: Direct Effect Only (No Mediation)
# =================================================================
# Single regression of Disease on Weight_Score (aging_rate is not in the model)
model_direct = Model('''
    # Direct effect model specification
    # Tests if weight score directly predicts disease status
    Disease ~ Weight_Score
''')

# NOTE(review): the value returned by fit() is stored but never inspected in
# this cell -- confirm the optimizer converged before trusting the estimates.
fit_direct = model_direct.fit(data_standardized)

# Parameter table of the fitted model
params_direct = model_direct.inspect()

# =================================================================
# Model 2: Full Mediation Model (Direct + Indirect Effects)
# =================================================================
# Paths estimated together:
# 1. Direct: Weight_Score -> Disease
# 2. Indirect: Weight_Score -> aging_rate -> Disease
model_full = Model('''
    # Direct effect path (same as Model 1)
    Disease ~ Weight_Score
    
    # Mediation paths:
    # 1. Weight -> Aging Rate
    # 2. Aging Rate -> Disease
    aging_rate ~ Weight_Score
    Disease ~ aging_rate
''')

# NOTE(review): same as above -- fit_full is not checked for convergence.
fit_full = model_full.fit(data_standardized)

# Parameter table of the fitted model
params_full = model_full.inspect()

# =================================================================
# Extract Key Path Coefficients and P-values
# =================================================================
def get_coeff_and_pvalue(params, from_var, to_var):
    """
    Look up the estimate and p-value of the path from_var -> to_var.

    Parameters:
    -----------
    params : DataFrame
        Parameter table with 'lval', 'rval', 'Estimate' and 'p-value'
        columns (here: the result of model.inspect())
    from_var : str
        Name matched against the 'rval' column (predictor)
    to_var : str
        Name matched against the 'lval' column (outcome)

    Returns:
    --------
    tuple
        (Estimate, p-value) of the first matching row, or (None, None)
        when no row matches
    """
    is_path = (params['lval'] == to_var) & (params['rval'] == from_var)

    # Guard clause: nothing to report when the path is absent
    if not is_path.any():
        return None, None

    estimate = params.loc[is_path, 'Estimate'].values[0]
    p_value = params.loc[is_path, 'p-value'].values[0]
    return estimate, p_value

# --- Model 1 (direct only): Weight_Score -> Disease ---
direct_effect_without_mediator, p_direct_without_mediator = get_coeff_and_pvalue(
    params_direct, from_var='Weight_Score', to_var='Disease')

# --- Model 2 (with mediator): one lookup per estimated path ---
direct_effect_with_mediator, p_direct_with_mediator = get_coeff_and_pvalue(
    params_full, from_var='Weight_Score', to_var='Disease')
# Despite its name, effect_total_to_aging holds the Weight_Score -> aging_rate
# path coefficient of the full model.
effect_total_to_aging, p_total_to_aging = get_coeff_and_pvalue(
    params_full, from_var='Weight_Score', to_var='aging_rate')
effect_aging_to_disease, p_aging_to_disease = get_coeff_and_pvalue(
    params_full, from_var='aging_rate', to_var='Disease')

# =================================================================
# Report
# =================================================================
# get_coeff_and_pvalue yields (None, None) for a path it cannot find; the
# ':.5f' format specs below cannot render None and raise TypeError then.
print("\n[Model 1] Direct Effects Only (No Mediator)")
print(f"Weight_Score → Disease path coefficient: {direct_effect_without_mediator:.5f}, p-value: {p_direct_without_mediator:.5f}")

print("\n[Model 2] Full Model (With Mediation Paths)")
print(f"Direct path: Weight_Score → Disease coefficient: {direct_effect_with_mediator:.5f}, p-value: {p_direct_with_mediator:.5f}")
print(f"Mediation path 1: Weight_Score → aging_rate coefficient: {effect_total_to_aging:.5f}, p-value: {p_total_to_aging:.5f}")
print(f"Mediation path 2: aging_rate → Disease coefficient: {effect_aging_to_disease:.5f}, p-value: {p_aging_to_disease:.5f}")


[Model 1] Direct Effects Only (No Mediator)
Weight_Score → Disease path coefficient: -0.00197, p-value: 0.29577

[Model 2] Full Model (With Mediation Paths)
Direct path: Weight_Score → Disease coefficient: -0.00187, p-value: 0.31989
Mediation path 1: Weight_Score → aging_rate coefficient: 0.00323, p-value: 0.00075
Mediation path 2: aging_rate → Disease coefficient: -0.01633, p-value: 0.14685


## Diet Score

In [None]:
# =================================================================
# Data Loading
# =================================================================
# Read the combined table (lifestyle scores, aging rate, disease status)
# for the Rheumatoid_arthritis cohort.
data = pd.read_csv('../../../data/Dieases/9.Rheumatoid_arthritis/Score_all+AR+Dieases.csv')

# =================================================================
# Min-max normalization (every column except 'eid')
# =================================================================
# MinMaxScaler rescales each feature column to the [0, 1] range.
scaler = MinMaxScaler()

# Column labels are taken from the frame that is actually scaled instead of
# data.columns[1:], so labels stay aligned with their values even when 'eid'
# is not the first column of the CSV. Reusing data.index keeps the 'eid'
# assignment below row-aligned.
features = data.drop(columns='eid')
data_standardized = pd.DataFrame(
    scaler.fit_transform(features),
    columns=features.columns,
    index=data.index,
)

# Identifier is carried over unscaled
data_standardized['eid'] = data['eid']

# =================================================================
# Model 1: Direct Effect Only (No Mediation)
# =================================================================
# Single regression of Disease on Diet_Score (aging_rate is not in the model)
model_direct = Model('''
    # Direct effect model specification
    # Tests if diet score directly predicts disease status
    Disease ~ Diet_Score
''')

# NOTE(review): the value returned by fit() is stored but never inspected in
# this cell -- confirm the optimizer converged before trusting the estimates.
fit_direct = model_direct.fit(data_standardized)

# Parameter table of the fitted model
params_direct = model_direct.inspect()

# =================================================================
# Model 2: Full Mediation Model (Direct + Indirect Effects)
# =================================================================
# Paths estimated together:
# 1. Direct: Diet_Score -> Disease
# 2. Indirect: Diet_Score -> aging_rate -> Disease
model_full = Model('''
    # Direct effect path (same as Model 1)
    Disease ~ Diet_Score
    
    # Mediation paths:
    # 1. Diet -> Aging Rate
    # 2. Aging Rate -> Disease
    aging_rate ~ Diet_Score
    Disease ~ aging_rate
''')

# NOTE(review): same as above -- fit_full is not checked for convergence.
fit_full = model_full.fit(data_standardized)

# Parameter table of the fitted model
params_full = model_full.inspect()

# =================================================================
# Extract Key Path Coefficients and P-values
# =================================================================
def get_coeff_and_pvalue(params, from_var, to_var):
    """
    Extract the regression path coefficient and p-value between two variables

    When the parameter table has an 'op' column, only regression rows
    (op == '~') are considered, so a variance/covariance row ('~~') that
    carries the same variable pair is never returned in place of the path.
    Tables without an 'op' column are matched on 'lval'/'rval' alone.

    Parameters:
    -----------
    params : DataFrame
        Model parameters dataframe from model.inspect(); must contain the
        columns 'lval', 'rval', 'Estimate' and 'p-value'
    from_var : str
        Predictor variable name (matched against 'rval')
    to_var : str
        Outcome variable name (matched against 'lval')

    Returns:
    --------
    tuple
        (coefficient estimate, p-value) of the first matching row,
        or (None, None) if path not found
    """
    # Rows whose outcome/predictor pair matches the requested path
    mask = (params['lval'] == to_var) & (params['rval'] == from_var)

    # Restrict to regression rows when the operator is available
    if 'op' in params.columns:
        mask = mask & (params['op'] == '~')

    if not mask.any():
        return None, None

    # .loc with a (mask, column) pair avoids chained indexing
    estimate = params.loc[mask, 'Estimate'].iloc[0]
    p_value = params.loc[mask, 'p-value'].iloc[0]
    return estimate, p_value

# Model 1 contains a single regression: Diet_Score -> Disease
(direct_effect_without_mediator,
 p_direct_without_mediator) = get_coeff_and_pvalue(params_direct, 'Diet_Score', 'Disease')

# Model 2 contributes three paths: the direct path plus both mediation legs.
# `effect_total_to_aging` / `p_total_to_aging` hold the Diet_Score -> aging_rate
# leg (the names say "total", but that is the path they are read from).
(direct_effect_with_mediator,
 p_direct_with_mediator) = get_coeff_and_pvalue(params_full, 'Diet_Score', 'Disease')
(effect_total_to_aging,
 p_total_to_aging) = get_coeff_and_pvalue(params_full, 'Diet_Score', 'aging_rate')
(effect_aging_to_disease,
 p_aging_to_disease) = get_coeff_and_pvalue(params_full, 'aging_rate', 'Disease')

# =================================================================
# Output Results
# =================================================================
# One print per model; sep="\n" yields the same text as one print per line
print(
    "\n[Model 1] Direct Effects Only (No Mediator)",
    f"Diet_Score → Disease path coefficient: {direct_effect_without_mediator:.5f}, p-value: {p_direct_without_mediator:.5f}",
    sep="\n",
)

print(
    "\n[Model 2] Full Model (With Mediation Paths)",
    f"Direct path: Diet_Score → Disease coefficient: {direct_effect_with_mediator:.5f}, p-value: {p_direct_with_mediator:.5f}",
    f"Mediation path 1: Diet_Score → aging_rate coefficient: {effect_total_to_aging:.5f}, p-value: {p_total_to_aging:.5f}",
    f"Mediation path 2: aging_rate → Disease coefficient: {effect_aging_to_disease:.5f}, p-value: {p_aging_to_disease:.5f}",
    sep="\n",
)


[Model 1] Direct Effects Only (No Mediator)
Diet_Score → Disease path coefficient: -0.00297, p-value: 0.20044

[Model 2] Full Model (With Mediation Paths)
Direct path: Diet_Score → Disease coefficient: -0.00312, p-value: 0.17960
Mediation path 1: Diet_Score → aging_rate coefficient: -0.00278, p-value: 0.01893
Mediation path 2: aging_rate → Disease coefficient: -0.01667, p-value: 0.13867


## Distress Score

In [None]:
# =================================================================
# Data Loading
# =================================================================
# Load dataset containing distress scores, aging rate (AR), and disease status
# Data path points to processed Rheumatoid_arthritis dataset
data = pd.read_csv('../../../data/Dieases/9.Rheumatoid_arthritis/Score_all+AR+Dieases.csv')

# =================================================================
# Data Standardization/Normalization 
# =================================================================
# Initialize MinMaxScaler to rescale every feature to the [0, 1] range
# (this is min-max normalization; the `data_standardized` name is kept
# because the model-fitting code below refers to it)
scaler = MinMaxScaler()

# Select the feature columns by NAME rather than by position. The scaled
# array has exactly the columns of `feature_frame`, so taking the labels from
# that same frame keeps values and names aligned even if 'eid' is not the
# first column of the CSV (the previous `data.columns[1:]` assumed it was).
feature_frame = data.drop(columns='eid')
data_standardized = pd.DataFrame(
    scaler.fit_transform(feature_frame),
    columns=feature_frame.columns,
    index=feature_frame.index,  # same row labels as `data`, so 'eid' re-attaches row-for-row
)

# Preserve original (unscaled) eid column for reference
data_standardized['eid'] = data['eid']

# =================================================================
# Model 1: Direct Effect Only (No Mediation)
# =================================================================
# Tests ONLY the direct effect of psychological distress on disease 
# (no mediation through aging rate)
model_direct = Model('''
    # Direct effect model specification
    # Tests if distress score directly predicts disease status
    Disease ~ Distress_Score
''')

# Fit the direct effect model to the scaled data. fit() returns the optimizer
# result; it is checked here instead of being discarded, because parameter
# estimates from a run that did not converge should not be interpreted.
# NOTE(review): assumes the returned object exposes a boolean `success`
# attribute (semopy SolverResult) - confirm for the installed version.
# getattr() turns the check into a no-op if the attribute is absent.
fit_direct = model_direct.fit(data_standardized)
if not getattr(fit_direct, 'success', True):
    print(f"WARNING: direct-effect model (Distress_Score) did not report a successful fit: {fit_direct}")

# Extract model parameters (coefficients, p-values, etc.)
params_direct = model_direct.inspect()

# =================================================================
# Model 2: Full Mediation Model (Direct + Indirect Effects)
# =================================================================
# Tests BOTH direct and indirect effects:
# 1. Direct: Distress -> Disease
# 2. Indirect: Distress -> Aging Rate -> Disease
model_full = Model('''
    # Direct effect path (same as Model 1)
    Disease ~ Distress_Score
    
    # Mediation pathways:
    # 1. Psychological distress -> Biological aging rate
    # 2. Biological aging rate -> Disease status
    aging_rate ~ Distress_Score
    Disease ~ aging_rate
''')

# Fit the full mediation model and apply the same convergence check
fit_full = model_full.fit(data_standardized)
if not getattr(fit_full, 'success', True):
    print(f"WARNING: full mediation model (Distress_Score) did not report a successful fit: {fit_full}")

# Extract model parameters
params_full = model_full.inspect()

# =================================================================
# Extract Key Path Coefficients and P-values
# =================================================================
def get_coeff_and_pvalue(params, from_var, to_var):
    """
    Extracts the regression path coefficient and p-value from model parameters

    When the parameter table has an 'op' column, only regression rows
    (op == '~') are considered, so a variance/covariance row ('~~') that
    carries the same variable pair is never returned in place of the path.
    Tables without an 'op' column are matched on 'lval'/'rval' alone.

    Parameters:
        params (DataFrame): Model parameters from model.inspect(); must contain
            the columns 'lval', 'rval', 'Estimate' and 'p-value'
        from_var (str): Independent variable name (matched against 'rval')
        to_var (str): Dependent variable name (matched against 'lval')

    Returns:
        tuple: (coefficient_estimate, p_value) of the first matching row,
            or (None, None) if path not found
    """
    # Rows whose outcome/predictor pair matches the requested path
    mask = (params['lval'] == to_var) & (params['rval'] == from_var)

    # Restrict to regression rows when the operator is available
    if 'op' in params.columns:
        mask = mask & (params['op'] == '~')

    if not mask.any():
        return None, None

    # .loc with a (mask, column) pair avoids chained indexing
    estimate = params.loc[mask, 'Estimate'].iloc[0]
    p_value = params.loc[mask, 'p-value'].iloc[0]
    return estimate, p_value

# Model 1 contains a single regression: Distress_Score -> Disease
(direct_effect_without_mediator,
 p_direct_without_mediator) = get_coeff_and_pvalue(params_direct, 'Distress_Score', 'Disease')

# Model 2 contributes three paths: the direct path plus both mediation legs
(direct_effect_with_mediator,
 p_direct_with_mediator) = get_coeff_and_pvalue(params_full, 'Distress_Score', 'Disease')
(effect_distress_to_aging,
 p_distress_to_aging) = get_coeff_and_pvalue(params_full, 'Distress_Score', 'aging_rate')
(effect_aging_to_disease,
 p_aging_to_disease) = get_coeff_and_pvalue(params_full, 'aging_rate', 'Disease')

# =================================================================
# Results Output
# =================================================================
# One print per model; sep="\n" yields the same text as one print per line.
# The Distress_Score -> aging_rate p-value is shown in scientific notation,
# the others in fixed-point.
print(
    "\n[Model 1] Direct Effects Only (No Mediation)",
    f"Distress_Score → Disease path coefficient: {direct_effect_without_mediator:.5f}, p-value: {p_direct_without_mediator:.5f}",
    sep="\n",
)

print(
    "\n[Model 2] Full Model (With Mediation Pathways)",
    f"Direct path: Distress_Score → Disease coefficient: {direct_effect_with_mediator:.5f}, p-value: {p_direct_with_mediator:.5f}",
    f"Mediation path 1: Distress_Score → aging_rate coefficient: {effect_distress_to_aging:.5f}, p-value: {p_distress_to_aging:.5e}",
    f"Mediation path 2: aging_rate → Disease coefficient: {effect_aging_to_disease:.5f}, p-value: {p_aging_to_disease:.5f}",
    sep="\n",
)


[Model 1] Direct Effects Only (No Mediation)
Distress_Score → Disease path coefficient: -0.00087, p-value: 0.61041

[Model 2] Full Model (With Mediation Pathways)
Direct path: Distress_Score → Disease coefficient: -0.00098, p-value: 0.56570
Mediation path 1: Distress_Score → aging_rate coefficient: -0.01132, p-value: 0.00000e+00
Mediation path 2: aging_rate → Disease coefficient: -0.01597, p-value: 0.15712


## Social Score

In [None]:
# =================================================================
# Data Loading
# =================================================================
# Load dataset containing social connection scores, aging rate (AR), and disease status
# Data path points to processed Rheumatoid_arthritis dataset
data = pd.read_csv('../../../data/Dieases/9.Rheumatoid_arthritis/Score_all+AR+Dieases.csv')

# =================================================================
# Data Standardization/Normalization 
# =================================================================
# Initialize MinMaxScaler to rescale every feature to the [0, 1] range
# (this is min-max normalization; the `data_standardized` name is kept
# because the model-fitting code below refers to it)
scaler = MinMaxScaler()

# Select the feature columns by NAME rather than by position. The scaled
# array has exactly the columns of `feature_frame`, so taking the labels from
# that same frame keeps values and names aligned even if 'eid' is not the
# first column of the CSV (the previous `data.columns[1:]` assumed it was).
feature_frame = data.drop(columns='eid')
data_standardized = pd.DataFrame(
    scaler.fit_transform(feature_frame),
    columns=feature_frame.columns,
    index=feature_frame.index,  # same row labels as `data`, so 'eid' re-attaches row-for-row
)

# Preserve original (unscaled) eid column for reference
data_standardized['eid'] = data['eid']

# =================================================================
# Model 1: Direct Effect Only (No Mediation)
# =================================================================
# Tests ONLY the direct effect of social connections on disease
# (no mediation through aging rate)
model_direct = Model('''
    # Direct effect model specification
    # Tests if social score directly predicts disease status
    Disease ~ Social_Score
''')

# Fit the direct effect model to the scaled data. fit() returns the optimizer
# result; it is checked here instead of being discarded, because parameter
# estimates from a run that did not converge should not be interpreted.
# NOTE(review): assumes the returned object exposes a boolean `success`
# attribute (semopy SolverResult) - confirm for the installed version.
# getattr() turns the check into a no-op if the attribute is absent.
fit_direct = model_direct.fit(data_standardized)
if not getattr(fit_direct, 'success', True):
    print(f"WARNING: direct-effect model (Social_Score) did not report a successful fit: {fit_direct}")

# Extract model parameters (coefficients, p-values, etc.)
params_direct = model_direct.inspect()

# =================================================================
# Model 2: Full Mediation Model (Direct + Indirect Effects)
# =================================================================
# Tests BOTH direct and indirect effects:
# 1. Direct: Social -> Disease
# 2. Indirect: Social -> Aging Rate -> Disease
model_full = Model('''
    # Direct effect path (same as Model 1)
    Disease ~ Social_Score
    
    # Mediation pathways:
    # 1. Social connections -> Biological aging rate
    # 2. Biological aging rate -> Disease status
    aging_rate ~ Social_Score
    Disease ~ aging_rate
''')

# Fit the full mediation model and apply the same convergence check.
# NOTE(review): the output recorded for this cell looks internally
# inconsistent. Model 1 reports a total effect of 0.00062, while Model 2's
# direct + indirect effect is 0.01876 + (-0.02124 * 0.14350) ~= 0.0157, and
# the aging_rate -> Disease coefficient (0.14350) is far from the ~ -0.016
# recorded for the same path in the Diet and Distress sections. In a linear
# mediation model these would normally agree, so the optimizer may have
# stopped at a poor solution - verify before interpreting these estimates.
fit_full = model_full.fit(data_standardized)
if not getattr(fit_full, 'success', True):
    print(f"WARNING: full mediation model (Social_Score) did not report a successful fit: {fit_full}")

# Extract model parameters
params_full = model_full.inspect()

# =================================================================
# Extract Key Path Coefficients and P-values
# =================================================================
def get_coeff_and_pvalue(params, from_var, to_var):
    """
    Extracts the regression path coefficient and p-value from model parameters

    When the parameter table has an 'op' column, only regression rows
    (op == '~') are considered, so a variance/covariance row ('~~') that
    carries the same variable pair is never returned in place of the path.
    Tables without an 'op' column are matched on 'lval'/'rval' alone.

    Parameters:
        params (DataFrame): Model parameters from model.inspect(); must contain
            the columns 'lval', 'rval', 'Estimate' and 'p-value'
        from_var (str): Independent variable name (matched against 'rval')
        to_var (str): Dependent variable name (matched against 'lval')

    Returns:
        tuple: (coefficient_estimate, p_value) of the first matching row,
            or (None, None) if path not found
    """
    # Rows whose outcome/predictor pair matches the requested path
    mask = (params['lval'] == to_var) & (params['rval'] == from_var)

    # Restrict to regression rows when the operator is available
    if 'op' in params.columns:
        mask = mask & (params['op'] == '~')

    if not mask.any():
        return None, None

    # .loc with a (mask, column) pair avoids chained indexing
    estimate = params.loc[mask, 'Estimate'].iloc[0]
    p_value = params.loc[mask, 'p-value'].iloc[0]
    return estimate, p_value

# Model 1 contains a single regression: Social_Score -> Disease
(direct_effect_without_mediator,
 p_direct_without_mediator) = get_coeff_and_pvalue(params_direct, 'Social_Score', 'Disease')

# Model 2 contributes three paths: the direct path plus both mediation legs
(direct_effect_with_mediator,
 p_direct_with_mediator) = get_coeff_and_pvalue(params_full, 'Social_Score', 'Disease')
(effect_social_to_aging,
 p_social_to_aging) = get_coeff_and_pvalue(params_full, 'Social_Score', 'aging_rate')
(effect_aging_to_disease,
 p_aging_to_disease) = get_coeff_and_pvalue(params_full, 'aging_rate', 'Disease')

# =================================================================
# Results Output
# =================================================================
# One print per model; sep="\n" yields the same text as one print per line.
# Model 2 p-values are shown in scientific notation, Model 1 in fixed-point.
print(
    "\n[Model 1] Direct Effects Only (No Mediation)",
    f"Social_Score → Disease path coefficient: {direct_effect_without_mediator:.5f}, p-value: {p_direct_without_mediator:.5f}",
    sep="\n",
)

print(
    "\n[Model 2] Full Model (With Mediation Pathways)",
    f"Direct path: Social_Score → Disease coefficient: {direct_effect_with_mediator:.5f}, p-value: {p_direct_with_mediator:.5e}",
    f"Mediation path 1: Social_Score → aging_rate coefficient: {effect_social_to_aging:.5f}, p-value: {p_social_to_aging:.5e}",
    f"Mediation path 2: aging_rate → Disease coefficient: {effect_aging_to_disease:.5f}, p-value: {p_aging_to_disease:.5e}",
    sep="\n",
)


[Model 1] Direct Effects Only (No Mediation)
Social_Score → Disease path coefficient: 0.00062, p-value: 0.82990

[Model 2] Full Model (With Mediation Pathways)
Direct path: Social_Score → Disease coefficient: 0.01876, p-value: 9.35748e-07
Mediation path 1: Social_Score → aging_rate coefficient: -0.02124, p-value: 0.00000e+00
Mediation path 2: aging_rate → Disease coefficient: 0.14350, p-value: 0.00000e+00
