In [None]:
import pandas as pd
from sklearn.preprocessing import StandardScaler, LabelEncoder
import matplotlib.pyplot as plt
from scipy.stats import pearsonr
import scipy.stats as stats
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.formula.api import ols
from sklearn.impute import KNNImputer
import seaborn as sns

In [None]:
# Load the per-sex "delta" tables (one CSV per sex).
# Forward slashes resolve on both Windows and POSIX, whereas a backslash-separated
# literal only works on Windows; this also matches the separator already used for
# the CMR phenotype file.
delta_female = pd.read_csv('path/delta_heart_female.csv')
delta_male = pd.read_csv('path/delta_heart_male.csv')

In [None]:
# Inspect the female delta table (a bare last expression is rendered by the notebook).
delta_female

In [None]:
# Obesity measures table, later joined to the delta tables on 'f.eid'.
# Forward slash keeps the path usable on non-Windows systems as well.
obesity_qc = pd.read_csv('path/obesity_measures.csv')

In [None]:
# Covariate table; later cells read its sex, smoking_status, ethnicity,
# age_at_recruitment_visit2, drinking_status, physical_moderate and obesity_groups columns.
# Forward slash keeps the path usable on non-Windows systems as well.
covariates = pd.read_csv('path/ukb_covariates_34K.csv')

In [None]:
# Two auxiliary tables: a comma-separated phenotype file and a tab-separated field file.
# NOTE(review): neither frame is referenced again in the visible part of the notebook.
# Both paths now use forward slashes so they resolve on Windows and POSIX alike.
ids_after_cvd = pd.read_csv('path/CMR_phenotypes_after_cmr_taken_36K.csv')
date_assit = pd.read_csv('path/f.53.tab', sep="\t")

In [None]:
# Anthropometric sources: BMI (comma-separated) plus waist and hip fields (tab-separated).
# Forward slashes keep the paths usable on non-Windows systems as well.
bmi = pd.read_csv('path/obesity_bmi_height_weight_cmr_phenotypes_45K.csv')
waist = pd.read_csv('path/f.48.tab', sep="\t")
hip = pd.read_csv('path/f.49.tab', sep="\t")

In [None]:
# Keep only the participant id plus the one measurement needed from each source table.
bmi_ = bmi.loc[:, ['f.eid', 'BMI']]
waist_ = waist.loc[:, ['f.eid', 'f.48.2.0']]
hip_ = hip.loc[:, ['f.eid', 'f.49.2.0']]

# 'f.eid' is the only column the slices have in common, so it is the implicit join key.
bmi_waist = pd.merge(bmi_, waist_)
bmi_waist_hip = pd.merge(bmi_waist, hip_)

# Waist-to-hip ratio: the waist column divided by the hip column.
bmi_waist_hip['waist_hip_ratio'] = bmi_waist_hip['f.48.2.0'].div(bmi_waist_hip['f.49.2.0'])

In [None]:
# Shared preprocessing helpers used by later cells:
#   scaler  -> standardizes the recruitment-age column
#   encoder -> turns the ethnicity labels into integer codes
scaler, encoder = StandardScaler(), LabelEncoder()

In [None]:
# Numeric coding of the smoking-status answers, applied with Series.map below.
# "Prefer not to answer" is a non-response rather than a level above "Current", so it
# is coded as missing instead of 3. The regression formulas use
# smoking_status_numeric as a plain numeric term, so a code of 3 would have been
# treated as the heaviest smoking category. Missing codes are filled by the KNN
# imputation step like any other missing value.
smoking_status_map = {
    'Never': 0,
    'Previous': 1,
    'Current': 2,
    'Prefer not to answer': float('nan'),
}

# Multiple linear regression - Female

## Create dataframes

In [None]:
# Attach the obesity measures to every female delta row. The left join keeps
# participants without a row in obesity_qc; their measures become NaN.
# NOTE(review): the male section builds the same table with the default inner join —
# confirm which behaviour is intended so both sexes are processed alike.
merge_fat_delta = pd.merge(delta_female, obesity_qc, how='left', on='f.eid')

In [None]:
# Inspect the merged delta + obesity-measures table.
merge_fat_delta 

In [None]:
# Covariate rows with sex == 0, re-indexed from zero.
# presumably 0 is the code for female — TODO confirm against the data dictionary
is_female = covariates['sex'].eq(0)
female = covariates.loc[is_female].reset_index(drop=True)

In [None]:
# Derived covariates:
#   smoking_status_numeric -> smoking answers mapped through smoking_status_map
#   age                    -> standardized age_at_recruitment_visit2
#   ethnicity              -> replaced by LabelEncoder integer codes
# NOTE(review): the integer ethnicity codes carry no order, yet the models below use
# `ethnicity` as a plain numeric term — confirm this is intended.
# NOTE(review): `age` is not among the columns selected for the models in the visible cells.
female = female.assign(
    smoking_status_numeric=female['smoking_status'].map(smoking_status_map),
    age=scaler.fit_transform(female[['age_at_recruitment_visit2']]),
    ethnicity=encoder.fit_transform(female['ethnicity']),
)

In [None]:
# Inspect the female covariate table after the derived columns were added.
female

In [None]:
# Identifier plus the covariates carried into the association models.
covariate_columns = [
    'f.eid',
    'obesity_groups',
    'drinking_status',
    'physical_moderate',
    'ethnicity',
    'smoking_status_numeric',
]
ml_delta_test_cova = female.loc[:, covariate_columns]

In [None]:
# Inner join on the participant id: only participants present in both tables are kept.
# Column names that occur on both sides receive pandas' default _x / _y suffixes.
ml_delta_test = pd.merge(merge_fat_delta, ml_delta_test_cova, on='f.eid')

In [None]:
# Inspect the joined delta / obesity / covariate table.
ml_delta_test

In [None]:
# Add BMI, waist, hip and waist-to-hip ratio. No key is passed, so pandas joins on
# every column name the two frames have in common.
# NOTE(review): presumably the only shared column is 'f.eid' — confirm. The cell
# rebinds its own input, so re-running it merges a second time.
ml_delta_test = pd.merge(ml_delta_test, bmi_waist_hip)

In [None]:
# Copy the coded source columns under the readable names used in the model formulas.
readable_names = {
    'VAT': 'f.22407.2.0',
    'ASAT': 'f.22408.2.0',
    'Pericardial': 'meanArea (cm2)',
    'Waist': 'f.48.2.0',
    'Hip': 'f.49.2.0',
    'WHR': 'waist_hip_ratio',
}
for alias, source_column in readable_names.items():
    ml_delta_test[alias] = ml_delta_test[source_column]

In [None]:
# Expose the group label under its plain name, taking the `_x` copy
# (the one contributed by the left-hand table of the earlier merge).
ml_delta_test = ml_delta_test.assign(obesity_groups=ml_delta_test['obesity_groups_x'])

In [None]:
# List the column labels before the redundant columns are dropped.
ml_delta_test.columns

In [None]:
# Remove the coded source columns that were copied under readable names, the
# suffixed duplicates created by the merges, and 'predicted DSC'.
# NOTE(review): 'meanArea (cm2)' is not dropped, so it stays alongside its copy 'Pericardial'.
redundant_columns = [
    'f.22407.2.0',
    'f.22408.2.0',
    'age_at_recruitment_visit2_x',
    'f.48.2.0',
    'f.49.2.0',
    'obesity_groups_x',
    'obesity_groups_y',
    'predicted DSC',
    'waist_hip_ratio',
]
ml_delta_test = ml_delta_test.drop(labels=redundant_columns, axis='columns')

In [None]:
# Inspect the table after the redundant columns were removed.
ml_delta_test

In [None]:
# Number of missing values per column, i.e. what the imputation step has to fill.
ml_delta_test.isna().sum()

In [None]:
# Every column label except the last one.
ml_delta_test.columns[:-1]

In [None]:
# Fill missing values from the 10 nearest rows.
# The participant identifier is kept out of the distance computation: 'f.eid' is an
# arbitrary label, and because the imputer measures distance on the raw, unscaled
# columns, the id would otherwise dominate the choice of neighbours.
# NOTE(review): the remaining columns are still on different scales, and the
# integer-coded covariates can receive fractional imputed values — confirm this is acceptable.
imputation_input = ml_delta_test.drop(columns='obesity_groups')
feature_columns = [col for col in imputation_input.columns if col != 'f.eid']

imputer = KNNImputer(n_neighbors=10)
imputation_input[feature_columns] = imputer.fit_transform(imputation_input[feature_columns])

# Same layout as before: a float array holding every column except 'obesity_groups',
# in the original column order (with 'f.eid' passed through unchanged).
ml_delta_test_imputed = imputation_input.to_numpy(dtype=float)

In [None]:
# Wrap the imputed array back into a labelled frame.
# The labels are derived by removing 'obesity_groups' by name instead of slicing off
# the last position, so they stay correct even if that column is not the final one.
# The original index is carried over so the index-based merge that re-attaches the
# group labels always aligns row for row.
imputed_columns = ml_delta_test.columns.drop('obesity_groups')
ml_delta_test_imputed = pd.DataFrame(
    ml_delta_test_imputed,
    columns=imputed_columns,
    index=ml_delta_test.index,
)

In [None]:
# Re-attach the (non-imputed) group label to the imputed values, aligning on the row index.
group_labels = ml_delta_test[['obesity_groups']]
delta_test_association = pd.merge(
    ml_delta_test_imputed,
    group_labels,
    left_index=True,
    right_index=True,
)

## Results of association

In [None]:
# Per-obesity-group association models: for every group, regress the standardized
# delta on each standardized adiposity measure in turn, adjusting for the covariates.
variables_interes = ["VAT", "ASAT", "Pericardial", "BMI", "Waist", "Hip", "WHR"]
vars_to_standardize = ["delta"] + variables_interes

# Adjustment terms shared by every model.
# NOTE(review): `ethnicity` (integer label codes) and `smoking_status_numeric` enter as
# plain numeric terms — confirm that is intended rather than categorical terms.
covariate_terms = "drinking_status + physical_moderate + ethnicity + smoking_status_numeric"

resultados_lista = []

# Rows without a group label are skipped: unique() would otherwise yield NaN, which
# matches no row, and the scaler would fail on the resulting empty frame.
for group in delta_test_association["obesity_groups"].dropna().unique():
    df_group = delta_test_association[delta_test_association["obesity_groups"] == group].copy()

    # Standardize within the group so the coefficients are standardized betas.
    scaler = StandardScaler()
    df_group[vars_to_standardize] = scaler.fit_transform(df_group[vars_to_standardize])

    # One model per measure of interest.
    for var in variables_interes:
        modelo = smf.ols(f"delta ~ {var} + {covariate_terms}", data=df_group).fit()

        coef = modelo.params[var]
        ic_inf, ic_sup = modelo.conf_int().loc[var]  # computed once per model

        resultados_lista.append({
            "Grupo": group,
            "Variable": var,
            "Coeficiente": coef,
            "Error Estándar": modelo.bse[var],
            "p value": modelo.pvalues[var],
            "IC 2.5%": ic_inf,
            "IC 97.5%": ic_sup,
            # Coefficient formatted together with its confidence interval.
            "β (95% CI)": f"{coef:.3f} ({ic_inf:.3f}, {ic_sup:.3f})",
        })

# Build the result table in one step from the collected records.
resultados_df_all_groups = pd.DataFrame(resultados_lista)

# Wide table: one row per group, a (measure, statistic) column pair per measure.
df_pivot2 = resultados_df_all_groups.pivot_table(
    index="Grupo",
    columns="Variable",
    values=["β (95% CI)", "p value"],
    aggfunc="first"
)

# pivot_table yields (statistic, measure) column tuples; flip them to (measure, statistic).
df_pivot2.columns = pd.MultiIndex.from_tuples(
    [(variable, statistic) for statistic, variable in df_pivot2.columns])

# Presentation order of the measures; each contributes its beta column, then its p value.
column_order = [
    (variable, statistic)
    for variable in ["VAT", "ASAT", "Pericardial", "BMI", "Hip", "Waist", "WHR"]
    for statistic in ["β (95% CI)", "p value"]
]

df_pivot2 = df_pivot2[column_order]

In [None]:
# Check the (measure, statistic) column labels of the wide results table.
df_pivot2.columns

In [None]:
# Show the wide results table: one row per obesity group.
df_pivot2

In [None]:
# Forest plot of the per-group standardized coefficients (female).
resultados_df_all_groups["Significativo"] = resultados_df_all_groups["p value"] < 0.05

# Sort by group and variable for a consistent layout.
df_plot = resultados_df_all_groups.sort_values(by=["Grupo", "Variable"]).copy()

# One unique label per row (e.g. "Group1 - VAT"); astype(str) keeps the
# concatenation working when the group labels are not strings.
df_plot["Etiqueta"] = df_plot["Grupo"].astype(str) + " - " + df_plot["Variable"]

# Reverse the order so the first sorted entry ends up at the top of the axis.
df_plot = df_plot[::-1].reset_index(drop=True)

# Figure height grows with the number of rows.
plt.figure(figsize=(10, len(df_plot) * 0.4))
sns.set(style="whitegrid")

# Point estimates, coloured by significance at p < 0.05.
palette = {True: "tab:blue", False: "red"}
sns.scatterplot(
    data=df_plot,
    x="Coeficiente",
    y="Etiqueta",
    hue="Significativo",
    palette=palette,
    s=70,
    legend=False
)

# Confidence intervals. Each segment is anchored to its row label rather than to a
# positional index, so it stays on the same categorical tick as its point.
for _, row in df_plot.iterrows():
    plt.plot(
        [row["IC 2.5%"], row["IC 97.5%"]],
        [row["Etiqueta"], row["Etiqueta"]],
        color=palette[row["Significativo"]],
        lw=2,
    )

# Vertical reference line at 0 (null effect).
plt.axvline(x=0, color="black", linestyle="--", lw=1)

# Labels and title.
plt.xlabel("Standardized β (95% CI)", fontsize=14)
plt.ylabel("")
plt.yticks(fontsize=12)
plt.title("Forest Plot of Standardized Associations by Obesity Group", fontsize=16)

plt.tight_layout()
# Forward slash so the path resolves on Windows and POSIX alike.
plt.savefig('paths/forest_plot_obesity_groups_female.png', bbox_inches='tight', dpi=300)

plt.show()

In [None]:
# Pooled association models (all obesity groups together): regress the standardized
# delta on each standardized adiposity measure in turn, adjusting for the covariates.
# NOTE(review): `ethnicity` and `smoking_status_numeric` enter as plain numeric terms —
# confirm that is intended.
variables_interes = ["VAT", "ASAT", "Pericardial", "BMI", "Waist", "Hip", "WHR"]
vars_to_standardize = ["delta"] + variables_interes

# Work on a copy so the unstandardized table stays available.
df_std = delta_test_association.copy()
scaler = StandardScaler()
df_std[vars_to_standardize] = scaler.fit_transform(df_std[vars_to_standardize])

resultados_lista = []
for var in variables_interes:
    formula = f"delta ~ {var} + drinking_status + physical_moderate + ethnicity + smoking_status_numeric"
    modelo = smf.ols(formula, data=df_std).fit()
    intervalo = modelo.conf_int()

    # One-row summary for the measure of interest.
    resumen = pd.DataFrame({
        "Variable": [var],
        "Coeficiente": [modelo.params[var]],
        "Error Estándar": [modelo.bse[var]],
        "p value": [modelo.pvalues[var]],
        "IC 2.5%": [intervalo.loc[var, 0]],
        "IC 97.5%": [intervalo.loc[var, 1]],
    })

    # Coefficient formatted together with its confidence interval.
    resumen["β (95% CI)"] = [
        f"{beta:.3f} ({lower:.3f}, {upper:.3f})"
        for beta, lower, upper in zip(resumen["Coeficiente"], resumen["IC 2.5%"], resumen["IC 97.5%"])
    ]

    resultados_lista.append(resumen)

# Stack the summaries and order them from smallest to largest p value.
resultados_df_total = pd.concat(resultados_lista, ignore_index=True).sort_values("p value")

print(resultados_df_total)

In [None]:
# Forest plot of the pooled standardized coefficients (female, all groups together).
resultados_df_total["Significativo"] = resultados_df_total["p value"] < 0.05

# Sort by variable for a consistent layout.
df_plot = resultados_df_total.sort_values(by=["Variable"]).copy()

# The row label is simply the measure name.
df_plot["Etiqueta"] = df_plot["Variable"]

# Reverse the order so the first sorted entry ends up at the top of the axis.
df_plot = df_plot[::-1].reset_index(drop=True)

# Figure height grows with the number of rows.
plt.figure(figsize=(10, len(df_plot) * 0.4))
sns.set(style="whitegrid")

# Point estimates, coloured by significance at p < 0.05.
palette = {True: "tab:blue", False: "gray"}
sns.scatterplot(
    data=df_plot,
    x="Coeficiente",
    y="Etiqueta",
    hue="Significativo",
    palette=palette,
    s=70,
    legend=False
)

# Confidence intervals. Each segment is anchored to its row label rather than to a
# positional index, so it stays on the same categorical tick as its point.
for _, row in df_plot.iterrows():
    plt.plot(
        [row["IC 2.5%"], row["IC 97.5%"]],
        [row["Etiqueta"], row["Etiqueta"]],
        color=palette[row["Significativo"]],
        lw=2,
    )

# Vertical reference line at 0 (null effect).
plt.axvline(x=0, color="black", linestyle="--", lw=1)

# Labels and title. This figure pools all obesity groups, so the title names the
# cohort instead of repeating the per-group wording.
plt.xlabel("Standardized β (95% CI)", fontsize=14)
plt.ylabel("", fontsize=14)
plt.yticks(fontsize=12)
plt.title("Forest Plot of Standardized Associations (Female)", fontsize=16)

plt.tight_layout()
# Forward slash so the path resolves on Windows and POSIX alike.
plt.savefig('path/forest_plot_female.png', bbox_inches='tight', dpi=300)

plt.show()

# Multiple linear regression - Male

## Create dataframes

In [None]:
# Attach the obesity measures to the male delta rows. With the default inner join,
# participants without a row in obesity_qc are dropped.
# NOTE(review): the female section uses how='left' for the same step — confirm which
# behaviour is intended so both sexes are processed alike.
merge_fat_delta = pd.merge(delta_male, obesity_qc, on='f.eid')

In [None]:
# Covariate rows with sex == 1, re-indexed from zero.
# presumably 1 is the code for male — TODO confirm against the data dictionary
is_male = covariates['sex'].eq(1)
male = covariates.loc[is_male].reset_index(drop=True)

In [None]:
# Derived covariates:
#   smoking_status_numeric -> smoking answers mapped through smoking_status_map
#   age                    -> standardized age_at_recruitment_visit2
#   ethnicity              -> replaced by LabelEncoder integer codes
# NOTE(review): the encoder is refitted on the male rows only, so a given integer code
# need not denote the same ethnicity as in the female table — confirm this is acceptable.
# NOTE(review): `age` is not among the columns selected for the models in the visible cells.
male = male.assign(
    smoking_status_numeric=male['smoking_status'].map(smoking_status_map),
    age=scaler.fit_transform(male[['age_at_recruitment_visit2']]),
    ethnicity=encoder.fit_transform(male['ethnicity']),
)

In [None]:
# Identifier plus the covariates carried into the association models.
covariate_columns = [
    'f.eid',
    'obesity_groups',
    'drinking_status',
    'physical_moderate',
    'ethnicity',
    'smoking_status_numeric',
]
ml_delta_test_cova = male.loc[:, covariate_columns]

In [None]:
# Inner join on the participant id: only participants present in both tables are kept.
# Column names that occur on both sides receive pandas' default _x / _y suffixes.
ml_delta_test = pd.merge(merge_fat_delta, ml_delta_test_cova, on='f.eid')

In [None]:
# Add BMI, waist, hip and waist-to-hip ratio. No key is passed, so pandas joins on
# every column name the two frames have in common.
# NOTE(review): presumably the only shared column is 'f.eid' — confirm. The cell
# rebinds its own input, so re-running it merges a second time.
ml_delta_test = pd.merge(ml_delta_test, bmi_waist_hip)

In [None]:
# Copy the coded source columns under the readable names used in the model formulas.
readable_names = {
    'VAT': 'f.22407.2.0',
    'ASAT': 'f.22408.2.0',
    'Pericardial': 'meanArea (cm2)',
    'Waist': 'f.48.2.0',
    'Hip': 'f.49.2.0',
    'WHR': 'waist_hip_ratio',
}
for alias, source_column in readable_names.items():
    ml_delta_test[alias] = ml_delta_test[source_column]

In [None]:
# Expose the group label under its plain name, taking the `_x` copy
# (the one contributed by the left-hand table of the earlier merge).
ml_delta_test = ml_delta_test.assign(obesity_groups=ml_delta_test['obesity_groups_x'])

In [None]:
# Remove the coded source columns that were copied under readable names, the
# suffixed duplicates created by the merges, and 'predicted DSC'.
# NOTE(review): 'meanArea (cm2)' is not dropped, so it stays alongside its copy 'Pericardial'.
redundant_columns = [
    'f.22407.2.0',
    'f.22408.2.0',
    'age_at_recruitment_visit2_x',
    'f.48.2.0',
    'f.49.2.0',
    'obesity_groups_x',
    'obesity_groups_y',
    'predicted DSC',
    'waist_hip_ratio',
]
ml_delta_test = ml_delta_test.drop(labels=redundant_columns, axis='columns')

In [None]:
# Fill missing values from the 10 nearest rows.
# The participant identifier is kept out of the distance computation: 'f.eid' is an
# arbitrary label, and because the imputer measures distance on the raw, unscaled
# columns, the id would otherwise dominate the choice of neighbours.
# NOTE(review): the remaining columns are still on different scales, and the
# integer-coded covariates can receive fractional imputed values — confirm this is acceptable.
imputation_input = ml_delta_test.drop(columns='obesity_groups')
feature_columns = [col for col in imputation_input.columns if col != 'f.eid']

imputer = KNNImputer(n_neighbors=10)
imputation_input[feature_columns] = imputer.fit_transform(imputation_input[feature_columns])

# Same layout as before: a float array holding every column except 'obesity_groups',
# in the original column order (with 'f.eid' passed through unchanged).
ml_delta_test_imputed = imputation_input.to_numpy(dtype=float)

In [None]:
# Wrap the imputed array back into a labelled frame.
# The labels are derived by removing 'obesity_groups' by name instead of slicing off
# the last position, so they stay correct even if that column is not the final one.
# The original index is carried over so the index-based merge that re-attaches the
# group labels always aligns row for row.
imputed_columns = ml_delta_test.columns.drop('obesity_groups')
ml_delta_test_imputed = pd.DataFrame(
    ml_delta_test_imputed,
    columns=imputed_columns,
    index=ml_delta_test.index,
)

In [None]:
# Re-attach the (non-imputed) group label to the imputed values, aligning on the row index.
group_labels = ml_delta_test[['obesity_groups']]
delta_test_association = pd.merge(
    ml_delta_test_imputed,
    group_labels,
    left_index=True,
    right_index=True,
)

## Results of association

In [None]:
# Per-obesity-group association models: for every group, regress the standardized
# delta on each standardized adiposity measure in turn, adjusting for the covariates.
variables_interes = ["VAT", "ASAT", "Pericardial", "BMI", "Waist", "Hip", "WHR"]
vars_to_standardize = ["delta"] + variables_interes

# Adjustment terms shared by every model.
# NOTE(review): `ethnicity` (integer label codes) and `smoking_status_numeric` enter as
# plain numeric terms — confirm that is intended rather than categorical terms.
covariate_terms = "drinking_status + physical_moderate + ethnicity + smoking_status_numeric"

resultados_lista = []

# Rows without a group label are skipped: unique() would otherwise yield NaN, which
# matches no row, and the scaler would fail on the resulting empty frame.
for group in delta_test_association["obesity_groups"].dropna().unique():
    df_group = delta_test_association[delta_test_association["obesity_groups"] == group].copy()

    # Standardize within the group so the coefficients are standardized betas.
    scaler = StandardScaler()
    df_group[vars_to_standardize] = scaler.fit_transform(df_group[vars_to_standardize])

    # One model per measure of interest.
    for var in variables_interes:
        modelo = smf.ols(f"delta ~ {var} + {covariate_terms}", data=df_group).fit()

        coef = modelo.params[var]
        ic_inf, ic_sup = modelo.conf_int().loc[var]  # computed once per model

        resultados_lista.append({
            "Grupo": group,
            "Variable": var,
            "Coeficiente": coef,
            "Error Estándar": modelo.bse[var],
            "p value": modelo.pvalues[var],
            "IC 2.5%": ic_inf,
            "IC 97.5%": ic_sup,
            # Coefficient formatted together with its confidence interval.
            "β (95% CI)": f"{coef:.3f} ({ic_inf:.3f}, {ic_sup:.3f})",
        })

# Build the result table in one step from the collected records.
resultados_df_all_groups = pd.DataFrame(resultados_lista)

# Wide table: one row per group, a (measure, statistic) column pair per measure.
df_pivot2 = resultados_df_all_groups.pivot_table(
    index="Grupo",
    columns="Variable",
    values=["β (95% CI)", "p value"],
    aggfunc="first"
)

# pivot_table yields (statistic, measure) column tuples; flip them to (measure, statistic).
df_pivot2.columns = pd.MultiIndex.from_tuples(
    [(variable, statistic) for statistic, variable in df_pivot2.columns])

# Presentation order of the measures; each contributes its beta column, then its p value.
column_order = [
    (variable, statistic)
    for variable in ["VAT", "ASAT", "Pericardial", "BMI", "Hip", "Waist", "WHR"]
    for statistic in ["β (95% CI)", "p value"]
]

df_pivot2 = df_pivot2[column_order]

In [None]:
# Forest plot of the per-group standardized coefficients (male).
resultados_df_all_groups["Significativo"] = resultados_df_all_groups["p value"] < 0.05

# Sort by group and variable for a consistent layout.
df_plot = resultados_df_all_groups.sort_values(by=["Grupo", "Variable"]).copy()

# One unique label per row (e.g. "Group1 - VAT"); astype(str) keeps the
# concatenation working when the group labels are not strings.
df_plot["Etiqueta"] = df_plot["Grupo"].astype(str) + " - " + df_plot["Variable"]

# Reverse the order so the first sorted entry ends up at the top of the axis.
df_plot = df_plot[::-1].reset_index(drop=True)

# Figure height grows with the number of rows.
plt.figure(figsize=(10, len(df_plot) * 0.4))
sns.set(style="whitegrid")

# Point estimates, coloured by significance at p < 0.05.
palette = {True: "tab:blue", False: "red"}
sns.scatterplot(
    data=df_plot,
    x="Coeficiente",
    y="Etiqueta",
    hue="Significativo",
    palette=palette,
    s=70,
    legend=False
)

# Confidence intervals. Each segment is anchored to its row label rather than to a
# positional index, so it stays on the same categorical tick as its point.
for _, row in df_plot.iterrows():
    plt.plot(
        [row["IC 2.5%"], row["IC 97.5%"]],
        [row["Etiqueta"], row["Etiqueta"]],
        color=palette[row["Significativo"]],
        lw=2,
    )

# Vertical reference line at 0 (null effect).
plt.axvline(x=0, color="black", linestyle="--", lw=1)

# Labels and title.
plt.xlabel("Standardized β (95% CI)", fontsize=14)
plt.ylabel("")
plt.yticks(fontsize=12)
plt.title("Forest Plot of Standardized Associations by Obesity Group", fontsize=16)

plt.tight_layout()
# Project-relative output path instead of an absolute path inside one user's home
# directory, so the notebook runs on other machines and does not embed a personal name.
plt.savefig('path/forest_plot_obesity_groups_male.png', bbox_inches='tight', dpi=300)

plt.show()

In [None]:
# Pooled association models (all obesity groups together): regress the standardized
# delta on each standardized adiposity measure in turn, adjusting for the covariates.
# NOTE(review): `ethnicity` and `smoking_status_numeric` enter as plain numeric terms —
# confirm that is intended.
variables_interes = ["VAT", "ASAT", "Pericardial", "BMI", "Waist", "Hip", "WHR"]
vars_to_standardize = ["delta"] + variables_interes

# Work on a copy so the unstandardized table stays available.
df_std = delta_test_association.copy()
scaler = StandardScaler()
df_std[vars_to_standardize] = scaler.fit_transform(df_std[vars_to_standardize])

resultados_lista = []
for var in variables_interes:
    formula = f"delta ~ {var} + drinking_status + physical_moderate + ethnicity + smoking_status_numeric"
    modelo = smf.ols(formula, data=df_std).fit()
    intervalo = modelo.conf_int()

    # One-row summary for the measure of interest.
    resumen = pd.DataFrame({
        "Variable": [var],
        "Coeficiente": [modelo.params[var]],
        "Error Estándar": [modelo.bse[var]],
        "p value": [modelo.pvalues[var]],
        "IC 2.5%": [intervalo.loc[var, 0]],
        "IC 97.5%": [intervalo.loc[var, 1]],
    })

    # Coefficient formatted together with its confidence interval.
    resumen["β (95% CI)"] = [
        f"{beta:.3f} ({lower:.3f}, {upper:.3f})"
        for beta, lower, upper in zip(resumen["Coeficiente"], resumen["IC 2.5%"], resumen["IC 97.5%"])
    ]

    resultados_lista.append(resumen)

# Stack the summaries and order them from smallest to largest p value.
resultados_df_total = pd.concat(resultados_lista, ignore_index=True).sort_values("p value")

print(resultados_df_total)

In [None]:
# Forest plot of the pooled standardized coefficients (male, all groups together).
resultados_df_total["Significativo"] = resultados_df_total["p value"] < 0.05

# Sort by variable for a consistent layout.
df_plot = resultados_df_total.sort_values(by=["Variable"]).copy()

# The row label is simply the measure name.
df_plot["Etiqueta"] = df_plot["Variable"]

# Reverse the order so the first sorted entry ends up at the top of the axis.
df_plot = df_plot[::-1].reset_index(drop=True)

# Figure height grows with the number of rows.
plt.figure(figsize=(10, len(df_plot) * 0.4))
sns.set(style="whitegrid")

# Point estimates, coloured by significance at p < 0.05.
palette = {True: "tab:blue", False: "gray"}
sns.scatterplot(
    data=df_plot,
    x="Coeficiente",
    y="Etiqueta",
    hue="Significativo",
    palette=palette,
    s=70,
    legend=False
)

# Confidence intervals. Each segment is anchored to its row label rather than to a
# positional index, so it stays on the same categorical tick as its point.
for _, row in df_plot.iterrows():
    plt.plot(
        [row["IC 2.5%"], row["IC 97.5%"]],
        [row["Etiqueta"], row["Etiqueta"]],
        color=palette[row["Significativo"]],
        lw=2,
    )

# Vertical reference line at 0 (null effect).
plt.axvline(x=0, color="black", linestyle="--", lw=1)

# Labels and title. This figure pools all obesity groups, so the title names the
# cohort instead of repeating the per-group wording.
plt.xlabel("Standardized β (95% CI)", fontsize=14)
plt.ylabel("", fontsize=14)
plt.yticks(fontsize=12)
plt.title("Forest Plot of Standardized Associations (Male)", fontsize=16)

plt.tight_layout()
# Project-relative output path instead of an absolute path inside one user's home
# directory, so the notebook runs on other machines and does not embed a personal name.
plt.savefig('path/forest_plot_male.png', bbox_inches='tight', dpi=300)

plt.show()