# In [None]:
import pandas as pd
import numpy as np
from lifelines import CoxPHFitter

def _load_dataset(input_path):
    """Read the CSV at *input_path* and prepare it for modelling.

    Column names are lowercased, the adjustment columns are checked for
    presence, and a mean-centered copy of ``ca_delta`` is stored in
    ``ca_delta_mean_centered``.

    Args:
        input_path: Path (or any source ``pandas.read_csv`` accepts) of the
            dataset.

    Returns:
        The prepared ``pandas.DataFrame``.

    Raises:
        ValueError: If 'age', 'sex', 'ca_delta' or 'vat' is not a column.
            The first missing column (in that order) is reported.
    """
    df = pd.read_csv(input_path)

    # Lowercase the header so the column lookups below are case-insensitive.
    df.columns = df.columns.str.lower()

    for required in ('age', 'sex', 'ca_delta', 'vat'):
        if required not in df.columns:
            raise ValueError(
                f"Column '{required}' is missing. Please ensure it's in the dataset."
            )

    # Convert sex if needed (0=Female, 1=Male). Adjust as appropriate for
    # your data, e.g.:
    #   df['sex'] = df['sex'].replace({'Female': 0, 'Male': 1}).astype(int)

    # Mean-center ca_delta using the mean over the whole dataset (not the
    # per-outcome analysis sample).
    df['ca_delta_mean_centered'] = df['ca_delta'] - df['ca_delta'].mean()
    return df


def _fit_outcome(model_data, outcome_label, time_col, event_col):
    """Fit one Cox model and return its labelled summary table.

    Args:
        model_data: Complete-case frame holding the duration column, the
            event column and the covariates (every other column is treated
            as a covariate by the fitter).
        outcome_label: Human-readable outcome name used in messages and in
            the ``outcome`` column of the result.
        time_col: Name of the duration column.
        event_col: Name of the event-indicator column.

    Returns:
        A copy of the fitter's summary with ``outcome`` and ``-log2(p)``
        columns set, or ``None`` when the fit raised ``ConvergenceError``.
    """
    # Local import keeps this block self-contained; lifelines is already a
    # dependency of the file via CoxPHFitter.
    from lifelines.exceptions import ConvergenceError

    # A fresh fitter per outcome so a failed fit cannot leave stale state
    # behind for the next one.
    cph = CoxPHFitter()
    try:
        cph.fit(
            model_data,
            duration_col=time_col,
            event_col=event_col,
            show_progress=False,
        )
    except ConvergenceError as err:
        # One ill-conditioned outcome should not discard the others.
        print(f"Skipping {outcome_label}: model did not converge ({err}).")
        return None

    summary_df = cph.summary.copy()
    summary_df['outcome'] = outcome_label  # Label each result by outcome
    # Substitute the smallest positive float for p == 0 so log2 stays finite.
    summary_df['-log2(p)'] = -np.log2(summary_df['p'].replace(0, np.nextafter(0, 1)))

    # Print a short text summary for this outcome
    print(f"\n=== Cox Regression for {outcome_label} ===")
    print(cph.summary[['coef', 'exp(coef)', 'p', 'coef lower 95%', 'coef upper 95%']])
    return summary_df


def main(input_path='data.csv', output_path='cox_regression_results.csv'):
    """Run one adjusted Cox regression per outcome and save the results.

    Loads the dataset, fits a Cox proportional-hazards model for every
    outcome whose time/event columns are present, prints a per-outcome and
    a consolidated summary, and writes the consolidated table to CSV.
    Outcomes are skipped (with a message) when their columns are missing,
    when no complete rows remain, when no event was observed, or when the
    model fails to converge.

    Args:
        input_path: CSV file to analyse. Defaults to 'data.csv'.
        output_path: Destination of the consolidated results CSV. Defaults
            to 'cox_regression_results.csv'. Nothing is written when no
            outcome could be processed.

    Raises:
        ValueError: If a required adjustment column is missing.
    """
    # --------------------------------------------------------------------------
    # 1) LOAD THE DATA
    # --------------------------------------------------------------------------
    df = _load_dataset(input_path)

    # --------------------------------------------------------------------------
    # 2) DEFINE OUTCOMES
    # --------------------------------------------------------------------------
    # Each outcome maps to (duration column, event-indicator column), where
    # the indicator is 1 if the event occurred and 0 otherwise.
    # Adjust these column names to match your dataset.
    outcomes_dict = {
        "All-cause mortality": ("time_to_mortality", "mortality_event"),
        "MACE": ("time_to_mace", "mace_event"),
        "Hypertension": ("time_to_hypertension", "hypertension"),
        "Atrial Fibrillation": ("time_to_atrial_fib", "atrial_fib"),
        "Stroke": ("time_to_stroke", "stroke"),
        "Angina": ("time_to_angina", "angina"),
        "Heart Failure": ("time_to_heart_failure", "heart_failure"),
        "Myocardial Infarction": ("time_to_mi", "mi"),
        "Type 2 Diabetes": ("time_to_diabetes", "diabetes"),
    }

    # Covariates to include in each model (adjust as needed)
    covariates = [
        'ca_delta_mean_centered',  # or 'ca_delta' if not mean-centering
        'age',
        'sex',
        'vat',
    ]

    # --------------------------------------------------------------------------
    # 3) RUN MULTIPLE COX REGRESSIONS (ONE PER OUTCOME)
    # --------------------------------------------------------------------------
    results_list = []

    for outcome_label, (time_col, event_col) in outcomes_dict.items():
        if time_col not in df.columns or event_col not in df.columns:
            print(f"Skipping {outcome_label}: '{time_col}' or '{event_col}' not in dataset.")
            continue

        # Complete-case analysis: the frame holds only the model columns, so
        # a plain dropna() removes rows missing time, event or any covariate.
        model_data = df[[time_col, event_col] + covariates].dropna()
        if model_data.empty:
            print(f"Skipping {outcome_label}: No data after dropna().")
            continue

        # With every row censored there is nothing for the Cox partial
        # likelihood to estimate, so skip instead of attempting the fit.
        if not model_data[event_col].astype(bool).any():
            print(f"Skipping {outcome_label}: no events observed in '{event_col}'.")
            continue

        summary_df = _fit_outcome(model_data, outcome_label, time_col, event_col)
        if summary_df is not None:
            results_list.append(summary_df)

    # --------------------------------------------------------------------------
    # 4) COMBINE AND PRINT/SAVE FINAL RESULTS
    # --------------------------------------------------------------------------
    if not results_list:
        print("\nNo valid outcomes were processed.")
        return

    all_results = pd.concat(results_list, axis=0)
    # Reorder columns for clarity
    all_results = all_results[
        ['outcome', 'coef', 'exp(coef)', 'se(coef)',
         'coef lower 95%', 'coef upper 95%',
         'exp(coef) lower 95%', 'exp(coef) upper 95%',
         'z', 'p', '-log2(p)']
    ]

    # Print a consolidated text summary (one line per parameter per outcome)
    print("\n--- Consolidated Cox Results (Plain Text) ---")
    print(", ".join(all_results.columns))
    for _, row in all_results.iterrows():
        print(", ".join(str(value) for value in row))

    # The index (covariate names) is kept so each CSV row stays identifiable.
    all_results.to_csv(output_path, index=True)
    print(f"\nSaved '{output_path}' with results for all outcomes.")

# Run the analysis only when this file is executed as a script, not when it
# is imported as a module.
if __name__ == "__main__":
    main()
