In [None]:
import pandas as pd

# Table that assigns each participant ("eid") to a "Group" label.
# NOTE(review): the labels look like year ranges relative to diagnosis — confirm.
time_group = pd.read_csv("/Volumes/data_files/UKB_data/processed_data/sle_group.csv")

# Participants labelled "0-5", and their ids as a plain Python list.
within_five = time_group.loc[time_group["Group"].eq("0-5")]
within_five_eid = within_five["eid"].to_list()

# Participants labelled "5-10".
five_ten = time_group.loc[time_group["Group"].eq("5-10")]
five_ten_eid = five_ten["eid"].to_list()

# Participants labelled "10-15".
ten_fifteen = time_group.loc[time_group["Group"].eq("10-15")]
ten_fifteen_eid = ten_fifteen["eid"].to_list()


In [None]:
# Load the three processed input tables; they are joined on "eid" further down.
immune_basic = pd.read_csv("/Volumes/data_files/UKB_data/processed_data/immune_basic_fp.csv")
# NOTE(review): unlike the other inputs this path has no ".csv" extension — confirm the file name is intended.
pro = pd.read_csv("/Volumes/data_files/UKB_data/processed_data/pro_sler")
# Only the "eid" and "alcohol_amount" columns of this table are used by the merge below.
st_features = pd.read_csv("/Volumes/data_files/UKB_data/processed_data/sle_student_t_features.csv")

In [None]:
# Left-join onto `pro` so every row of `pro` is kept: first the columns of
# `immune_basic`, then the single "alcohol_amount" column from `st_features`.
immune_pro = (
    pro
    .merge(immune_basic, how='left', on='eid')
    .merge(st_features[['eid', 'alcohol_amount']], how='left', on='eid')
)

# Column-name families, one entry per disease, listed in the same disease order.
follow_up = [
    'fp-SLE',
    'fp-RA',
    'fp-SS',
    'fp-Systemic Sclerosis',
    'fp-APS',
    'fp-Autoimmune thyroiditis',
]
srd = [
    'srd_SLE',
    'srd_RA',
    'srd_SS',
    'srd_Systemic Sclerosis',
    'srd_APS',
    'srd_Autoimmune thyroiditis',
]
disease = [
    'SLE',
    'RA',
    'SS',
    'Systemic Sclerosis',
    'APS',
    'Autoimmune thyroiditis',
]

In [None]:
# Show every column name of the merged table before anything is dropped.
print(list(immune_pro.columns))

In [None]:
# Columns removed from the merged table before modelling.
unuse_col = [
    # indicators for the other diseases
    'RA', 'SS', 'Systemic Sclerosis', 'APS', 'Autoimmune thyroiditis',
    # icd10 date columns
    'icd10_SLE_dates', 'icd10_RA_dates', 'icd10_SS_dates',
    'icd10_Systemic Sclerosis_dates', 'icd10_APS_dates',
    'icd10_Autoimmune thyroiditis_dates',
    # srd_* columns of the other diseases
    'srd_RA', 'srd_SS', 'srd_Systemic Sclerosis', 'srd_APS',
    'srd_Autoimmune thyroiditis',
    # participant-level columns not used below
    'bmi', 'date_attend', 'birth_year', 'birth_month', 'fp-len',
    # fp-* columns of the other diseases
    'fp-RA', 'fp-SS', 'fp-Systemic Sclerosis', 'fp-APS',
    'fp-Autoimmune thyroiditis',
    'age',
]
immune_pro = immune_pro.drop(columns=unuse_col)
print(list(immune_pro.columns))

In [None]:
# Alternative cohort filter, currently disabled:
# sle_pro = immune_pro[immune_pro["srd_SLE"].isna()]

# Keep rows whose "fp-SLE" is strictly positive (rows with a missing value fail
# the comparison and are excluded as well) and remove the "srd_SLE" column.
# The frame is built as one non-mutating chain: dropping a column in place on a
# boolean-filtered slice triggers pandas' SettingWithCopyWarning.
sle_pro = immune_pro.loc[immune_pro["fp-SLE"] > 0].drop(columns=["srd_SLE"])

# Fill missing numeric values with the column median, computed over all kept rows.
sle_pro = sle_pro.fillna(sle_pro.median(numeric_only=True))

# Participants belonging to each time group (eid lists come from the group table).
sle_pro_5 = sle_pro.loc[sle_pro["eid"].isin(within_five_eid)]
sle_pro_10 = sle_pro.loc[sle_pro["eid"].isin(five_ten_eid)]
sle_pro_15 = sle_pro.loc[sle_pro["eid"].isin(ten_fifteen_eid)]

# Rows with SLE == 0 are shared as the comparison group of every subset.
sle_control = sle_pro.loc[sle_pro["SLE"] == 0]

# One modelling table per time group: that group's rows followed by the controls.
sle_pro_5_cox = pd.concat([sle_pro_5, sle_control], ignore_index=True)
sle_pro_10_cox = pd.concat([sle_pro_10, sle_control], ignore_index=True)
sle_pro_15_cox = pd.concat([sle_pro_15, sle_control], ignore_index=True)

In [None]:
# Covariates added to every per-feature model.
covar_cols = ['sex', 'ethnicity']

# Every remaining column is tested as a feature, except the identifier,
# the covariate-like columns and the outcome / duration columns.
pro_cols = [
    name
    for name in sle_pro.columns
    if name not in {"eid", "sex", "ethnicity", "alcohol_amount", "SLE", "fp-SLE"}
]

In [None]:
import pandas as pd
from lifelines import CoxPHFitter
from tqdm import tqdm
from statsmodels.stats.multitest import multipletests
from statsmodels.stats.multitest import fdrcorrection
from mne.stats import bonferroni_correction

from scipy.stats import norm
import numpy as np

def cox_cal(cox_data, out_file):
    """Run one Cox proportional-hazards fit per feature and save the results.

    For each column name in the module-level ``pro_cols`` list a separate
    ``CoxPHFitter`` model is fitted with ``"fp-SLE"`` as the duration,
    ``"SLE"`` as the event indicator and the feature plus the module-level
    ``covar_cols`` as predictors. A feature whose fit raises is reported on
    stdout and skipped.

    The CSV written to ``out_file`` has one row per successfully fitted
    feature with the columns ``Feature``, ``HR``, ``Lower CI``, ``Upper CI``
    (95% interval on the hazard-ratio scale), ``p-value``, ``Bonferroni`` and
    ``FDR``. The last two are adjusted p-values; a missing raw p-value is
    treated as 1 and the Bonferroni value is capped at 1. If no feature could
    be fitted, a header-only CSV is written.

    Parameters
    ----------
    cox_data : pandas.DataFrame
        Must contain ``"SLE"``, ``"fp-SLE"``, every name in ``pro_cols`` and
        every name in ``covar_cols``.
    out_file : str or path-like
        Destination handed to ``DataFrame.to_csv``.

    Returns
    -------
    None
    """
    result_columns = ['Feature', 'HR', 'Lower CI', 'Upper CI', 'p-value', 'Bonferroni', 'FDR']
    results_all = []

    for t_pro in tqdm(pro_cols, desc="Processing features"):
        try:
            t_col = ["SLE", "fp-SLE", t_pro] + covar_cols
            # Give the feature a fixed name so the model formula does not
            # depend on how the original column is spelled.
            t_ra_pro = cox_data[t_col].rename(columns={t_pro: "target_pro"})

            cph = CoxPHFitter()
            cph.fit(
                t_ra_pro,
                duration_col="fp-SLE",
                event_col="SLE",
                formula=" + ".join(['target_pro'] + covar_cols),
            )

            hr = cph.hazard_ratios_.get('target_pro', None)

            # confidence_intervals_ is expressed for the coefficient (log hazard
            # ratio); exponentiate so the bounds are on the same scale as HR.
            coef_ci = cph.confidence_intervals_
            if '95% lower-bound' in coef_ci.columns:
                lbd = np.exp(coef_ci.loc['target_pro', '95% lower-bound'])
                ubd = np.exp(coef_ci.loc['target_pro', '95% upper-bound'])
            else:
                lbd, ubd = None, None

            pval = cph.summary.loc['target_pro', 'p']
            results_all.append({'Feature': t_pro, 'HR': hr, 'Lower CI': lbd, 'Upper CI': ubd, 'p-value': pval})

        except Exception as e:
            # Best effort: one failing feature must not abort the whole scan.
            print(f"Error processing feature '{t_pro}': {e}")

    if not results_all:
        # No row means no 'p-value' column to correct; emit just the header.
        print("No feature could be fitted; writing an empty result table.")
        pd.DataFrame(columns=result_columns).to_csv(out_file, index=False)
        return

    results_all_df = pd.DataFrame(results_all)
    raw_p = results_all_df['p-value'].fillna(1)

    # FDR correction (adjusted p-values; the reject flags are not needed).
    _, p_f_fdr = fdrcorrection(raw_p)

    # Bonferroni correction: multiply by the number of tests performed and cap
    # at 1, the largest value a p-value can take.
    p_f_bfi = (raw_p * len(results_all_df)).clip(upper=1.0)

    results_all_df["Bonferroni"] = p_f_bfi
    results_all_df["FDR"] = p_f_fdr
    results_all_df.to_csv(out_file, index=False)


In [None]:
# Run the per-feature Cox scan on the full `sle_pro` table.
cox_cal(
    cox_data=sle_pro,
    out_file="/Volumes/data_files/UKB_data/immune_result/cox3/sle_pro_cox.csv",
)
# Runs on the per-time-group tables, currently disabled:
# cox_cal(sle_pro_5_cox, "/Volumes/data_files/UKB_data/immune_result/cox3/sle_pro_5_cox.csv")
# cox_cal(sle_pro_10_cox, "/Volumes/data_files/UKB_data/immune_result/cox3/sle_pro_10_cox.csv")
# cox_cal(sle_pro_15_cox, "/Volumes/data_files/UKB_data/immune_result/cox3/sle_pro_15_cox.csv")