In [None]:
!pip install statsmodels
!pip install lifelines==0.26.4

In [None]:
# Imports here.
import numpy as np
import pandas as pd
import os
import statsmodels.api as sm
import statsmodels.formula.api as smf
from statsmodels.stats.multitest import fdrcorrection
from lifelines import CoxPHFitter

import warnings
# Suppress every warning for the rest of the session.
# NOTE(review): this is a blanket filter, so any warnings emitted while fitting the
# Cox models below are hidden as well -- consider narrowing it to specific categories.
warnings.filterwarnings("ignore")

In [None]:
# Select NDDs (outcomes to model). Each label is used both as the event column
# name and as the prefix of that outcome's input CSV in the Cox cells below.
# Presumably AD = Alzheimer's disease, PD = Parkinson's disease, DEM = dementia -- TODO confirm.
ndd_list = ['AD', 'PD', 'DEM']

# Drug names with enough variation in exposure to make separate low / med / high
# estimates worthwhile -- for all other drugs, "any exposure" is considered sufficient.
# Each name is combined with a 'low_' / 'med_' / 'high_' prefix to form the
# exposure column names read from the input tables.
codes = ['fexofenadine', 'lormetazepam', 'nizatidine', 'lacidipine', 'rifaximin', 'losartan', 'metformin', 'colchicine', 'valproicacid', 'acetylcysteine', 'montelukast', 'sildenafil', 'amiodarone', 'clomethiazole', 'gemfibrozil', 'alogliptin', 'mirtazapine', 'aclidiniumbromide', 'clomipramine', 'nebivolol', 'minoxidil', 'clonidine', 'alfuzosin', 'rizatriptan', 'mecysteine', 'rimonabant', 'naratriptan', 'paracetamolandmetoclopramide', 'orlistat', 'griseofulvin', 'telmisartan', 'pravastatin', 'apixaban', 'linagliptin', 'amlodipine', 'diltiazem', 'nortriptyline', 'propantheline', 'indomethacin', 'ciclesonide', 'nabumetone', 'perindopril', 'digoxin', 'oxerutins', 'amisulpride', 'bisacodyl', 'carbocisteine', 'gabapentin', 'misoprostol', 'ticagrelor', 'tetrabenazine', 'methylcellulose', 'epogam', 'dantrolene', 'cyproheptadine', 'terbutaline', 'fentanyl', 'fluoxetine', 'moxisylyte', 'zonisamide', 'doxepin', 'sodiumvalproate', 'thioridazine', 'fesoterodine', 'clonazepam', 'tramadol', 'ibandronicacid', 'gliclazide', 'naltrexone', 'chlortalidone', 'orphenadrine', 'sulpiride', 'aminophylline', 'aspirin', 'glimepiride', 'budesonide', 'eprosartan', 'carvedilol', 'rupatadine', 'acemetacin', 'ibuprofen', 'aceclofenac', 'olsalazine', 'zafirlukast', 'trimethoprim', 'fosinopril', 'inositolnicotinate', 'lercanidipine', 'indapamide', 'metoprolol', 'febuxostat', 'captopril', 'disulfiram', 'propylthiouracil', 'tolbutamide', 'ezetimibe', 'risedronate', 'bisoprolol', 'pantoprazole', 'flavoxate', 'nateglinide', 'hydrochlorothiazideandramipril', 'magnesium', 'eletriptan', 'trifluoperazine', 'moxonidine', 'oxcarbazepine', 'cetirizine', 'albuterol', 'hydroxychloroquine', 'trimipramine', 'trimethoprimandsulfamethoxazole', 'nifedipine', 'sotalol', 'loratadine', 'naftidrofuryl', 'modafinil', 'esomeprazole', 'cisapride', 'oxytetracycline', 'sumatriptan', 'calcitriol', 'ranitidine', 'leflunomide', 'oxybutynin', 'perphenazine', 'ropinirole', 'ursodeoxycholicacid', 'citalopram', 'darifenacin', 
'buspirone', 'alendronatesodium', 'theophylline', 'paracetamolandcodeine', 'sulfasalazine', 'pre-exposureprophylaxis', 'lamotrigine', 'dexamethasone', 'stalevo', 'mianserin', 'nitrofurantoin', 'memantine', 'lorazepam', 'anticholinergic', 'dronedarone', 'enalapril', 'fluticasone', 'lithium', 'diclofenac', 'quinapril', 'pyridostigmine', 'methylphenidate', 'trihexyphenidyl', 'co-amilofruse', 'mizolastine', 'irbesartan', 'glipizide', 'acamprosate', 'cyproterone', 'methotrexate', 'tolfenamicacid', 'prasugrel', 'isotretinoin', 'ibuprofenandcodeine', 'letrozole', 'nicardipine', 'magnesiumhydroxide', 'ipratropium', 'prednisone', 'saxagliptin', 'risperidone', 'dipipanone', 'tadalafil', 'simvastatin', 'acarbose', 'quinine', 'levothyroxine', 'triamterene', 'chlordiazepoxide', 'pramipexole', 'dihydrocodeine', 'allopurinol', 'terfenadine', 'mesalazine', 'trandolapril', 'nadolol', 'frovatriptan', 'ranolazine', 'azithromycin', 'cholestyramine', 'zopiclone', 'vigabatrin', 'tiotropium', 'bromocriptine', 'tiaprofenicacid', 'phenelzine', 'diflunisal', 'hydralazine', 'acitretin', 'ciclosporin', 'methyldopa', 'codeine', 'rifampicinandisoniazid', 'phenytoin', 'co-amilozide', 'etoricoxib', 'sulindac', 'liothyronine', 'atenolol', 'cinacalcet', 'co-beneldopa', 'lansoprazole', 'indoramin', 'fenofibrate', 'zolmitriptan', 'hydrochlorothiazideandlisinopril', 'trospium', 'sucralfate', 'eplerenone', 'cromolynsodium', 'atorvastatin', 'minocycline', 'hydroxycarbamide', 'pioglitazone', 'rasagiline', 'carbamazepine', 'isosorbidedinitrate', 'formoterol', 'pentoxifylline', 'cabergoline', 'diazepam', 'furosemide', 'amiloride', 'tranexamicacid', 'ramipril', 'lofepramine', 'etidronicacid', 'promazine', 'aliskren', 'penicillamine', 'bismuthsubsalicylate', 'cyclopenthiazide', 'finasteride', 'ironandfolicacid', 'sertraline', 'nefazodone', 'solifenacin', 'felodipine', 'fluvoxamine', 'tramacet', 'amoxicillin', 'tolterodine', 'isosorbidemononitrateandisosorbidedinitrate', 'betamethasone', 'indometacin', 
'almotriptan', 'escitalopram', 'ketotifen', 'tamsulosin', 'olanzapine', 'levocetirizine', 'fluphenazine', 'tizanidine', 'cerivastatin', 'bezafibrate', 'methadone', 'bendroflumethiazide', 'rivaroxaban', 'dosulepin', 'verapamil', 'mirabegron', 'salbutamol', 'amitriptyline', 'omeprazole', 'baclofen', 'fishoil', 'venlafaxine', 'meptazinol', 'fludrocortisone', 'levetiracetam', 'rosiglitazone', 'isoniazid', 'empagliflozin', 'carbimazole', 'lisinopril', 'doxazosin', 'co-magaldrox', 'temazepam', 'imipramine', 'gaviscon', 'mycophenolicacid', 'methenamine', 'nicorandil', 'loperamide', 'mometasone', 'sterculia', 'cimetidine', 'hydroxyurea', 'celecoxib', 'imidapril', 'terazosin', 'flecainide', 'thyroxine', 'dantronanddocusate', 'dabigatran', 'prucalopride', 'mebeverine', 'macrogol', 'lactulose', 'glibenclamide', 'sibutramine', 'meloxicam', 'valdecoxib', 'manevac', 'topiramate', 'primidone', 'paroxetine', 'acrivastine', 'strontium', 'zolpidem', 'oxprenolol', 'sulfinpyrazone', 'pizotifen', 'rivastigmine', 'bumetanide', 'agomelatine', 'famotidine', 'reboxetine', 'pregabalin', 'alprostadil', 'haloperidol', 'candesartan', 'ivabradine', 'linaclotide', 'atenololandnifedipine', 'repaglinide', 'selegiline', 'olmesartan', 'oxazepam', 'paracetamol', 'dutasteride', 'prazosin', 'sodiumcromoglicate', 'amantadine', 'nefopam', 'mercaptopurine', 'insulin', 'docusate', 'buprenorphine', 'isosorbidemononitrate', 'valganciclovir', 'azathioprine', 'propranolol', 'prednisolone', 'celiprolol', 'rofecoxib', 'trazodone', 'tamoxifen', 'raloxifene', 'moclobemide', 'metolazone', 'omega-3', 'dipyridamole', 'alverine', 'alendronicacid', 'quetiapine', 'duloxetine', 'beclometasone', 'azapropazone', 'lymecycline', 'aripiprazole', 'domperidone', 'rabeprazole', 'hydrocortisone', 'phenobarbital', 'betahistine', 'salmeterol', 'loprazolam', 'anastrozole', 'fybogel', 'canagliflozin', 'clopidogrel', 'balsalazide', 'vardenafil', 'lumiracoxib', 'labetalol', 'danazol', 'clodronicacid', 'warfarin', 'methysergide', 
'hydrochlorothiazide', 'sodiumpicosulfate', 'tenoxicam', 'colesevelam', 'ciprofibrate', 'chlorpromazine', 'pancrelipase', 'co-danthrusate', 'disopyramide', 'oxycodone', 'valsartan', 'nitrazepam', 'tacrolimus', 'lacosamide', 'flupentixol', 'cyclophosphamide', 'ethambutol', 'rosuvastatin', 'clobazam', 'donepezil', 'spironolactone', 'co-careldopa', 'naproxen', 'senna', 'fluvastatin', 'propiverine', 'timolol', 'dutasterideandtamsulosin', 'dapagliflozin', 'migril', 'pilocarpine', 'morphine', 'sevelamer', 'etodolac', 'dapsone', 'sitagliptin', 'nedocromil', 'propafenone', 'oxitropium']
print(len(codes))

# Run Cox with low, medium, high exposures without APOE

In [None]:
# Cox proportional-hazards screen WITHOUT APOE adjustment.
# For every NDD outcome and every eligible drug, fit one model that contains the
# drug's low / med / high exposure indicators together with the shared covariates,
# then record hazard ratio, 95% CI, p-value and counts for each exposure level.
ndd_list = ['AD', 'PD', 'DEM']
timeline = 'low_med_high'
model = 'COX'

# Exposure levels, in the order their columns enter the model and the results table.
lags = ('low', 'med', 'high')

# A drug is modelled only if at least this many low-exposed participants have the outcome.
min_exposed_cases = 5

# Covariates shared by every model, defined once instead of being repeated per loop.
base_covariates = ['TOWNSEND', 'age_at_tenure', 'GENETIC_SEX']
# QC0_* columns -- presumably ICD-10 comorbidity indicators (TODO confirm coding).
icd10_covariates = [
    'QC0_A04', 'QC0_B02', 'QC0_B37', 'QC0_E10', 'QC0_E11', 'QC0_E27', 'QC0_E78', 'QC0_E87',
    'QC0_F20', 'QC0_F31', 'QC0_F32', 'QC0_F33', 'QC0_F40', 'QC0_F41', 'QC0_F42', 'QC0_F43',
    'QC0_F44', 'QC0_F45', 'QC0_F48', 'QC0_F50', 'QC0_F51',
    'QC0_G40', 'QC0_G43', 'QC0_G47', 'QC0_H66',
    'QC0_I10', 'QC0_I11', 'QC0_I12', 'QC0_I15', 'QC0_I20', 'QC0_I21', 'QC0_I25', 'QC0_I47',
    'QC0_I48', 'QC0_I49', 'QC0_I50', 'QC0_I60', 'QC0_I61', 'QC0_I62', 'QC0_I63', 'QC0_I64',
    'QC0_I65', 'QC0_I66', 'QC0_I67', 'QC0_I69', 'QC0_I82',
    'QC0_K04', 'QC0_K05', 'QC0_K20', 'QC0_K21', 'QC0_K22', 'QC0_K25', 'QC0_K31', 'QC0_K51',
    'QC0_K59', 'QC0_K70', 'QC0_K71', 'QC0_K72', 'QC0_K73', 'QC0_K74', 'QC0_K75', 'QC0_K76',
    'QC0_L25', 'QC0_L40', 'QC0_L50',
    'QC0_M06', 'QC0_M13', 'QC0_M15', 'QC0_M16', 'QC0_M17', 'QC0_M18', 'QC0_M19', 'QC0_M32',
    'QC0_M45', 'QC0_M79', 'QC0_M80', 'QC0_M81', 'QC0_M88',
    'QC0_N04', 'QC0_N10', 'QC0_N18', 'QC0_N19', 'QC0_N30', 'QC0_N31', 'QC0_N32', 'QC0_N39',
    'QC0_N40', 'QC0_N94',
]

results = []

for ndd in ndd_list:

    #Load df
    df = pd.read_csv(f'{ndd}_low_med_high_and_ICD10_APOE_MAY_14_2025.csv', parse_dates = True)

    # Find codes to use so we don't have to use EVERYTHING.
    # Counts are read straight from df; no per-drug copy of the covariate frame is needed.
    # NOTE(review): only the low-exposure column is screened -- the med / high columns of a
    # selected drug may still have few or no exposed cases. Confirm this is intended.
    codes_with_data = []
    for code in codes:
        low_col = f'low_{code}'
        n = df[low_col].sum()
        n_pairs = df.loc[df[low_col] == 1, ndd].sum()
        # Skip drugs with no low-exposed participants, too few exposed cases, or
        # where every exposed participant has the outcome.
        if n != 0 and n_pairs >= min_exposed_cases and n != n_pairs:
            print(code)
            codes_with_data.append(code)

    print(ndd)
    print(len(codes_with_data))

    for code in codes_with_data:

        exposure_cols = [f'{lag}_{code}' for lag in lags]
        # Column order matches the original model specification.
        m = df[base_covariates + ['tenure', ndd] + exposure_cols + icd10_covariates]

        cph = CoxPHFitter()
        # step_size is kept small -- presumably to help the optimiser converge (TODO confirm).
        cph.fit(m, duration_col = 'tenure', event_col = ndd, show_progress=False, step_size = 0.001)

        # The summary is indexed by covariate name, so rows are looked up by name rather
        # than by position; its 'p' column replaces the private _compute_p_values() call.
        summary = cph.summary

        for lag, col in zip(lags, exposure_cols):
            row = summary.loc[col]
            HR = row['exp(coef)']
            ci_min = row['exp(coef) lower 95%']
            ci_max = row['exp(coef) upper 95%']
            p = row['p']

            # n: participants at this exposure level; n_pairs: those who also have the outcome.
            n = m[col].sum()
            n_pairs = m.loc[m[col] == 1, ndd].sum()

            print(code, ndd, HR, ci_min, ci_max, p, n, n_pairs)
            results.append((code, ndd, model, timeline, lag, HR, ci_min, ci_max, p, n_pairs, n))

cox1 = pd.DataFrame(results, columns=('PRIOR','OUTCOME', 'MODEL','TIMELINE', 'LAG', 'HR', 'ci_min', "ci_max", 'P_VAL', 'N_pairs', 'N'))

In [None]:
# Gather the Cox results, order them by ascending p-value and discard any row
# containing a NaN. The ordering matters: fdrcorrection is told below that its
# input is already sorted.
output = (
    pd.concat([cox1])
    .sort_values(by="P_VAL")
    .dropna()
)

# FDR correction (statsmodels defaults) over all remaining p-values at once;
# attach the corrected p-values and the reject / keep flags as new columns.
rejected, p_corr = fdrcorrection(output['P_VAL'], is_sorted=True)
output = output.assign(P_CORR=p_corr, SIGNIFICANT=rejected)

output

In [None]:
# Write the FDR-corrected results table to disk (column names kept, row index dropped).
output.to_csv(
    'UKB_ALL_low_med_high_with_ICD10_MAY_14_2025.csv',
    index=False,
    header=True,
)

In [None]:
# Upload the results CSV written by the previous cell to /data/results/ using the `dx` command-line tool.
!dx upload UKB_ALL_low_med_high_with_ICD10_MAY_14_2025.csv --path /data/results/UKB_ALL_low_med_high_with_ICD10_MAY_14_2025.csv

# Fix files to only include people for whom we have APOE data

In [None]:
# Load the previously created analysis table for one NDD.
# NOTE: this section handles a single NDD per run, while the APOE-adjusted Cox cell
# further down reads a filtered file for each of AD, PD and DEM -- re-run this cell and
# the filter / save / upload cells after it once per NDD.
ndd = 'AD'
df = pd.read_csv(
    f'{ndd}_low_med_high_and_ICD10_APOE_MAY_14_2025.csv',
    parse_dates=True,
    low_memory=False,
)
df

In [None]:
# Tabulate the APOE genotypes, counting missing values as their own category.
df['APOE_GENOTYPE'].value_counts(dropna=False)

In [None]:
# Keep only participants with a usable APOE genotype: drop rows where the
# genotype is missing or recorded as the literal string 'unknown'.
# (The genotype tabulation that used to open this cell was never displayed, so it was removed.)
has_apoe = df['APOE_GENOTYPE'].notna() & (df['APOE_GENOTYPE'] != 'unknown')
df = df[has_apoe]
df

In [None]:
# Save the APOE-filtered table for the current NDD (column names kept, row index dropped).
df.to_csv(
    f'{ndd}_high_low_removed_no_APOE_data_MAY_14_2025.csv',
    index=False,
    header=True,
)

In [None]:
# Upload the APOE-filtered table for the current NDD to /data/files_for_cox/ using the `dx`
# command-line tool; IPython substitutes {ndd} from the Python namespace before running the command.
!dx upload {ndd}_high_low_removed_no_APOE_data_MAY_14_2025.csv --path /data/files_for_cox/{ndd}_high_low_removed_no_APOE_data_MAY_14_2025.csv

# Run Cox with low, medium, high exposures WITH APOE

In [None]:
# Cox proportional-hazards screen WITH APOE adjustment.
# Same procedure as the unadjusted screen, but run on the tables restricted to
# participants with APOE data and with the 'e3/e4' and 'e4/e4' columns added as covariates.
ndd_list = ['AD', 'PD', 'DEM']
timeline = 'low_med_high'
model = 'COX'

# Exposure levels, in the order their columns enter the model and the results table.
lags = ('low', 'med', 'high')

# A drug is modelled only if at least this many low-exposed participants have the outcome.
min_exposed_cases = 5

# Covariates shared by every model, defined once instead of being repeated per loop.
base_covariates = ['TOWNSEND', 'age_at_tenure', 'GENETIC_SEX']
# QC0_* columns -- presumably ICD-10 comorbidity indicators (TODO confirm coding).
icd10_covariates = [
    'QC0_A04', 'QC0_B02', 'QC0_B37', 'QC0_E10', 'QC0_E11', 'QC0_E27', 'QC0_E78', 'QC0_E87',
    'QC0_F20', 'QC0_F31', 'QC0_F32', 'QC0_F33', 'QC0_F40', 'QC0_F41', 'QC0_F42', 'QC0_F43',
    'QC0_F44', 'QC0_F45', 'QC0_F48', 'QC0_F50', 'QC0_F51',
    'QC0_G40', 'QC0_G43', 'QC0_G47', 'QC0_H66',
    'QC0_I10', 'QC0_I11', 'QC0_I12', 'QC0_I15', 'QC0_I20', 'QC0_I21', 'QC0_I25', 'QC0_I47',
    'QC0_I48', 'QC0_I49', 'QC0_I50', 'QC0_I60', 'QC0_I61', 'QC0_I62', 'QC0_I63', 'QC0_I64',
    'QC0_I65', 'QC0_I66', 'QC0_I67', 'QC0_I69', 'QC0_I82',
    'QC0_K04', 'QC0_K05', 'QC0_K20', 'QC0_K21', 'QC0_K22', 'QC0_K25', 'QC0_K31', 'QC0_K51',
    'QC0_K59', 'QC0_K70', 'QC0_K71', 'QC0_K72', 'QC0_K73', 'QC0_K74', 'QC0_K75', 'QC0_K76',
    'QC0_L25', 'QC0_L40', 'QC0_L50',
    'QC0_M06', 'QC0_M13', 'QC0_M15', 'QC0_M16', 'QC0_M17', 'QC0_M18', 'QC0_M19', 'QC0_M32',
    'QC0_M45', 'QC0_M79', 'QC0_M80', 'QC0_M81', 'QC0_M88',
    'QC0_N04', 'QC0_N10', 'QC0_N18', 'QC0_N19', 'QC0_N30', 'QC0_N31', 'QC0_N32', 'QC0_N39',
    'QC0_N40', 'QC0_N94',
]
# APOE genotype indicator columns, appended after the QC0_* covariates.
apoe_covariates = ['e3/e4', 'e4/e4']

results = []

for ndd in ndd_list:

    #Load df
    df = pd.read_csv(f'{ndd}_high_low_removed_no_APOE_data_MAY_14_2025.csv', parse_dates = True)

    # Find codes to use so we don't have to use EVERYTHING.
    # Counts are read straight from df; no per-drug copy of the covariate frame is needed.
    # NOTE(review): only the low-exposure column is screened -- the med / high columns of a
    # selected drug may still have few or no exposed cases. Confirm this is intended.
    codes_with_data = []
    for code in codes:
        low_col = f'low_{code}'
        n = df[low_col].sum()
        n_pairs = df.loc[df[low_col] == 1, ndd].sum()
        # Skip drugs with no low-exposed participants, too few exposed cases, or
        # where every exposed participant has the outcome.
        if n != 0 and n_pairs >= min_exposed_cases and n != n_pairs:
            print(code)
            codes_with_data.append(code)

    print(ndd)
    print(len(codes_with_data))

    for code in codes_with_data:

        exposure_cols = [f'{lag}_{code}' for lag in lags]
        # Column order matches the original model specification.
        m = df[base_covariates + ['tenure', ndd] + exposure_cols + icd10_covariates + apoe_covariates]

        cph = CoxPHFitter()
        # step_size is kept small -- presumably to help the optimiser converge (TODO confirm).
        cph.fit(m, duration_col = 'tenure', event_col = ndd, show_progress=False, step_size = 0.001)

        # The summary is indexed by covariate name, so rows are looked up by name rather
        # than by position; its 'p' column replaces the private _compute_p_values() call.
        summary = cph.summary

        for lag, col in zip(lags, exposure_cols):
            row = summary.loc[col]
            HR = row['exp(coef)']
            ci_min = row['exp(coef) lower 95%']
            ci_max = row['exp(coef) upper 95%']
            p = row['p']

            # n: participants at this exposure level; n_pairs: those who also have the outcome.
            n = m[col].sum()
            n_pairs = m.loc[m[col] == 1, ndd].sum()

            print(code, ndd, HR, ci_min, ci_max, p, n, n_pairs)
            results.append((code, ndd, model, timeline, lag, HR, ci_min, ci_max, p, n_pairs, n))

cox1 = pd.DataFrame(results, columns=('PRIOR','OUTCOME', 'MODEL','TIMELINE', 'LAG', 'HR', 'ci_min', "ci_max", 'P_VAL', 'N_pairs', 'N'))

In [None]:
# Gather the APOE-adjusted Cox results, order them by ascending p-value and discard
# any row containing a NaN. The ordering matters: fdrcorrection is told below that
# its input is already sorted.
output = (
    pd.concat([cox1])
    .sort_values(by="P_VAL")
    .dropna()
)

# FDR correction (statsmodels defaults) over all remaining p-values at once;
# attach the corrected p-values and the reject / keep flags as new columns.
rejected, p_corr = fdrcorrection(output['P_VAL'], is_sorted=True)
output = output.assign(P_CORR=p_corr, SIGNIFICANT=rejected)

output

In [None]:
# Write the FDR-corrected, APOE-adjusted results table to disk (column names kept, row index dropped).
output.to_csv(
    'UKB_ALL_low_med_high_with_ICD10_and_APOE_MAY_14_2025.csv',
    index=False,
    header=True,
)

In [None]:
# Upload the APOE-adjusted results CSV written by the previous cell to /data/results/ using the `dx` command-line tool.
!dx upload UKB_ALL_low_med_high_with_ICD10_and_APOE_MAY_14_2025.csv --path /data/results/UKB_ALL_low_med_high_with_ICD10_and_APOE_MAY_14_2025.csv