# Benchmarks

## Initialize

In [1]:
import os
import math
import pathlib
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
from IPython.display import clear_output

import warnings
from lifelines.utils import CensoringType
from lifelines.utils import concordance_index

In [2]:
# Experiment identifier; it is also the name of the experiment directory.
experiment = "230629"

# Root locations; the experiment directory is nested below the project data folder.
base_path = "/home/jakobs"
project_path = f"{base_path}/data"
experiment_path = f"{project_path}/{experiment}"
# Make sure the experiment directory (and any missing parents) exists.
os.makedirs(experiment_path, exist_ok=True)

# Partition indices 0..9.
partitions = list(range(10))

# Tag interpolated into input file names further down; it is set to the experiment identifier.
today = experiment

In [3]:
# Data splits in use; "test_left" and "test_right" are currently commented out.
splits = ["train", "test"]

In [4]:
# Endpoint definition table, indexed by its "endpoint" column.
endpoint_defs = pd.read_feather(f"{base_path}/data/endpoints_epic_md.feather")
endpoint_defs = endpoint_defs.set_index("endpoint")

# All endpoint identifiers, in table order.
endpoints = endpoint_defs.index.tolist()

In [5]:
# Eligible-eid table written for this experiment (file name carries the `today` tag).
eligable_eids = pd.read_feather(f"{experiment_path}/eligible_eids_{today}.feather")

# Mapping: endpoint -> that endpoint's "eid_list" entry.
eids_dict = (
    eligable_eids
    .set_index("endpoint")
    .loc[:, "eid_list"]
    .to_dict()
)

In [6]:
# Set the thread-count environment variables for MKL, NumExpr and OpenMP to 4.
# NOTE(review): numpy and pandas are already imported in the first cell — confirm these
# limits still take effect when they are set this late.
%env MKL_NUM_THREADS=4
%env NUMEXPR_NUM_THREADS=4
%env OMP_NUM_THREADS=4

env: MKL_NUM_THREADS=4
env: NUMEXPR_NUM_THREADS=4
env: OMP_NUM_THREADS=4


In [7]:
#ray.shutdown()

In [8]:
import ray

# Attach to an already running Ray cluster (address="auto") rather than starting one.
# Dashboard options that were tried before and are currently disabled:
#   dashboard_port=24762, dashboard_host="0.0.0.0", include_dashboard=True, webui_url="0.0.0.0"
# NOTE(review): no visible cell submits work to Ray (the `@ray.remote` decorator on
# `get_cox_info` is commented out) — confirm the cluster connection is still needed.
ray.init(address="auto")

2023-06-30 10:24:54,024	INFO worker.py:1364 -- Connecting to existing Ray cluster at address: 144.82.131.99:52557...
2023-06-30 10:24:54,030	INFO worker.py:1553 -- Connected to Ray cluster.


0,1
Python version:,3.11.0
Ray version:,2.3.0


# CoxPH models: load fitted models and extract hazard ratios

In [9]:
# CoxPH working directories of this experiment.
# `in_path` is a Path object, while `model_path` and `out_path` are plain strings.
model_path = f"{experiment_path}/coxph/models"
out_path = f"{experiment_path}/coxph/predictions"
in_path = pathlib.Path(f"{experiment_path}/coxph/input")

# Only the predictions directory is created here.
os.makedirs(out_path, exist_ok=True)

In [10]:
import pickle
import zstandard

def load_pickle(fp):
    """Read a zstandard-compressed pickle file and return the unpickled object.

    Parameters
    ----------
    fp : str or path-like
        Location of the compressed pickle file.

    Returns
    -------
    object
        Whatever object was pickled into the file.

    Notes
    -----
    Unpickling executes code embedded in the file, so only use this on files you trust.
    """
    with open(fp, "rb") as compressed_file:
        reader = zstandard.ZstdDecompressor().stream_reader(compressed_file)
        with reader as stream:
            # Pull the whole decompressed payload into memory before unpickling.
            payload = stream.read()
    return pickle.loads(payload)

In [11]:
# Discover the fitted CoxPH model files and parse their names into a tidy table.
# Expected file name: <endpoint>_<score>_<model>_<partition>.p, where the endpoint holds
# exactly one underscore (e.g. "phecode_979") and the model name may hold several.
cox_paths = sorted(
    fname for fname in os.listdir(model_path)
    if "_Retina" in fname or "+Retina" in fname or "I(" in fname
)

# The partition is anchored at the end of the name so that underscores inside the model
# name cannot shift the fields, and the dot of the ".p" suffix is escaped.
cox_name_pattern = (
    r"^(?P<endpoint>[^_]+_[^_]+)_(?P<score>[^_]+)_(?P<model>.+)_(?P<partition>\d+)\.p$"
)
cox_parsed = pd.Series(cox_paths, dtype="object").str.extract(cox_name_pattern)

# Fail loudly on names that do not follow the scheme instead of mis-assigning fields.
cox_unparsed = [
    fname for fname, parsed_ok in zip(cox_paths, cox_parsed["partition"].notna())
    if not parsed_ok
]
if cox_unparsed:
    raise ValueError(
        f"Cannot parse {len(cox_unparsed)} model file name(s), e.g. {cox_unparsed[:3]}"
    )

cox = (
    cox_parsed
    .assign(path=cox_paths)
    .astype({"partition": int})
    .loc[:, ["endpoint", "score", "model", "partition", "path"]]
    .sort_values(["endpoint", "score", "partition"])
    # Keep only known endpoints and scores that include the retina covariate.
    .query("endpoint in @endpoints")
    .query("score.str.contains('Retina')")
    .reset_index(drop=True)
)
cox

Unnamed: 0,endpoint,score,model,partition,path
0,OMOP_4306655,ASCVD+Retina,RetinaUKB,0,OMOP_4306655_ASCVD+Retina_RetinaUKB_0.p
1,OMOP_4306655,ASCVD+Retina,RetinaUKB,1,OMOP_4306655_ASCVD+Retina_RetinaUKB_1.p
2,OMOP_4306655,ASCVD+Retina,RetinaUKB,2,OMOP_4306655_ASCVD+Retina_RetinaUKB_2.p
3,OMOP_4306655,ASCVD+Retina,RetinaUKB,3,OMOP_4306655_ASCVD+Retina_RetinaUKB_3.p
4,OMOP_4306655,ASCVD+Retina,RetinaUKB,4,OMOP_4306655_ASCVD+Retina_RetinaUKB_4.p
...,...,...,...,...,...
14715,phecode_979,SCORE2+Retina,RetinaUKB,5,phecode_979_SCORE2+Retina_RetinaUKB_5.p
14716,phecode_979,SCORE2+Retina,RetinaUKB,6,phecode_979_SCORE2+Retina_RetinaUKB_6.p
14717,phecode_979,SCORE2+Retina,RetinaUKB,7,phecode_979_SCORE2+Retina_RetinaUKB_7.p
14718,phecode_979,SCORE2+Retina,RetinaUKB,8,phecode_979_SCORE2+Retina_RetinaUKB_8.p


In [12]:
#endpoints = sorted(cox.endpoint.unique().tolist())
# Distinct scores and partition numbers among the selected models, each sorted ascending.
scores, partitions = (
    sorted(cox[column].unique().tolist())
    for column in ("score", "partition")
)

In [13]:
#import ray

#@ray.remote
def get_cox_info(p):
    """Load one fitted CoxPH model and collect its hazard ratios.

    Parameters
    ----------
    p : str
        File name (relative to ``model_path``) of the form
        ``<endpoint>_<score>_<model>_<partition>.p``. The endpoint spans the first two
        underscore-separated tokens (e.g. ``phecode_979``), the score is the third
        token and the partition is the last token. Everything between the score and
        the partition is the model name, which may itself contain underscores.

    Returns
    -------
    dict
        ``endpoint``, ``score``, ``model``, ``partition`` (string, as found in the file
        name), ``hrs`` (all hazard ratios of the model as a dict), ``hrs_ret`` (hazard
        ratio of the covariate named after the endpoint) and the placeholders
        ``hrs_ret_age`` / ``hrs_ret_sex``, which are always NaN.

    Raises
    ------
    ValueError
        If the file name has fewer than five underscore-separated tokens.
    KeyError
        If the model has no covariate named after the endpoint.
    """
    tokens = p.split("_")
    if len(tokens) < 5:
        raise ValueError(f"Unexpected CoxPH model file name: {p!r}")

    endpoint = f"{tokens[0]}_{tokens[1]}"
    score = tokens[2]
    # Read the partition from the end so that underscores inside the model name
    # (e.g. "ImageTraining_[]_ConvNeXt_MLPHead_cropratio0.66") stay part of the model.
    model = "_".join(tokens[3:-1])
    partition_token = tokens[-1]
    partition = partition_token[:-2] if partition_token.endswith(".p") else partition_token

    cph = load_pickle(f"{model_path}/{p}")
    hrs = cph.hazard_ratios_.to_dict()

    # The retina covariate is stored under the endpoint's own name.
    hr_ret = hrs[endpoint]
    hr_ret_age = np.nan
    hr_ret_sex = np.nan

    return {
        "endpoint": endpoint,
        "score": score,
        "model": model,
        "partition": partition,
        "hrs": hrs,
        "hrs_ret": hr_ret,
        "hrs_ret_age": hr_ret_age,
        "hrs_ret_sex": hr_ret_sex,
    }

In [14]:
# Inspect the first selected model by hand: load it and parse its file name
# (<endpoint>_<score>_<model>_<partition>.p) into module-level variables.
p = cox["path"].iloc[0]
cph = load_pickle(f"{model_path}/{p}")
p_split = p.split("_")
endpoint = f"{p_split[0]}_{p_split[1]}"
score = p_split[2]
# The model name may contain underscores, so the partition is read from the end and
# everything between the score and the partition is kept as the model name.
model = "_".join(p_split[3:-1])
partition = p_split[-1][:-2] if p_split[-1].endswith(".p") else p_split[-1]
hrs = cph.hazard_ratios_.to_dict()

In [15]:
# Show the coefficient table and fit statistics of the model held in `cph`.
cph.print_summary()

0,1
model,lifelines.CoxPHFitter
duration col,'OMOP_4306655_time'
event col,'OMOP_4306655_event'
penalizer,0.0125
l1 ratio,0.0
baseline estimation,breslow
number of observations,6504
number of events observed,102
partial log-likelihood,-772.51
time fit was run,2023-06-30 09:12:16 UTC

Unnamed: 0,coef,exp(coef),se(coef),coef lower 95%,coef upper 95%,exp(coef) lower 95%,exp(coef) upper 95%,cmp to,z,p,-log2(p)
age,0.35,1.43,0.08,0.2,0.51,1.22,1.67,0.0,4.48,<0.005,17.03
sex_Male,0.18,1.2,0.16,-0.12,0.49,0.88,1.63,0.0,1.17,0.24,2.05
smoking_status_Current,0.23,1.26,0.36,-0.47,0.94,0.63,2.55,0.0,0.65,0.51,0.96
smoking_status_Former,0.1,1.1,0.18,-0.25,0.44,0.78,1.56,0.0,0.56,0.58,0.8
smoking_status_Never,-0.14,0.87,0.18,-0.48,0.21,0.62,1.23,0.0,-0.78,0.43,1.2
diabetes,0.11,1.12,0.06,0.0,0.22,1.0,1.25,0.0,1.99,0.05,4.43
systolic_blood_pressure,0.07,1.08,0.07,-0.07,0.22,0.93,1.25,0.0,0.98,0.33,1.62
cholesterol,-0.08,0.92,0.08,-0.23,0.07,0.79,1.07,0.0,-1.03,0.30,1.72
hdl_cholesterol,0.07,1.08,0.08,-0.08,0.23,0.93,1.25,0.0,0.97,0.33,1.6
OMOP_4306655,0.46,1.59,0.15,0.18,0.75,1.19,2.11,0.0,3.18,<0.005,9.43

0,1
Concordance,0.78
Partial AIC,1565.01
log-likelihood ratio test,69.82 on 10 df
-log2(p) of ll-ratio test,34.27


In [16]:
# Load every selected model in turn and keep one info record per model file.
cox_model_paths = cox["path"].tolist()
rows = []
for p in tqdm(cox_model_paths):
    rows.append(get_cox_info(p))

  0%|          | 0/14720 [00:00<?, ?it/s]

In [17]:
# One row per model file; the columns are the keys of the records returned by get_cox_info.
hrs_endpoints = pd.DataFrame(data=rows)

In [18]:
# Display the collected hazard-ratio table.
hrs_endpoints 

Unnamed: 0,endpoint,score,model,partition,hrs,hrs_ret,hrs_ret_age,hrs_ret_sex
0,OMOP_4306655,ASCVD+Retina,RetinaUKB,0,"{'age': 1.425971352638649, 'sex_Male': 1.20008...",1.588715,,
1,OMOP_4306655,ASCVD+Retina,RetinaUKB,1,"{'age': 1.403872283712559, 'sex_Male': 1.21462...",1.517196,,
2,OMOP_4306655,ASCVD+Retina,RetinaUKB,2,"{'age': 1.380318531736509, 'sex_Male': 1.22405...",1.557983,,
3,OMOP_4306655,ASCVD+Retina,RetinaUKB,3,"{'age': 1.3946404732317188, 'sex_Male': 1.2081...",1.542834,,
4,OMOP_4306655,ASCVD+Retina,RetinaUKB,4,"{'age': 1.4390273470582076, 'sex_Male': 1.2187...",1.560359,,
...,...,...,...,...,...,...,...,...
14715,phecode_979,SCORE2+Retina,RetinaUKB,5,"{'age': 1.000935811390497, 'sex_Male': 1.03025...",1.285094,,
14716,phecode_979,SCORE2+Retina,RetinaUKB,6,"{'age': 1.0217726359478303, 'sex_Male': 1.0276...",1.314320,,
14717,phecode_979,SCORE2+Retina,RetinaUKB,7,"{'age': 1.005468448949052, 'sex_Male': 1.05263...",1.320810,,
14718,phecode_979,SCORE2+Retina,RetinaUKB,8,"{'age': 1.0112701799397874, 'sex_Male': 1.0686...",1.331639,,


In [19]:
# Persist the hazard-ratio table as a feather file inside the experiment directory.
name = "hrs_endpoints"
hrs_endpoints.to_feather(f"{experiment_path}/{name}.feather")

In [20]:
# Show the path the table was written to.
f"{experiment_path}/{name}.feather"

'/home/jakobs/data/230629/hrs_endpoints.feather'

In [24]:
# Display the hazard-ratio table again.
# NOTE(review): the saved output of this cell lists other scores/models and far more rows
# than the earlier display of `hrs_endpoints`, so it appears to stem from a different
# run — re-execute the notebook top to bottom to refresh it.
hrs_endpoints

Unnamed: 0,endpoint,score,model,partition,hrs,hrs_ret,hrs_ret_age,hrs_ret_sex
0,OMOP_4306655,Age+Sex+Retina,ImageTraining_[]_ConvNeXt_MLPHead_cropratio0.66,0,"{'age': 1.1569650058150076, 'sex_Male': 1.2521...",3.067072,,
1,OMOP_4306655,Age+Sex+Retina,ImageTraining_[]_ConvNeXt_MLPHead_cropratio0.66,1,"{'age': 1.1930293430626582, 'sex_Male': 1.2329...",3.343209,,
2,OMOP_4306655,Age+Sex+Retina,ImageTraining_[]_ConvNeXt_MLPHead_cropratio0.66,2,"{'age': 1.1675932654272487, 'sex_Male': 1.2889...",3.046918,,
3,OMOP_4306655,Age+Sex+Retina,ImageTraining_[]_ConvNeXt_MLPHead_cropratio0.66,3,"{'age': 1.1449403577207453, 'sex_Male': 1.2303...",3.465731,,
4,OMOP_4306655,Age+Sex+Retina,ImageTraining_[]_ConvNeXt_MLPHead_cropratio0.66,4,"{'age': 1.2232534874397427, 'sex_Male': 1.2894...",3.204823,,
...,...,...,...,...,...,...,...,...
51519,phecode_979,Retina,ImageTraining_[]_ConvNeXt_MLPHead_cropratio0.66,17,{'phecode_979': 2.9028276127399497},2.902828,,
51520,phecode_979,Retina,ImageTraining_[]_ConvNeXt_MLPHead_cropratio0.66,18,{'phecode_979': 2.8184150503434156},2.818415,,
51521,phecode_979,Retina,ImageTraining_[]_ConvNeXt_MLPHead_cropratio0.66,19,{'phecode_979': 2.8725367312067025},2.872537,,
51522,phecode_979,Retina,ImageTraining_[]_ConvNeXt_MLPHead_cropratio0.66,20,{'phecode_979': 3.0356558826419855},3.035656,,


In [25]:
# Plot the model held in `cph` (a lifelines CoxPHFitter).
# NOTE(review): `cph` is only assigned in the earlier single-model inspection cell. The
# saved output of this cell is a NameError, i.e. it was run in a kernel session where
# that cell had not been executed — re-run the notebook in order.
cph.plot()

NameError: name 'cph' is not defined

In [None]:
#[[]]

In [None]:
# Summary of the model held in `cph`; this repeats the earlier summary cell and
# requires `cph` to be defined in the current kernel session.
cph.print_summary()