# Benchmarks

## Initialize

In [None]:
import os
import math
import pathlib
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
from IPython.display import clear_output

import warnings
from lifelines.utils import CensoringType
from lifelines.utils import concordance_index

In [None]:
# Root directory under which all inputs and outputs of this notebook live.
base_path = "/home/jakobs"
project_path = f"{base_path}/data"

# Experiment tag; every artefact of this run is stored below experiment_path.
experiment = '230321'
experiment_path = f"{project_path}/{experiment}"
pathlib.Path(experiment_path).mkdir(parents=True, exist_ok=True)

# Default partition ids 0..9.
partitions = list(range(10))

# Date tag used in the eligible-eids file name read further down.
today = '230321'

In [None]:
splits = ["train", "test"]  # disabled: "test_left", "test_right"

In [None]:
# Endpoint definition table, indexed by its "endpoint" column.
endpoint_defs = pd.read_feather(
    f"{base_path}/data/endpoints_epic_md.feather"
).set_index("endpoint")

# All endpoint identifiers, in file order.
endpoints = endpoint_defs.index.tolist()

In [None]:
# Table with one "eid_list" entry per endpoint for this experiment.
eligable_eids = pd.read_feather(f"{experiment_path}/eligible_eids_{today}.feather")

# endpoint -> eid_list lookup (a repeated endpoint keeps its last entry).
eids_dict = dict(zip(eligable_eids["endpoint"], eligable_eids["eid_list"]))

In [None]:
# Set the MKL / numexpr / OpenMP thread-count environment variables to 4.
# NOTE(review): numpy and pandas are already imported in the first cell; these
# variables are typically read when the libraries initialise -- confirm that
# setting them here still has the intended effect.
%env MKL_NUM_THREADS=4
%env NUMEXPR_NUM_THREADS=4
%env OMP_NUM_THREADS=4

In [None]:
#ray.shutdown()

In [None]:
import ray

# address="auto" attaches to an already running Ray cluster rather than
# starting a new local one (the dashboard options are left disabled).
# NOTE(review): no visible cell below uses ray (the @ray.remote decorator on
# get_cox_info is commented out) -- confirm this connection is still needed.
ray.init(address="auto")#, dashboard_port=24762, dashboard_host="0.0.0.0", include_dashboard=True)#, webui_url="0.0.0.0"))

# Train COX

In [None]:
# Locations of the CoxPH artefacts inside the experiment folder.
# in_path is a pathlib.Path, whereas model_path and out_path are plain strings.
in_path = pathlib.Path(f"{experiment_path}/coxph/input")
# Directory holding the stored model files that are listed and loaded below.
model_path = f"{experiment_path}/coxph/models"

# Predictions directory; created (including parents) if it does not exist yet.
out_path = f"{experiment_path}/coxph/predictions"
pathlib.Path(out_path).mkdir(parents=True, exist_ok=True)

In [None]:
import pickle
import zstandard

def load_pickle(fp):
    """Return the object stored in a zstandard-compressed pickle file.

    The file at ``fp`` is opened in binary mode, decompressed in full and the
    resulting bytes are unpickled.

    Unpickling can execute arbitrary code, so only use this on files that
    come from a trusted source.
    """
    decompressor = zstandard.ZstdDecompressor()
    with open(fp, "rb") as raw, decompressor.stream_reader(raw) as stream:
        payload = stream.read()
    return pickle.loads(payload)

In [None]:
# List the stored model files. os.listdir replaces the former `!ls` shell
# escape: it is plain Python and raises FileNotFoundError when the directory is
# missing instead of handing the shell's error text on as "file names".
cox_paths = sorted(os.listdir(model_path))
# Keep only files whose name contains "_Retina", "+Retina" or "I(".
cox_paths = [p for p in cox_paths if "_Retina" in p or "+Retina" in p or "I(" in p]

# File names are split on "_": parts 0 and 1 form the endpoint, part 2 is the
# score, part 3 the model and part 4 the partition followed by ".p".
cox = (
    pd.Series(cox_paths).str.split("_", expand=True)
    .assign(path=cox_paths)
    .assign(endpoint=lambda x: x[0] + "_" + x[1])
    .assign(score=lambda x: x[2])
    .assign(model=lambda x: x[3])
    # regex=False: ".p" is a literal suffix here, not "any character then p".
    .assign(partition=lambda x: x[4].str.replace(".p", "", regex=False).astype(int))
    [["endpoint", "score", "model", "partition", "path"]]
    .sort_values(["endpoint", "score", "partition"])
    # Restrict to the known endpoints and to scores mentioning "Retina".
    .query("endpoint ==@ endpoints")
    .query("score.str.contains('Retina')")
    .reset_index(drop=True)
)
cox

In [None]:
#endpoints = sorted(cox.endpoint.unique().tolist())
# Distinct scores and partitions that actually occur in the model table.
# Note: this overwrites the default `partitions` list set during initialisation.
scores = sorted(cox["score"].unique().tolist())
partitions = sorted(cox["partition"].unique().tolist())

In [None]:
#import ray

#@ray.remote
def get_cox_info(p):
    """Collect the hazard ratios of one stored model file.

    ``p`` is a file name inside ``model_path``. It is split on ``"_"``:
    parts 0 and 1 form the endpoint, part 2 is the score, part 3 the model
    and part 4 the partition (with ``".p"`` removed, kept as a string).

    Returns a dict with those name parts, the complete hazard-ratio mapping
    under ``"hrs"``, the hazard ratio of the covariate named like the
    endpoint under ``"hrs_ret"``, and NaN placeholders for ``"hrs_ret_age"``
    and ``"hrs_ret_sex"``.

    Raises ``IndexError`` if the file name has fewer than five parts and
    ``KeyError`` if the model has no covariate named like the endpoint.
    """
    # presumably a fitted lifelines Cox model -- only `hazard_ratios_` is used
    fitted = load_pickle(f"{model_path}/{p}")

    name_parts = p.split("_")
    endpoint = name_parts[0] + "_" + name_parts[1]
    score, model, partition_field = name_parts[2], name_parts[3], name_parts[4]

    hazard_ratios = fitted.hazard_ratios_.to_dict()

    return {
        "endpoint": endpoint,
        "score": score,
        "model": model,
        "partition": partition_field.replace(".p", ""),
        "hrs": hazard_ratios,
        "hrs_ret": hazard_ratios[endpoint],
        # Age/sex-specific ratios are not extracted; stored as NaN.
        "hrs_ret_age": np.nan,
        "hrs_ret_sex": np.nan,
    }

In [None]:
# Unpack the first model file by hand (same steps as get_cox_info) so that
# `cph` and the parsed name parts stay available for interactive inspection.
p = cox["path"].iloc[0]
cph = load_pickle(f"{model_path}/{p}")

p_split = p.split("_")
endpoint = p_split[0] + "_" + p_split[1]
score, model = p_split[2], p_split[3]
partition = p_split[4].replace(".p", "")

hrs = cph.hazard_ratios_.to_dict()

In [None]:
# Show the fit summary of the model currently bound to `cph`.
cph.print_summary()

In [None]:
# Extract the hazard-ratio record of every model file listed in `cox`.
# Kept as an explicit loop: `rows` retains the records gathered so far if one
# file fails, and `p` is left bound to the last processed path.
rows = []

for p in tqdm(cox.path.tolist()):
    rows.append(get_cox_info(p))

In [None]:
# One row per model file; the columns are the keys returned by get_cox_info.
hrs_endpoints = pd.DataFrame(rows)

In [None]:
# Display the assembled hazard-ratio table.
hrs_endpoints 

In [None]:
# Persist the hazard-ratio table as a feather file in the experiment folder.
name = "hrs_endpoints"
hrs_endpoints.to_feather(f"{experiment_path}/{name}.feather")

In [None]:
# Display the hazard-ratio table again after it has been written to disk.
hrs_endpoints

In [None]:
# Plot the model currently bound to `cph`
# (presumably the lifelines coefficient plot -- confirm).
cph.plot()

In [None]:
#[[]]

In [None]:
# Show the fit summary of the model currently bound to `cph` once more.
cph.print_summary()