In [None]:
#!/usr/bin/env python
# coding: utf-8

import os
import math
import sys
import pathlib
import numpy as np
import pandas as pd
from tqdm.auto import tqdm
from IPython.display import clear_output

import warnings
import lifelines
from lifelines.utils import CensoringType
from lifelines.utils import concordance_index

from lifelines import CoxPHFitter
from lifelines.exceptions import ConvergenceError
from formulaic.errors import FactorEvaluationError
import zstandard
import pickle
import yaml
import ray
import argparse

In [None]:
# Root of the shared project storage; echoed so the notebook output records it.
base_path = "/sc-projects/sc-proj-ukb-cvd"
print(base_path)

# Per-project result directories, all derived from base_path and the label.
project_label = "22_medical_records"
project_path = "/".join((base_path, "results", "projects", project_label))
figure_path = project_path + "/figures"
output_path = project_path + "/data"

In [None]:
# Load the stored predictions and keep only the columns used below, indexed by eid.
ci_feather_path = "outputs/covid_cindeces_230425.feather"
ci_columns = ["eid", "logh", "event", "time", "predictors", "severity"]
ci_df = pd.read_feather(ci_feather_path)[ci_columns].set_index("eid")

In [None]:
# Distinct values used to drive the resampling loops.
eids = ci_df.index.unique()
predictors = ci_df["predictors"].unique()
severities = ci_df["severity"].unique()

# One sub-frame per (predictors, severity) combination, keyed by that tuple.
ci_df_dict = {
    group_key: group_df
    for group_key, group_df in ci_df.groupby(["predictors", "severity"])
}

In [None]:
# Pin the numeric back-ends to a single thread each before starting Ray.
for thread_var in ("MKL_NUM_THREADS", "NUMEXPR_NUM_THREADS", "OMP_NUM_THREADS"):
    os.environ[thread_var] = "1"

ray.init(num_cpus=12, include_dashboard=False)

In [None]:
@ray.remote
def calc_cindex(df_dict_ref, eids_i, i, severity, predictor):
    """Compute the C-index of one (predictor, severity) group on one resample.

    Args:
        df_dict_ref: Mapping from ``(predictor, severity)`` to a DataFrame that
            is indexed by eid and has ``time``, ``logh`` and ``event`` columns.
        eids_i: Array-like of eids drawn for this resample; repeated eids are
            honoured (a row is included once per draw).
        i: Resample number, passed through unchanged to the result.
        severity: Severity key selecting the group.
        predictor: Predictor key selecting the group.

    Returns:
        dict with keys ``iteration``, ``severity``, ``predictor`` and ``cindex``.
    """
    df = df_dict_ref[predictor, severity]

    # A with-replacement draw contains repeated eids. Filtering with
    # `index.isin` would keep each drawn eid only once and silently turn the
    # bootstrap into a subsample of unique subjects, so rows are instead
    # repeated according to how often their eid was drawn. Eids absent from
    # this group get a count of 0 and are skipped.
    draw_counts = pd.Series(eids_i).value_counts()
    row_repeats = draw_counts.reindex(df.index, fill_value=0).to_numpy()
    df_i = df.iloc[np.repeat(np.arange(len(df)), row_repeats)]

    # concordance_index scores agreement between larger predictions and longer
    # times; the complement is reported here.
    # NOTE(review): presumably `logh` is a log-hazard (higher = earlier event),
    # which is why the complement is taken — confirm.
    cindex = 1 - concordance_index(df_i["time"], df_i["logh"], df_i["event"])
    return {"iteration": i, "severity": severity, "predictor": predictor, "cindex": cindex}

# Number of bootstrap resamples and the seed that makes them reproducible
# under Restart & Run All.
N_BOOTSTRAP = 1000
BOOTSTRAP_SEED = 42
rng = np.random.default_rng(BOOTSTRAP_SEED)

# Put the grouped frames into the Ray object store once so every task shares them.
ci_df_dict_ref = ray.put(ci_df_dict)

eid_pool = np.asarray(eids)

rows_ray = []
for i in tqdm(range(N_BOOTSTRAP)):
    # One with-replacement draw of eids per iteration, reused for every
    # predictor/severity combination so they are evaluated on the same resample.
    eids_i = rng.choice(eid_pool, size=len(eid_pool), replace=True)
    for predictor in predictors:
        for severity in severities:
            ray_ref = calc_cindex.remote(ci_df_dict_ref, eids_i, i, severity, predictor)
            rows_ray.append(ray_ref)

# Collect results in submission order; tqdm shows how many tasks have finished.
rows_finished = [ray.get(r) for r in tqdm(rows_ray)]

In [None]:
# Build the frame straight from the list of result dicts: DataFrame.append was
# removed in pandas 2.0, and constructing from records works on every version.
benchmarks = pd.DataFrame(rows_finished)

In [None]:
# Median and 2.5% / 97.5% quantiles of the resampled C-index per group,
# rounded to three decimals.
quantile_levels = [0.5, 0.025, 0.975]
cindex_quantiles = benchmarks.groupby(["severity", "predictor"])["cindex"].quantile(quantile_levels)
benchmarks_agg = (
    cindex_quantiles
    .to_frame()
    .reset_index()
    .assign(cindex=lambda frame: frame["cindex"].round(3))
)

In [None]:
# Wide layout: one row per (severity, predictor), one column per quantile level
# (reset_index above stored the quantile level in the "level_2" column).
group_keys = ["severity", "predictor"]
nice_df = benchmarks_agg.pivot(
    index=group_keys,
    columns="level_2",
    values="cindex",
)

In [None]:
# Show the summary table ordered by severity, then predictor.
sort_keys = ["severity", "predictor"]
nice_df.reset_index().sort_values(by=sort_keys)