In [None]:
import pandas as pd

In [None]:
import glob
import os
import numpy as np

def _line_style(color, **extra):
    """Build the matplotlib line kwargs for one model: a colour plus optional extras."""
    return {"color": color, **extra}


# Per-model matplotlib line styling, keyed by model class name.
# The insertion order is meaningful: the plotting cell sorts models by their
# position in this mapping.
MODEL_KWARGS = {
    "YModel": _line_style("green", linestyle="dashed"),
    "YObsModel": _line_style("red", linestyle="dotted"),
    "TestedOnlyModel": _line_style("darkseagreen"),
    "RecensoringModel": _line_style("purple"),
    "Group0BaselineModel": _line_style("tab:blue"),  # optional: alpha=0.5
    "Group1BaselineModel": _line_style("tab:orange"),  # optional: alpha=0.5
    "PeerLossModel": _line_style("goldenrod"),
    "GroupPeerLossModel": _line_style("gold"),
    "JSModel": _line_style("steelblue"),
    "TruncatedLQModel": _line_style("slategray"),
    "DivideMixBasedModel": _line_style("skyblue"),
    # Disabled entries:
    #   "DivideMixCheatModel": _line_style("silver"),
    #   "SELFCheatModel": _line_style("skyblue"),
    "ITECorrectedModel": _line_style("chocolate"),
    "SELFModel": _line_style("midnightblue"),
    "SAREMModel": _line_style("purple"),
    "DCEMModel": _line_style("magenta"),
}

# Display labels used in figure legends, keyed by model class name.
# Labels may contain LaTeX math (the plotting cell enables text.usetex).
NAME_REPLACEMENT = dict([
    ("YModel", "$y$-model (oracle)"),
    ("YObsModel", "$y$-obs model"),
    ("TestedOnlyModel", "Tested-only"),
    ("Group0BaselineModel", "Group 0 only"),
    ("Group1BaselineModel", "Group 1 only"),
    ("PeerLossModel", "Peer loss"),
    ("GroupPeerLossModel", "Group peer loss"),
    ("ITECorrectedModel", "DragonNet"),  # alternative label: "ITE-corrected"
    ("JSModel", "Generalized JS"),
    ("TruncatedLQModel", "Truncated LQ"),
    ("SELFModel", "SELF"),
    ("SAREMModel", "SAREM"),
    ("DCEMModel", "DCEM (ours)"),
])



def _alpha_from_run_dir(run_dir):
    """Return the alpha value encoded in a run directory's name.

    The last path component is expected to end in ``alpha<number>``
    (e.g. ``sepsis_20230724_alpha1.000``); the text after the final
    ``alpha`` is parsed as a float.

    Raises:
        ValueError: if the name contains no ``alpha`` marker, or the text
            following it is not a valid number.
    """
    run_name = os.path.basename(os.path.normpath(run_dir))
    _, marker, alpha_text = run_name.rpartition("alpha")
    if not marker:
        raise ValueError(f"no 'alpha' marker in run directory name: {run_dir!r}")
    return float(alpha_text)


# Collect one results frame per run directory, together with the alpha value
# read from that directory's own name. Deriving the key from the name (instead
# of a hard-coded np.tile(np.linspace(0, 1, 11), 2)) keeps every frame paired
# with the right alpha even when the number or order of matched runs changes.
run_frames = []
run_alphas = []
for run_dir in sorted(glob.glob("/data4/username/disparate_censorship_mitigation_sepsis/sepsis_20230724_*alpha*/")):
    print(run_dir)
    # The first four CSV columns form the row MultiIndex.
    run_results = pd.read_csv(os.path.join(run_dir, "results.csv"), index_col=np.arange(4))
    # DCEMModel rows are kept only from runs whose path contains "testedonly".
    if "testedonly" not in run_dir:
        run_results = run_results[run_results.index.get_level_values("model") != "DCEMModel"]
    run_frames.append(run_results)
    run_alphas.append(_alpha_from_run_dir(run_dir))

# Alpha becomes the outermost index level (level 0) of the combined frame.
sepsis_results = pd.concat(run_frames, keys=run_alphas)
sepsis_results


In [None]:
# Spot check: load the raw results of the alpha = 1.000 run (no index columns)
# and show only the ITECorrectedModel rows.
df = pd.read_csv(
    "/data4/username/disparate_censorship_mitigation_sepsis/sepsis_20230724_alpha1.000/results.csv"
)
df.loc[df["model"] == "ITECorrectedModel"]

In [None]:
# ITECorrectedModel rows of the combined results, restricted to index
# levels 1 and 2 equal to (4, 1.5).
(
    sepsis_results
    .xs((4, 1.5), level=(1, 2))
    .xs("ITECorrectedModel", level="model")
)

In [None]:
import matplotlib.pyplot as plt
plt.rcParams['text.usetex'] = True

fig, ax = plt.subplots(1, 2, figsize=(8, 2))

# Rank models by their position in MODEL_KWARGS so lines are drawn (and listed
# in the legend) in a fixed order. The lookup table is built once rather than
# rebuilding a list for every row.
model_rank = {name: rank for rank, name in enumerate(MODEL_KWARGS)}
# Keep only rows whose index levels 1 and 2 equal (4, 1.5)
# (presumably k = 4 and testing disparity = 1.5 — TODO confirm).
sepsis_slice = sepsis_results.xs((4, 1.5), level=(1, 2)).sort_values(
    by="model", key=lambda column: column.map(lambda name: model_rank[name])
)

# Axis decoration does not depend on the model, so it is set once up front.
# Left panel (ax[0]) shows the ROC gap, right panel (ax[1]) shows the AUC.
shared_xlabel = (
    r"$\leftarrow$ Systolic BP more salient $\mid$ Resp. rate more salient $\rightarrow$"
    + "\n\n Systolic BP vs. resp. rate weighing, $s_T$"
)
ax[1].set_title(r"$\uparrow$ Discriminative performance")
ax[0].set_title(r"$\downarrow$ Bias mitigation")
ax[1].set_xlabel(shared_xlabel)
ax[0].set_xlabel(shared_xlabel)
ax[1].set_ylabel("AUC")
ax[0].set_ylabel("ROC Gap")

# Defaults shared by every line; a model's own MODEL_KWARGS entry is merged on
# top, so a per-model "alpha" or "marker" overrides the default instead of
# raising a duplicate-keyword TypeError.
default_line_kwargs = {"alpha": 0.5, "marker": "."}

models = sepsis_slice.index.get_level_values("model").unique()
include_models = []
legend_handles = []
for m in models:
    # These models are left out of this figure.
    if m in ["Group0BaselineModel", "Group1BaselineModel", "YModel"]:
        continue
    model_results = sepsis_slice.xs(m, level="model").sort_index()
    # Level 0 is the outermost index level of the slice; it is used as the x axis.
    x_values = model_results.index.get_level_values(0).unique()
    auc = model_results.xs("AUC", level="metric")["overall"]
    roc_gap = model_results.xs("ROCGap", level="metric")["diff"]

    # Fall back to the raw model name when no display label is registered,
    # matching the .get(...) fallback used for the legend labels below.
    label = NAME_REPLACEMENT.get(m, m)
    line_kwargs = {**default_line_kwargs, **MODEL_KWARGS[m]}

    auc_line = ax[1].plot(x_values, auc, label=label, **line_kwargs)[0]
    ax[0].plot(x_values, roc_gap, label=None, **line_kwargs)
    include_models.append(m)
    legend_handles.append(auc_line)

# Pass handles explicitly so each label is tied to its own line rather than
# relying on the implicit artist order across both axes.
lgd = fig.legend(
    handles=legend_handles,
    labels=list(map(lambda x: NAME_REPLACEMENT.get(x, x), include_models)),
    loc="lower center",
    ncol=4,
    bbox_to_anchor=(0.5, -0.2),
)
fig.tight_layout()
# The legend sits below the axes, so it must be listed as an extra artist for
# the tight bounding box to include it in the saved file.
plt.savefig("sepsis_wrt_boundaries.pdf", bbox_extra_artists=(lgd,), bbox_inches='tight')

In [None]:
# Inspect the frame built in the plotting cell: sepsis_results restricted to
# index levels 1 and 2 equal to (4, 1.5), sorted by model.
sepsis_slice

In [None]:
import pickle
import torch
import io
import os

import sys
# Make ".." the first import-path entry unless it already is
# (presumably so modules in the parent directory can be imported — TODO confirm).
if not sys.path[0] == "..":
    sys.path[:0] = [".."]

class CPU_Unpickler(pickle.Unpickler):
    """Unpickler that loads pickled torch storages with ``map_location='cpu'``.

    Lookups of ``torch.storage._load_from_bytes`` are intercepted and replaced
    by a loader that calls ``torch.load(..., map_location='cpu')``; every other
    global is resolved by the standard ``pickle.Unpickler``.

    NOTE: unpickling can execute arbitrary code — only use on trusted files.
    """

    def find_class(self, module, name):
        """Resolve ``module.name``, substituting a CPU-mapping storage loader."""
        is_storage_loader = (module, name) == ('torch.storage', '_load_from_bytes')
        if not is_storage_loader:
            return super().find_class(module, name)

        def _load_on_cpu(raw_bytes):
            # Deserialize the storage bytes, mapping them onto the CPU.
            return torch.load(io.BytesIO(raw_bytes), map_location='cpu')

        return _load_on_cpu


MODEL_DIR = "/data4/username/disparate_censorship_mitigation_sepsis/sepsis_20240123_dcem_testedonly_alpha0.000/"
SETTING_DIR = "k_4_testing_disparity_1.5/"


def _load_pickle_on_cpu(path):
    """Open ``path`` in binary mode and unpickle its contents with CPU_Unpickler."""
    with open(path, "rb") as handle:
        return CPU_Unpickler(handle).load()


# The data dictionary lives directly under the run directory; the model and
# the sim object live under the per-setting sub-directory.
print("Loading data...")
data_dict = _load_pickle_on_cpu(os.path.join(MODEL_DIR, "data_dict.pkl"))

print("Loading model...")
model = _load_pickle_on_cpu(
    os.path.join(MODEL_DIR, SETTING_DIR, "DCEMModel_model_info/model.pkl")
)

# (Loading of the auxiliary model info is currently disabled.)

print("Loading sim...")
sim = _load_pickle_on_cpu(os.path.join(MODEL_DIR, SETTING_DIR, "sim.pkl"))


In [None]:
import numpy as np

# Pull the train and test splits stored under the ("k_4", "testing_disparity_1.5") key.
D_tr, D_ts = (
    data_dict[("k_4", "testing_disparity_1.5")][split] for split in ("train", "test")
)
X_tr, A_tr, T_tr = (D_tr[field] for field in ("X", "A", "T"))
X_ts, A_ts, T_ts = (D_ts[field] for field in ("X", "A", "T"))

# Test-set design matrix: X, then X multiplied element-wise by A (as a column),
# then A itself as a final column.
A_ts_column = A_ts[:, None]
XA_ts = np.concatenate([X_ts, X_ts * A_ts_column, A_ts_column], axis=1)

In [None]:
from sklearn.metrics import roc_auc_score

# Score the model's uni_propensity_model on the test design matrix: the AUC of
# the second predict_proba column against T_ts.
propensity_model = model.uni_propensity_model
props = propensity_model.predict_proba(XA_ts)
roc_auc_score(y_true=T_ts, y_score=props[:, 1])

In [None]:
# NOTE(review): "./data4/" ends in a slash and looks like a directory rather
# than a CSV file, so this read_csv call will presumably fail — confirm the
# intended file path before relying on sepsis_old.
sepsis_old = pd.read_csv("./data4/")