In [None]:
import sys
import re
import gc
sys.path.append("/nfs/nas-7.1/ckwu/mtl-icda-ht")

import json
import pickle
from pathlib import Path
from argparse import Namespace

import pandas as pd

import torch
from torch.utils.data import Dataset, DataLoader
from scipy.stats import ttest_rel
from transformers import BertTokenizerFast

from utilities.data import MedicalDxDataset, DxBatchCollator, MedicalDxNERIOBDataset, convert_icds_to_indices, split_by_div
from utilities.model import BertDxModel, BertDxNERModel, encoder_names_mapping
from utilities.utils import move_bert_input_to_device, set_seeds
from utilities.evaluation import predict_whole_set_dx, get_top_k_accuracies, get_evaluations

## Configuration

In [None]:
"""
    Configuration
"""
# Read the single-task diagnosis settings and expose them as attributes on `args`.
config_path = Path("./dx_config.json")
config = json.loads(config_path.read_bytes())
args = Namespace(**config)

# Stop immediately if a CUDA device was requested but none is available.
wants_cuda = "cuda" in args.device
if wants_cuda:
    assert torch.cuda.is_available()

# Seed via the project helper before any data or model work happens.
set_seeds(args.seed)

## Data

In [None]:
# Load the pickled EMRs and ICD codes, then convert the codes to label indices.
# NOTE(review): unpickling executes arbitrary code; only point these paths at trusted files.
emrs = pickle.loads(Path(args.emr_path).read_bytes())
icds = pickle.loads(Path(args.dx_path).read_bytes())
icd_ids = convert_icds_to_indices(icds, full_code=args.fc)

# Validation split: the same fold/remainder selection is applied to inputs and labels.
data_l = [emrs, icd_ids]
valid_emrs = split_by_div(emrs, args.fold, args.remainder, mode="valid")
valid_dxs = split_by_div(icd_ids, args.fold, args.remainder, mode="valid")

# Dataset & DataLoader (no shuffling, so predictions line up with `valid_dxs`).
tokenizer = BertTokenizerFast.from_pretrained(encoder_names_mapping[args.tokenizer])
collate_fn = DxBatchCollator(tokenizer)
valid_set = MedicalDxDataset(valid_emrs, valid_dxs, tokenizer)
valid_loader = DataLoader(
    valid_set,
    batch_size=args.bs,
    shuffle=False,
    pin_memory=True,
    collate_fn=collate_fn,
)

## Model

In [None]:
# Sizes passed to BertDxModel.
# NOTE(review): hard-coded; confirm they match the checkpoint being loaded.
EMBED_SIZE = 768
LABEL_SIZE = 97

model = BertDxModel(
    model_name=encoder_names_mapping[args.encoder],
    embed_size=EMBED_SIZE,
    label_size=LABEL_SIZE
)
# map_location="cpu": tensors saved from a GPU process are otherwise restored to
# that same device, which fails when it is not visible in this session. The
# weights are copied into the (CPU-resident) model parameters either way.
model.load_state_dict(torch.load(args.ckpt_path, map_location="cpu"))

## Evaluation

In [None]:
# Call predict_whole_set_dx on the validation loader, then detach the returned
# tensor and move it to CPU so it can be evaluated without holding the device.
preds = predict_whole_set_dx(model, valid_loader, args.device).detach().cpu()

In [None]:
# Display the top-k accuracy table for the validation labels (called with k=9
# over all LABEL_SIZE label ids).
get_top_k_accuracies(valid_dxs, preds, k=9, labels=range(LABEL_SIZE))

## k-Fold Cross Validation

### Prerequisite

In [None]:
"""
    Configuration
"""
# Switch `config` / `args` over to the multi-task (diagnosis + NER) settings.
config_path = Path("./dx_ner_config.json")
config = json.loads(config_path.read_bytes())
args = Namespace(**config)

set_seeds(args.seed)

# Data: EMRs, ICD label indices and NER spans, gathered in one list so that
# every fold can split all three the same way.
# NOTE(review): unpickling executes arbitrary code; only load trusted files.
emrs = pickle.loads(Path(args.emr_path).read_bytes())
icds = pickle.loads(Path(args.dx_path).read_bytes())
icd_ids = convert_icds_to_indices(icds, full_code=args.fc)
ner_spans_l = pickle.loads(Path(args.ner_spans_l_path).read_bytes())
data_l = [emrs, icd_ids, ner_spans_l]

# Others: tokenizer and label-space sizes used when rebuilding the model per fold.
tokenizer = BertTokenizerFast.from_pretrained(encoder_names_mapping[args.tokenizer])
# collate_fn = DxBatchCollator(tokenizer)
# EMBED_SIZE = 768
DX_LABEL_SIZE = 97
NER_LABEL_SIZE = 3

### Eval Loop

In [None]:
# One top-k accuracy table and one evaluation table are collected per fold.
top_k_acc_dfs = []
eval_dfs = []

for k in range(args.fold):
    print(f"Start evaluating fold = {k}:\n")
    # Config: retarget the checkpoint path at fold k. `\d+` replaces the whole
    # remainder number; a single `\d` would leave trailing digits behind once
    # remainders reach two digits (e.g. "remainder-10" -> "remainder-110").
    args.ckpt_path = re.sub(pattern=r"remainder\-\d+", repl=f"remainder-{k}", string=args.ckpt_path)

    # Data: validation split of EMRs, diagnosis labels and NER spans for this fold.
    valid_emrs, valid_dxs, valid_ners = [split_by_div(data, args.fold, remainder=k, mode="valid") for data in data_l]
    valid_set = MedicalDxNERIOBDataset(valid_emrs, valid_dxs, valid_ners, tokenizer)
    valid_loader = DataLoader(valid_set, args.bs, shuffle=False, pin_memory=True, collate_fn=valid_set.collate_fn)

    # Model: rebuild the multi-task model and restore this fold's weights.
    model = BertDxNERModel(
        encoder=encoder_names_mapping[args.encoder],
        dx_label_size=DX_LABEL_SIZE,
        ner_label_size=NER_LABEL_SIZE,
        loss_weights=args.lw
    )
    # map_location="cpu" keeps loading independent of the device the checkpoint
    # was saved from.
    model.load_state_dict(torch.load(args.ckpt_path, map_location="cpu"))

    # Evaluation: scores go to CPU; the arg-max over the last dimension is the prediction.
    scores = predict_whole_set_dx(model, valid_loader, args.device).detach().cpu()
    preds = scores.argmax(dim=-1)
    top_k_acc_df = get_top_k_accuracies(valid_dxs, scores, k=9, labels=range(DX_LABEL_SIZE))
    top_k_acc_dfs.append(top_k_acc_df)
    eval_df = get_evaluations(valid_dxs, preds, DX_LABEL_SIZE, scores, args.encoder)
    eval_dfs.append(eval_df)

    # Drop every per-fold object (the NER split included) before the next fold loads.
    del valid_emrs, valid_dxs, valid_ners, valid_set, valid_loader, model, scores, preds, top_k_acc_df, eval_df
    gc.collect()

### Top-k Accuracy

In [None]:
# Concatenate transposed copies of the per-fold tables WITHOUT rebinding
# `top_k_acc_dfs`: the previous in-place rebinding made this cell transpose the
# frames again on every re-run, silently changing the result.
top_k_acc_dfs_cat = pd.concat([fold_df.T for fold_df in top_k_acc_dfs])

# Column-wise mean and standard deviation (pandas default, ddof=1) across folds,
# each reshaped into a single labelled row.
acc_mean_df = top_k_acc_dfs_cat.mean(axis=0).to_frame().T.rename({0: "mean"})
acc_std_df = top_k_acc_dfs_cat.std(axis=0).to_frame().T.rename({0: "std"})
acc_mean_std_df = pd.concat(objs=[acc_mean_df, acc_std_df])

acc_mean_std_df

### Combined Evaluations

In [None]:
# Stack the per-fold evaluation tables row-wise.
eval_dfs_cat = pd.concat(eval_dfs)

# Column-wise mean / standard deviation (pandas default, ddof=1) across folds,
# each turned into a one-row frame labelled "mean" / "std".
eval_mean_df = eval_dfs_cat.mean(axis=0).to_frame().T.rename(index={0: "mean"})
eval_std_df = eval_dfs_cat.std(axis=0).to_frame().T.rename(index={0: "std"})
eval_mean_std_df = pd.concat([eval_mean_df, eval_std_df])

eval_mean_std_df

### Save Evaluation Results

In [None]:
# Normalise the configured output directory to a Path and create it if missing.
args.eval_save_dir = Path(args.eval_save_dir)
args.eval_save_dir.mkdir(parents=True, exist_ok=True)

# Despite the `_dir` suffix, these two are CSV file paths inside that directory.
acc_save_dir = args.eval_save_dir / f"{args.encoder}_top_k_acc_mean_std.csv"
eval_save_dir = args.eval_save_dir / f"{args.encoder}_eval_mean_std.csv"

# Write each summary table, then read it straight back using the same index column.
acc_mean_std_df.to_csv(acc_save_dir, index_label="index")
loaded_acc_df = pd.read_csv(acc_save_dir, index_col="index")

eval_mean_std_df.to_csv(eval_save_dir, index_label="index")
loaded_eval_df = pd.read_csv(eval_save_dir, index_col="index")

### Load Evaluation Results

In [None]:
# Read back a previously saved mean/std evaluation CSV and display it.
# NOTE(review): absolute, machine-specific path; consider deriving it from args.eval_save_dir.
to_load = Path("/nfs/nas-7.1/ckwu/mtl-icda-ht/comparisons/eval_results/dx/ClinicalBERT_eval_mean_std.csv")

# The "index" column is the row label written by the save cell (index_label="index").
df = pd.read_csv(to_load, index_col="index")
df

## Old Code

In [None]:
"""
    Observe Evaluations
"""
# Per-fold tables, grouped by model type; each key gets its own independent list.
eval_results = {model_type: [] for model_type in ("dx", "dx_ner")}

def combine_evaluations(dfs: list[pd.DataFrame]) -> pd.DataFrame:
    """Stack per-fold tables into a single frame with one row block per fold.

    Each input frame has its ``"acc"`` column renamed to ``"remainder-<i>"``
    (``i`` is the frame's position in ``dfs``) and is then transposed, so that
    column becomes a row labelled with the fold it came from.

    Args:
        dfs: Per-fold frames, in fold order.

    Returns:
        The row-wise concatenation of the transposed frames, or ``None`` when
        ``dfs`` is empty (kept for backward compatibility with the old loop).
    """
    fold_rows = [
        df.rename({"acc": f"remainder-{i}"}, axis=1).T
        for i, df in enumerate(dfs)
    ]
    if not fold_rows:
        return None
    # Concatenate once: growing the frame pairwise inside a loop re-copies all
    # previously accumulated rows on every iteration.
    return pd.concat(fold_rows, axis=0)

# Read every fold's top-k accuracy CSV (indexed by its "k" column) for both model types.
for model_type in ["dx", "dx_ner"]:
    for r in range(config["fold"]):
        file_path = f"./eval_results/{model_type}/{config['fold']}-fold/remainder-{r}_top_k_accuracies.csv"
        df = pd.read_csv(file_path, index_col="k")
        eval_results[model_type].append(df)

# One row per fold, one column per k.
dx_result = combine_evaluations(eval_results["dx"])
mtl_result = combine_evaluations(eval_results["dx_ner"])

# Mean accuracy over folds for each model, stacked as rows "dx" and "dx_ner".
dx_mean = dx_result.mean(axis=0).to_frame().T
mtl_mean = mtl_result.mean(axis=0).to_frame().T
comparison = pd.concat([dx_mean, mtl_mean], ignore_index=True).rename(index={0: "dx", 1: "dx_ner"})

# Paired, one-sided t-test per k (k = 1..9): is the multi-task accuracy greater
# than the single-task accuracy across folds?
p_values = [
    ttest_rel(mtl_result[top_k].values, dx_result[top_k].values, alternative="greater")[1]
    for top_k in range(1, 10)
]

# Shift the 0-based column labels to 1..9 so they line up with the k columns above.
p_values_df = pd.DataFrame(p_values).T.rename(columns=lambda c: c + 1)
comparison = pd.concat([comparison, p_values_df]).rename(index={0: "p_value"})

comparison

In [None]:
# Single-task diagnosis: collect `best_val_acc` from each fold's JSON result file.
# NOTE(review): the fold count (10) and the absolute path are hard-coded.
best_val_accs = []

for r in range(10):
    log_path = Path(f"/nfs/nas-7.1/ckwu/mtl-icda-ht/components_testing/diagnosis/eval_results/encoder-BioBERT_dx-97_lr-4e-05_remainder-{r}.json")
    fold_log = json.loads(log_path.read_bytes())
    best_val_accs.append(fold_log["best_val_acc"])

In [None]:
import numpy as np

# Convert the collected accuracies to an array and show (mean, std).
# NumPy's std defaults to ddof=0, whereas the pandas `.std()` used for the
# k-fold tables defaults to ddof=1, so the two are not directly comparable.
best_val_accs = np.array(best_val_accs)
best_val_accs.mean(), best_val_accs.std()

In [None]:
# Single-task NER: each fold's result file is plain text holding one float.
# NOTE(review): the fold count (10) and the absolute path are hard-coded.
single_ner_accs = []

for r in range(10):
    acc_file = Path(f"/nfs/nas-7.1/ckwu/mtl-icda-ht/components_testing/ner/eval_results/encoder-BioBERT_nepochs-10_bs-16_lr-5e-05_fold-10_remainder-{r}.txt")
    single_ner_accs.append(float(acc_file.read_text().rstrip()))

In [None]:
# Convert to an array and show (mean, std); NumPy's std uses ddof=0 by default.
single_ner_accs = np.array(single_ner_accs)
single_ner_accs.mean(), single_ner_accs.std()

In [None]:
import json
from pathlib import Path

# Earlier sweeps over loss weights / learning rates, kept for reference:
# lws = [f"{d}.0" for d in range(5, 9)]
# lrs = ["4e-05"]
# dx_acc_comps = {lr: list() for lr in lrs}
# ner_acc_comps = {lr: list() for lr in lrs}

# Current sweep: the `nersteps` setting embedded in the run directory name.
stps = [3, 4, 5]
dx_acc_comps = {stp: [] for stp in stps}
ner_acc_comps = {stp: [] for stp in stps}

for stp in stps:
    # Per-fold best accuracies for this setting (reset on every iteration, so
    # after the loop these lists hold the values of the last `stp` only).
    best_dx_accs = []
    best_ner_accs = []
    for r in range(10):
        log_path = Path(f"/nfs/nas-7.1/ckwu/mtl-icda-ht/multitask_models/diagnosis_ner/models_separate_update/encoder-BioBERT_fc-False_lw-6.0_nersteps-{stp}_lr-4e-05_remainder-{r}/train_log.json")
        train_log = json.loads(log_path.read_bytes())
        best_dx_accs.append(train_log["best_dx_acc"])
        best_ner_accs.append(train_log["best_ner_acc"])

    # Mean over folds, stored under this setting.
    mean_dx_acc = sum(best_dx_accs) / len(best_dx_accs)
    mean_ner_acc = sum(best_ner_accs) / len(best_ner_accs)
    dx_acc_comps[stp].append(mean_dx_acc)
    ner_acc_comps[stp].append(mean_ner_acc)

In [None]:
import numpy as np

# (mean, std) of the per-fold diagnosis accuracies left behind by the sweep
# loop, i.e. those of the LAST `stp` value only, since the list is reset per
# setting. NumPy's std uses ddof=0 by default.
best_dx_accs = np.array(best_dx_accs)
best_dx_accs.mean(), best_dx_accs.std()

In [None]:
# Same as for the diagnosis accuracies: per-fold NER accuracies of the last
# `stp` value in the sweep only; std uses NumPy's default ddof=0.
best_ner_accs = np.array(best_ner_accs)
best_ner_accs.mean(), best_ner_accs.std()

In [None]:
# Mean best accuracies per `nersteps` setting (runs with lw-6.0): (diagnosis, NER).
dx_acc_comps, ner_acc_comps

In [None]:
# 3e-5 no Aho
# NOTE(review): the label above presumably describes the run that produced this
# cell's saved output (3e-5, without Aho-Corasick?); on a fresh run it displays
# the same `dx_acc_comps` as the sweep above. Confirm or remove.
dx_acc_comps

In [None]:
# No Aho-Cora
# NOTE(review): presumably "no Aho-Corasick"; like the previous cell, this only
# re-displays the current `dx_acc_comps` on a fresh run. Confirm or remove.
dx_acc_comps

In [None]:
# Mean diagnosis / NER accuracy over folds for the last `stp` setting of the sweep.
sum(best_dx_accs) / len(best_dx_accs), sum(best_ner_accs) / len(best_ner_accs)

In [None]:
import math

In [None]:
# Pure (single-task) NER: read one float per fold and show the mean over folds.
# NOTE(review): reads the same files as the "NER single task accuracy" cell above.
best_pure_ner_accs = []
for r in range(10):
    acc_file = Path(f"/nfs/nas-7.1/ckwu/mtl-icda-ht/components_testing/ner/eval_results/encoder-BioBERT_nepochs-10_bs-16_lr-5e-05_fold-10_remainder-{r}.txt")
    best_pure_ner_accs.append(float(acc_file.read_text().rstrip()))

sum(best_pure_ner_accs) / len(best_pure_ner_accs)

## NER Evaluation

In [None]:
# Path prefix of the per-fold JSON result files for each model type.
eval_files_prefix = {
    "ner": "../components_testing/ner/eval_results/nepochs-5_fold-10_remainder",
    "dx_ner": "../multitask_models/diagnosis_ner/eval_results/mtl_lw-1.0-9.0_fold-10_lr5e-5_remainder"
}

# Per-fold NER accuracies, keyed by model type.
eval_results = {"ner": [], "dx_ner": []}

folds = 10
for key in eval_results:
    # The single-task log stores the accuracy under "best_val_acc",
    # the multi-task log under "best_ner_acc".
    acc_field = "best_val_acc" if key == "ner" else "best_ner_acc"
    for remainder in range(folds):
        file = f"{eval_files_prefix[key]}-{remainder}.json"
        with open(file) as f:
            result = json.load(f)
        ner_acc = result[acc_field]
        eval_results[key].append(ner_acc)

In [None]:
# Mean NER accuracy over folds for the single-task (stl) and multi-task (mtl) models.
# NOTE(review): neither mean is displayed by this cell, and `mtl_mean` reuses a
# name that the "Old Codes" comparison cell binds to a DataFrame.
stl_mean = sum(eval_results["ner"]) / len(eval_results["ner"])
mtl_mean = sum(eval_results["dx_ner"]) / len(eval_results["dx_ner"])

# Cell output: the per-fold multi-task NER accuracies.
eval_results["dx_ner"]

In [None]:
# Paired two-sided t-test between the per-fold single-task and multi-task NER accuracies.
ttest_rel(a=eval_results["ner"], b=eval_results["dx_ner"], alternative="two-sided")

In [None]:
# Per-fold single-task NER accuracies.
eval_results["ner"]

In [None]:
# Per-fold multi-task NER accuracies (same value the means cell above displays).
eval_results["dx_ner"]

## Plotting

In [None]:
import json
import pandas as pd

In [None]:
# Load the training log of one multi-task run (remainder-0) for plotting.
file = "/nfs/nas-7.1/ckwu/mtl-icda-ht/multitask_models/diagnosis_ner/eval_results/encoder-BioBERT_dx-97_ner-2_lw-1.0-8.0_lr-5e-05_remainder-0.json"
with open(file) as log_file:
    train_log = json.loads(log_file.read())

In [None]:
# Pull the accuracy and loss histories of both tasks out of the training log.
dx_acc, ner_acc = train_log["dx"]["acc"], train_log["ner"]["acc"]
dx_loss, ner_loss = train_log["dx"]["loss"], train_log["ner"]["loss"]

In [None]:
# One column per logged series; the row index is the position within the log.
curve_columns = {
    "dx_acc": dx_acc,
    "ner_acc": ner_acc,
    "dx_loss": dx_loss,
    "ner_loss": ner_loss,
}
dx_ner_comp = pd.DataFrame(curve_columns)

In [None]:
# Accuracy curves: diagnosis on the left axis, NER on the secondary (right) axis.
# Legend and axis labels are added so the two lines can be told apart.
dx_acc_ax = dx_ner_comp["dx_acc"].plot(legend=True)
# With secondary_y=True pandas returns the right-hand axes.
ner_acc_ax = dx_ner_comp["ner_acc"].plot(secondary_y=True, legend=True)
dx_acc_ax.set_xlabel("train_log entry")  # row index of dx_ner_comp
dx_acc_ax.set_ylabel("dx_acc")
ner_acc_ax.set_ylabel("ner_acc")
dx_acc_ax.set_title("Diagnosis vs. NER accuracy");

In [None]:
# Loss curves: diagnosis on the left axis, NER on the secondary (right) axis.
# Legend and axis labels are added so the two lines can be told apart.
dx_loss_ax = dx_ner_comp["dx_loss"].plot(legend=True)
# With secondary_y=True pandas returns the right-hand axes.
ner_loss_ax = dx_ner_comp["ner_loss"].plot(secondary_y=True, legend=True)
dx_loss_ax.set_xlabel("train_log entry")  # row index of dx_ner_comp
dx_loss_ax.set_ylabel("dx_loss")
ner_loss_ax.set_ylabel("ner_loss")
dx_loss_ax.set_title("Diagnosis vs. NER loss");