In [None]:
import pandas as pd
import json
from IPython.display import display, Markdown

from utilities import data, roc, threshold
from utilities.info import *

In [None]:
# Value passed as `num_bootstraps` to every roc/threshold results call below.
NUM_BOOTSTRAPS = 10

# Folder that receives every CSV written by this notebook.
# EXPERIMENT_DIR and FILE_DIR are not defined in this notebook; presumably they
# come from the `utilities.info` star import -- TODO confirm.
RESULTS_DIR = f"{EXPERIMENT_DIR}/temp-results"

# Show both directories as the cell output for a quick sanity check.
FILE_DIR, RESULTS_DIR

# DLCST

In [None]:
# Models evaluated on DLCST, passed as `models=` to the roc/threshold helpers.
# Keys look like display names and values like prediction columns of the CSV
# loaded below -- presumably; verify against the helpers.
DLCST_MODELCOLS = {
    "Venkadesh": "Ensemble_Kiran_cal",
    "de Haas": "thijmen_mean_cal",
    "PanCan2b": "PanCan2b",
    "Sybil year 1": "sybil_year1",
}

# Load the DLCST predictions; the first CSV row supplies the column names.
dlcst_preds = pd.read_csv(
    f"{FILE_DIR}/dlcst_allmodels_cal.csv",
    header=0,
)
print(len(dlcst_preds))

# Rebinds `dlcst_preds` to the frame returned by the binning helper and keeps
# the returned demographic-column description (its 'cat' entry is used later).
(
    dlcst_preds,
    dlcst_democols,
) = data.bin_numerical_columns(dlcst_preds, DLCST_DEMOCOLS)
dlcst_democols

In [None]:
# Per-subgroup ROC results for every DLCST model. The CSV file name embeds the
# number of rows in `dlcst_preds`.
roc_dlcst = roc.all_results_subgroups_models(
    dlcst_preds,
    dlcst_democols['cat'],
    models=DLCST_MODELCOLS,
    csvpath=f"{RESULTS_DIR}/auroc-dlcst-{len(dlcst_preds)}.csv",
    plot=False,
    num_bootstraps=NUM_BOOTSTRAPS,
)
roc_dlcst

In [None]:
# Threshold policies for the DLCST models; the helper returns a pair and only
# the first element is kept.
dlcst_policies, _ = threshold.get_threshold_policies(
    dlcst_preds,
    models=DLCST_MODELCOLS,
    policies=THRESHOLD_POLICIES,
    brock=True,
)
dlcst_policies

In [None]:
# Per-subgroup results at the policies computed above, for every DLCST model.
# The CSV file name embeds the number of rows in `dlcst_preds`.
dlcst_thresholds = threshold.all_results_subgroups_models(
    dlcst_preds,
    dlcst_democols['cat'],
    policies=dlcst_policies,
    models=DLCST_MODELCOLS,
    csvpath=f'{RESULTS_DIR}/threshold-perfs-dlcst-{len(dlcst_preds)}.csv',
    plot=False,
    num_bootstraps=NUM_BOOTSTRAPS,
)

dlcst_thresholds

# NLST

In [None]:
# NLST predictions; this frame is the input to every `data.prep_nlst_preds`
# call in the NLST sections below.
nlst_preds_nodule = pd.read_csv(f"{FILE_DIR}/nlst_allmodels_demos.csv")

# Demographic-column description stored next to the predictions.
# The `with` block closes the file on exit, so no explicit close() is needed.
with open(f'{FILE_DIR}/nlst_democols.json') as json_data:
    nlst_democols_original = json.load(json_data)

# Drop 'lungcanc' from the categorical columns (presumably because it is the
# outcome label rather than a demographic -- TODO confirm). The removed entry
# is shown as the cell output; a KeyError is raised if the key is absent.
nlst_democols_original['cat'].pop('lungcanc')

## NLST Scans (all)

In [None]:
# Prepare the NLST data with scanlevel=True and tijmen=False (sybil=True).
# The flag semantics live in `data.prep_nlst_preds`, which is not visible here.
(
    nlst_allscans,
    nlst_allscans_democols,
    nlst_allscans_models,
) = data.prep_nlst_preds(
    nlst_preds_nodule,
    nlst_democols_original,
    scanlevel=True,
    tijmen=False,
    sybil=True,
    bin_num=True,
)
print(len(nlst_allscans), " Scans")

# Show the returned models object, then the demographic columns as cell output.
display(nlst_allscans_models)
nlst_allscans_democols

In [None]:
# Per-subgroup ROC results for the all-scans NLST set.
# NOTE(review): all four NLST AUROC cells write to `auroc-nlst-<rows>.csv`, so
# the files are told apart only by the row count.
roc_nlst_allscans = roc.all_results_subgroups_models(
    nlst_allscans,
    nlst_allscans_democols['cat'],
    models=nlst_allscans_models,
    csvpath=f"{RESULTS_DIR}/auroc-nlst-{len(nlst_allscans)}.csv",
    plot=False,
    num_bootstraps=NUM_BOOTSTRAPS,
)
roc_nlst_allscans

In [None]:
# Threshold policies for the all-scans NLST set; the helper returns a pair and
# only the first element is kept.
nlst_allscans_policies, _ = threshold.get_threshold_policies(
    nlst_allscans,
    models=nlst_allscans_models,
    policies=THRESHOLD_POLICIES,
    brock=True,
)
nlst_allscans_policies

In [None]:
# Per-subgroup results at the policies computed above (all-scans NLST set).
# NOTE(review): all four NLST threshold cells write to
# `threshold-perfs-nlst-<rows>.csv`, so the files differ only by the row count.
threshold_allscans = threshold.all_results_subgroups_models(
    nlst_allscans,
    nlst_allscans_democols['cat'],
    policies=nlst_allscans_policies,
    models=nlst_allscans_models,
    csvpath=f'{RESULTS_DIR}/threshold-perfs-nlst-{len(nlst_allscans)}.csv',
    plot=False,
    num_bootstraps=NUM_BOOTSTRAPS,
)

threshold_allscans

## NLST Scans (tijmen)

In [None]:
# Prepare the NLST data with scanlevel=True and tijmen=True (sybil=True).
# The flag semantics live in `data.prep_nlst_preds`, which is not visible here.
(
    nlst_somescans,
    nlst_somescans_democols,
    nlst_somescans_models,
) = data.prep_nlst_preds(
    nlst_preds_nodule,
    nlst_democols_original,
    scanlevel=True,
    tijmen=True,
    sybil=True,
    bin_num=True,
)
print(len(nlst_somescans), " Scans")

# Show the returned models object, then the demographic columns as cell output.
display(nlst_somescans_models)
nlst_somescans_democols

In [None]:
# Per-subgroup ROC results for the tijmen=True scan set.
# NOTE(review): all four NLST AUROC cells write to `auroc-nlst-<rows>.csv`, so
# the files are told apart only by the row count.
roc_nlst_somescans = roc.all_results_subgroups_models(
    nlst_somescans,
    nlst_somescans_democols['cat'],
    models=nlst_somescans_models,
    csvpath=f"{RESULTS_DIR}/auroc-nlst-{len(nlst_somescans)}.csv",
    plot=False,
    num_bootstraps=NUM_BOOTSTRAPS,
)
roc_nlst_somescans

In [None]:
# Threshold policies for the tijmen=True scan set; the helper returns a pair
# and only the first element is kept.
nlst_somescans_policies, _ = threshold.get_threshold_policies(
    nlst_somescans,
    models=nlst_somescans_models,
    policies=THRESHOLD_POLICIES,
    brock=True,
)
nlst_somescans_policies

In [None]:
# Per-subgroup results at the policies computed above (tijmen=True scan set).
# NOTE(review): all four NLST threshold cells write to
# `threshold-perfs-nlst-<rows>.csv`, so the files differ only by the row count.
threshold_somescans = threshold.all_results_subgroups_models(
    nlst_somescans,
    nlst_somescans_democols['cat'],
    policies=nlst_somescans_policies,
    models=nlst_somescans_models,
    csvpath=f'{RESULTS_DIR}/threshold-perfs-nlst-{len(nlst_somescans)}.csv',
    plot=False,
    num_bootstraps=NUM_BOOTSTRAPS,
)

threshold_somescans

## NLST Nodules (all)

In [None]:
# Prepare the NLST data with scanlevel=False and tijmen=False (sybil=False).
# The flag semantics live in `data.prep_nlst_preds`, which is not visible here.
(
    nlst_allnodules,
    nlst_allnodules_democols,
    nlst_allnodules_models,
) = data.prep_nlst_preds(
    nlst_preds_nodule,
    nlst_democols_original,
    scanlevel=False,
    tijmen=False,
    sybil=False,
    bin_num=True,
)
print(len(nlst_allnodules), " nodules")

# Show the returned models object, then the demographic columns as cell output.
display(nlst_allnodules_models)
nlst_allnodules_democols

In [None]:
# Per-subgroup ROC results for the all-nodules NLST set.
# NOTE(review): all four NLST AUROC cells write to `auroc-nlst-<rows>.csv`, so
# the files are told apart only by the row count.
roc_nlst_allnodules = roc.all_results_subgroups_models(
    nlst_allnodules,
    nlst_allnodules_democols['cat'],
    models=nlst_allnodules_models,
    csvpath=f"{RESULTS_DIR}/auroc-nlst-{len(nlst_allnodules)}.csv",
    plot=False,
    num_bootstraps=NUM_BOOTSTRAPS,
)
roc_nlst_allnodules

In [None]:
# Threshold policies for the all-nodules NLST set; the helper returns a pair
# and only the first element is kept.
nlst_allnodules_policies, _ = threshold.get_threshold_policies(
    nlst_allnodules,
    models=nlst_allnodules_models,
    policies=THRESHOLD_POLICIES,
    brock=True,
)
nlst_allnodules_policies

In [None]:
# Per-subgroup results at the policies computed above (all-nodules NLST set).
# NOTE(review): all four NLST threshold cells write to
# `threshold-perfs-nlst-<rows>.csv`, so the files differ only by the row count.
threshold_allnodules = threshold.all_results_subgroups_models(
    nlst_allnodules,
    nlst_allnodules_democols['cat'],
    policies=nlst_allnodules_policies,
    models=nlst_allnodules_models,
    csvpath=f'{RESULTS_DIR}/threshold-perfs-nlst-{len(nlst_allnodules)}.csv',
    plot=False,
    num_bootstraps=NUM_BOOTSTRAPS,
)

threshold_allnodules

## NLST Nodules (tijmen)

In [None]:
# Prepare the NLST data with scanlevel=False and tijmen=True (sybil=False).
# The flag semantics live in `data.prep_nlst_preds`, which is not visible here.
(
    nlst_somenodules,
    nlst_somenodules_democols,
    nlst_somenodules_models,
) = data.prep_nlst_preds(
    nlst_preds_nodule,
    nlst_democols_original,
    scanlevel=False,
    tijmen=True,
    sybil=False,
    bin_num=True,
)
print(len(nlst_somenodules), " nodules")

# Show the returned models object, then the demographic columns as cell output.
display(nlst_somenodules_models)
nlst_somenodules_democols

In [None]:
# Per-subgroup ROC results for the tijmen=True nodule set.
# NOTE(review): all four NLST AUROC cells write to `auroc-nlst-<rows>.csv`, so
# the files are told apart only by the row count.
roc_nlst_somenodules = roc.all_results_subgroups_models(
    nlst_somenodules,
    nlst_somenodules_democols['cat'],
    models=nlst_somenodules_models,
    csvpath=f"{RESULTS_DIR}/auroc-nlst-{len(nlst_somenodules)}.csv",
    plot=False,
    num_bootstraps=NUM_BOOTSTRAPS,
)
roc_nlst_somenodules

In [None]:
# Threshold policies for the tijmen=True nodule set; the helper returns a pair
# and only the first element is kept.
nlst_somenodules_policies, _ = threshold.get_threshold_policies(
    nlst_somenodules,
    models=nlst_somenodules_models,
    policies=THRESHOLD_POLICIES,
    brock=True,
)
nlst_somenodules_policies

In [None]:
# Per-subgroup results at the policies computed above (tijmen=True nodule set).
# NOTE(review): all four NLST threshold cells write to
# `threshold-perfs-nlst-<rows>.csv`, so the files differ only by the row count.
threshold_somenodules = threshold.all_results_subgroups_models(
    nlst_somenodules,
    nlst_somenodules_democols['cat'],
    policies=nlst_somenodules_policies,
    models=nlst_somenodules_models,
    csvpath=f'{RESULTS_DIR}/threshold-perfs-nlst-{len(nlst_somenodules)}.csv',
    plot=False,
    num_bootstraps=NUM_BOOTSTRAPS,
)

threshold_somenodules