In [1]:
import numpy as np
import pandas as pd
import csv
import os
from SepsisCheck import sepsischeck_utilities_for_pkl as su
from sklearn.metrics import precision_recall_fscore_support as score
from sklearn.metrics import classification_report as report
from sklearn.metrics import roc_auc_score as auroc
from sklearn.metrics import confusion_matrix

### Take a look at the results of sepsis check on patients

In [2]:
def convert_result_to_df(filename):
    result_dict = []
    with open(filename, 'r') as f:
        for i, line in enumerate(f):
            line_list = line.split()
            line_list = [s.strip(',') for s in line_list]
            line_list = [s.replace(',', '') for s in line_list]
            result_dict.append(dict({'Subject ID': line_list[2], 
                                        'Hadm_ID': line_list[4],
                                        'ts_ind': line_list[6],
                                        'Sepsis': line_list[8], 
                                        't_sepsis': line_list[10],
                                        't_sofa': line_list[12], 
                                        't_cultures': line_list[14], 
                                        't_IV': line_list[16], 
                                        't_sus': line_list[18]}))
    df = pd.DataFrame.from_records(result_dict)

    return df

In [5]:
# load data we classified
path = "../data/patient/mimic_iii_preprocessed_finetuning2.pkl"
data = pd.read_pickle(path)
#sort by ts_ind as that is how the results are sorted
data1 = data[1].sort_values(by=["ts_ind"])
#make ground truth for scoring, reset index after sorting. Index 0 -> ts_ind 0
ground_truth = data1["in_hospital_sepsis"].reset_index(drop=True)

#load positive and negative with IV and cultures feature from file -> faster than computing again

with open("./features/possible_predicitons/possible_pos_predictions.csv") as f:
    reader = csv.reader(f)
    possible_pos_predictions = [int(row[0]) for row in reader]
with open("./features/possible_predicitons/possible_neg_predictions.csv") as f:
    reader = csv.reader(f)
    possible_neg_predictions = [int(row[0]) for row in reader]

#indeces of all patients with IV and cultures
all_possible_ = possible_pos_predictions + possible_neg_predictions

#get labels for all patients that have IV and cultures
adj_ground_truth = ground_truth.loc[all_possible_]

# df for holding results
col = ["experiment", "AUROC", "AUROC_adj","precision_raw", "precision_adj", "recall_raw", "recall_adj", "f1_raw", "f1_adj", "support", "support_adj", "cm", "cm_adj"]
df = pd.DataFrame(columns=col)

In [6]:
def compute_results(path):
    # load results and get predicted arrays

    df = convert_result_to_df(path)

    # get ts_indexes of False and True Sepsis labels in results
    noSeps = df.loc[df["Sepsis"] == "False"]
    Seps = df.loc[df["Sepsis"] == "True"]
    neg_hadm_IDs = list(map(int, noSeps["ts_ind"]))
    pos_hadm_IDs = list(map(int, Seps["ts_ind"]))

    #make predicted df for scoring
    predicted = pd.DataFrame(index=df.index, columns=["pred"])
    predicted[predicted.index.isin(neg_hadm_IDs)] = 0
    predicted[predicted.index.isin(pos_hadm_IDs)] = 1

    # get all predictions for all patients that have IV and cultures
    adj_predicted = predicted.loc[all_possible_]



    # precision, recall f1 (fbeta=1.0) on raw data
    precision, recall, f1_score, support = score(y_true=ground_truth.values.astype(int), y_pred=predicted["pred"].values.astype(int), average="weighted")
    auroc_score = auroc(y_true=ground_truth.values.astype(int), y_score=predicted["pred"].values.astype(int), average="weighted")
    cm=confusion_matrix(ground_truth.values.astype(int), predicted["pred"].values.astype(int)).ravel()

    # precision, recall f1 (fbeta=1.0) on patients that include IV and cultures
    precision_adj, recall_adj, f1_score_adj, support_adj = score(y_true=adj_ground_truth.values.astype(int), y_pred=adj_predicted["pred"].values.astype(int), average="weighted")
    auroc_score_adj = auroc(y_true=adj_ground_truth.values.astype(int), y_score=adj_predicted["pred"].values.astype(int), average="weighted")
    cm_adj=confusion_matrix(adj_ground_truth.values.astype(int), adj_predicted["pred"].values.astype(int)).ravel()


    return auroc_score, auroc_score_adj, precision, recall, f1_score, support, precision_adj, recall_adj, f1_score_adj, support_adj, cm, cm_adj

In [7]:
results_path = "./results/on_finetune_data/"
directory = os.fsencode(results_path)
for file in os.listdir(directory):
    auroc_score, auroc_score_adj, precision, recall, f1_score, support, precision_adj, recall_adj, f1_score_adj, support_adj, cm, cm_adj = compute_results(os.path.join(directory, file))
    new_row = {"experiment":file,"AUROC":auroc_score,"AUROC_adj":auroc_score_adj,"precision_raw":precision, "precision_adj":precision_adj, "recall_raw":recall, "recall_adj":recall_adj, "f1_raw":f1_score, "f1_adj":f1_score_adj, "support":support, "support_adj":support_adj, "cm": cm, "cm_adj":cm_adj}
    df2 = df.append(new_row, ignore_index=True)
    df2.to_csv('F1_report.csv', mode='a', index=False, header=False)

  df2 = df.append(new_row, ignore_index=True)
  df2 = df.append(new_row, ignore_index=True)
  df2 = df.append(new_row, ignore_index=True)
  df2 = df.append(new_row, ignore_index=True)
  df2 = df.append(new_row, ignore_index=True)
  df2 = df.append(new_row, ignore_index=True)
  df2 = df.append(new_row, ignore_index=True)
  df2 = df.append(new_row, ignore_index=True)
  df2 = df.append(new_row, ignore_index=True)
  df2 = df.append(new_row, ignore_index=True)
  df2 = df.append(new_row, ignore_index=True)
  df2 = df.append(new_row, ignore_index=True)
  df2 = df.append(new_row, ignore_index=True)
  df2 = df.append(new_row, ignore_index=True)
  df2 = df.append(new_row, ignore_index=True)
  df2 = df.append(new_row, ignore_index=True)


In [3]:
col = ["experiment", "AUROC", "AUROC_adj","precision_raw", "precision_adj", "recall_raw", "recall_adj", "f1_raw", "f1_adj", "support", "support_adj", "cm", "cm_adj"]
results = pd.read_csv("F1_report.csv", names=col, header=None)
results

Unnamed: 0,experiment,AUROC,AUROC_adj,precision_raw,precision_adj,recall_raw,recall_adj,f1_raw,f1_adj,support,support_adj,cm,cm_adj
0,b'1_reyna_ffill_reyna_24-12_240-240.txt',0.607345,0.562775,0.706016,0.597679,0.715142,0.612219,0.674993,0.602054,,,[9833 722 3791 1497],[ 437 722 503 1497]
1,b'1_reyna_ffill_reyna_48-24_168-168.txt',0.609844,0.545806,0.702922,0.58437,0.714385,0.60842,0.677127,0.588511,,,[9756 799 3726 1562],[ 360 799 438 1562]
2,b'1_reyna_ffill_reyna_48-24_24-72.txt',0.581873,0.528185,0.690236,0.561153,0.701509,0.543843,0.648939,0.550171,,,[9940 615 4114 1174],[ 544 615 826 1174]
3,b'1_reyna_ffill_reyna_6-3_1-3.txt',0.512284,0.505536,0.669224,0.556099,0.671653,0.394112,0.554551,0.291158,,,[10467 88 5114 174],[1071 88 1826 174]
4,b'2sepsis-3_24-12_240-240.txt',0.61495,0.559919,0.707517,0.601708,0.717856,0.625831,0.68208,0.602861,,,[9758 797 3673 1615],[ 362 797 385 1615]
5,b'2sepsis-3_48-24_168-168.txt',0.617731,0.541862,0.704789,0.588605,0.717099,0.622032,0.684232,0.584406,,,[9675 880 3602 1686],[ 279 880 314 1686]
6,b'2sepsis-3_48-24_24-72.txt',0.588768,0.523761,0.691115,0.557414,0.703781,0.555239,0.656263,0.556288,,,[9866 689 4004 1284],[ 470 689 716 1284]
7,b'2sepsis-3_6-3_1-3.txt',0.513654,0.506404,0.67011,0.557156,0.672284,0.397278,0.556833,0.30018,,,[10458 97 5095 193],[1062 97 1807 193]
8,b'3sepsis-3_24-12_240-240.txt',0.633591,0.502971,0.704965,0.591482,0.719371,0.633428,0.696414,0.499464,,,[9411 1144 3302 1986],[ 15 1144 14 1986]
9,b'3sepsis-3_48-24_168-168.txt',0.633213,0.502584,0.704739,0.574852,0.719182,0.632479,0.696101,0.500071,,,[9413 1142 3307 1981],[ 17 1142 19 1981]


In [None]:
x = results[["experiment", "AUROC", "AUROC_adj","f1_raw", "f1_adj", "precision_adj", "recall_adj"]].sort_values(["AUROC"], ascending=False)
x

AUROC and F1 raw are identically ordered in this case. They both prefer a setting that identifies the most positives. At the cost of almost always predicting positive when it has the relevant features (cm_adj) -> AUROC and F1 raw are not suitable for comparison.

In [15]:
# tn, fp, fn, tp
y = results[["experiment", "AUROC","f1_raw", "AUROC_adj","f1_adj","cm","cm_adj"]].sort_values(["AUROC"], ascending=False)
y

Unnamed: 0,experiment,AUROC,f1_raw,AUROC_adj,f1_adj,cm,cm_adj
8,b'3sepsis-3_24-12_240-240.txt',0.633591,0.696414,0.502971,0.499464,[9411 1144 3302 1986],[ 15 1144 14 1986]
9,b'3sepsis-3_48-24_168-168.txt',0.633213,0.696101,0.502584,0.500071,[9413 1142 3307 1981],[ 17 1142 19 1981]
10,b'3sepsis-3_48-24_24-72.txt',0.630428,0.694069,0.508384,0.520099,[9456 1099 3358 1930],[ 60 1099 70 1930]
13,b'4sepsis-3_48-24_168-168.txt',0.619574,0.685881,0.542754,0.584687,[9662 893 3576 1712],[ 266 893 288 1712]
5,b'2sepsis-3_48-24_168-168.txt',0.617731,0.684232,0.541862,0.584406,[9675 880 3602 1686],[ 279 880 314 1686]
4,b'2sepsis-3_24-12_240-240.txt',0.61495,0.68208,0.559919,0.602861,[9758 797 3673 1615],[ 362 797 385 1615]
12,b'4sepsis-3_24-12_240-240.txt',0.614948,0.682013,0.556242,0.599358,[9746 809 3667 1621],[ 350 809 379 1621]
11,b'3sepsis-3_6-3_1-3.txt',0.614092,0.680844,0.535608,0.578666,[9686 869 3646 1642],[ 290 869 358 1642]
1,b'1_reyna_ffill_reyna_48-24_168-168.txt',0.609844,0.677127,0.545806,0.588511,[9756 799 3726 1562],[ 360 799 438 1562]
0,b'1_reyna_ffill_reyna_24-12_240-240.txt',0.607345,0.674993,0.562775,0.602054,[9833 722 3791 1497],[ 437 722 503 1497]


f1_adj picks settings that get most of the possible positives right, and significantly more negatives than when sorting by AUROC/F1 raw. We can see that sus and sep windows are more important than configuration(sepsis-3,reyna,grouped,catchsus). 

In [16]:
# tn, fp, fn, tp
y.sort_values(["f1_adj"], ascending=False)

Unnamed: 0,experiment,AUROC,f1_raw,AUROC_adj,f1_adj,cm,cm_adj
4,b'2sepsis-3_24-12_240-240.txt',0.61495,0.68208,0.559919,0.602861,[9758 797 3673 1615],[ 362 797 385 1615]
0,b'1_reyna_ffill_reyna_24-12_240-240.txt',0.607345,0.674993,0.562775,0.602054,[9833 722 3791 1497],[ 437 722 503 1497]
12,b'4sepsis-3_24-12_240-240.txt',0.614948,0.682013,0.556242,0.599358,[9746 809 3667 1621],[ 350 809 379 1621]
1,b'1_reyna_ffill_reyna_48-24_168-168.txt',0.609844,0.677127,0.545806,0.588511,[9756 799 3726 1562],[ 360 799 438 1562]
13,b'4sepsis-3_48-24_168-168.txt',0.619574,0.685881,0.542754,0.584687,[9662 893 3576 1712],[ 266 893 288 1712]
5,b'2sepsis-3_48-24_168-168.txt',0.617731,0.684232,0.541862,0.584406,[9675 880 3602 1686],[ 279 880 314 1686]
14,b'4sepsis-3_48-24_24-72.txt',0.603465,0.671021,0.540881,0.581566,[9795 760 3813 1475],[ 399 760 525 1475]
11,b'3sepsis-3_6-3_1-3.txt',0.614092,0.680844,0.535608,0.578666,[9686 869 3646 1642],[ 290 869 358 1642]
6,b'2sepsis-3_48-24_24-72.txt',0.588768,0.656263,0.523761,0.556288,[9866 689 4004 1284],[ 470 689 716 1284]
2,b'1_reyna_ffill_reyna_48-24_24-72.txt',0.581873,0.648939,0.528185,0.550171,[9940 615 4114 1174],[ 544 615 826 1174]
