In [1]:
%matplotlib inline
import numpy as np
import scipy 
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.linear_model import LogisticRegression
from sklearn.svm import SVC

# Plot styling in a single call: seaborn defaults, colour shorthand codes
# enabled, and a 16x8 default figure size.
sns.set(color_codes=True, rc={"figure.figsize": (16, 8)})

# Fixed seed for NumPy's global random generator.
np.random.seed(20170619)

In [2]:
# Ward lookup spreadsheet; the 'ICU', 'General Ward' and 'To exclude' columns
# hold 1.0 for the rows that belong to each group.
wards = pd.read_excel("./Coll.centre key_v3.xlsx")

# Column that names the ward / collection centre.
location = 'Coll.Centre'

# One sub-table per ward group.
ICU_wards = wards.loc[wards['ICU'].eq(1.0)]
General_wards = wards.loc[wards['General Ward'].eq(1.0)]
exclude = wards.loc[wards['To exclude'].eq(1.0)]

# Names of the ICU wards; shown as the cell output.
targets = list(ICU_wards[location])
targets

['NICU', 'PAED HDU', 'PICU']

In [3]:
# Load the pre-processed lab-results table used by every cell below.
# NOTE(review): unpickling executes arbitrary code embedded in the file, so
# only load data9.pkl from a trusted source.
data = pd.read_pickle("./data9.pkl")

In [4]:
# Reference ranges per lab test.
# Layout: 'Lab test name': [[current min, current max], [proposed min, proposed max]]
# A limit of False means that side of the range has no limit.
valid_labs = {
    'SODIUM': [
        [130.0, 150.0],
        [130.62, 148.36],
    ],
    'BICARBONATE': [
        [15.0, 35.0],
        [11.97, 37.12],
    ],
    'CHLORIDE': [
        [90.0, 115.0],
        [91.11, 115.35],
    ],
    'GLUCOSE': [
        [2.5, 9.0],
        [False, 17.66],
    ],
    'LACTATE': [
        [False, 10.0],
        [False, 5.13],
    ],
    'MAGNESIUM': [
        [0.6, 1.3],
        [0.57, 1.22],
    ],
    'PHOSPHATE': [
        [0.5, 3.0],
        [0.33, 2.65],
    ],
    'POTASSIUM': [
        [3.0, 6.0],
        [2.29, 6.41],
    ],
    # 'CORRECTED CALCIUM' is intentionally left out: no ranges defined yet.
    'UREA': [
        [False, 12.0],
        [False, 12.04],
    ],
    'SERUM CREAT': [
        [False, 120.0],
        [False, 131.31],
    ],
    'CALCIUM TOTAL': [
        [1.3, 3.0],
        [1.9, False],
    ],
}

In [1]:
# Inspect the 'included' column; the next cell uses it as a boolean row mask
# (data.loc[data['included'], ...]) to restrict the analysis.
# NOTE(review): this displays the whole column - consider a summary instead.
data['included']

In [12]:
# Compare the current and the proposed reference ranges on the rows selected by
# data['included'] (reported below as patients "transfered to ICU").
#
# For every lab test in valid_labs this prints how many distinct Research.IDs
# have a <lab>_min at or below the lower limit, or a <lab>_max at or above the
# upper limit, for both the current and the proposed range, as a count and as
# a percentage of the included patients with complete data for that lab.
# A limit of False means "no limit on that side": nothing is flagged for it.
#
# NOTE(review): the denominator drops rows with missing values while the
# flagged counts do not apply the same dropna - confirm this is intended.
included_mask = data['included'].values.astype(bool)
no_flags_mask = np.zeros(len(data), dtype=bool)

for lab, ((curr_min, curr_max), (prop_min, prop_max)) in valid_labs.items():
    _min = lab + "_min"
    _max = lab + "_max"

    # Each selection is guarded by its OWN limit. Guarding the proposed limits
    # with curr_max (as before) compared against False (== 0) when a proposed
    # limit was absent, and ignored a proposed limit when curr_max was absent.
    select_curr_max = (included_mask & (data[_max] >= curr_max).values) if curr_max else no_flags_mask
    select_curr_min = (included_mask & (data[_min] <= curr_min).values) if curr_min else no_flags_mask
    select_prop_max = (included_mask & (data[_max] >= prop_max).values) if prop_max else no_flags_mask
    select_prop_min = (included_mask & (data[_min] <= prop_min).values) if prop_min else no_flags_mask

    # Denominator: distinct included patients with no missing value in any of
    # the columns relevant to this lab.
    complete_cols = ['Research.ID', 'age', location, 'Collected', lab, _max, _min, 'included']
    total = float(data.loc[included_mask, complete_cols].dropna()['Research.ID'].unique().shape[0])

    # Numerators: distinct patients flagged by each limit.
    flaged_curr_min = float(data.loc[select_curr_min, 'Research.ID'].unique().shape[0])
    flaged_curr_max = float(data.loc[select_curr_max, 'Research.ID'].unique().shape[0])
    flaged_prop_min = float(data.loc[select_prop_min, 'Research.ID'].unique().shape[0])
    flaged_prop_max = float(data.loc[select_prop_max, 'Research.ID'].unique().shape[0])

    print("\n"+lab)
    print("Total patients transfered to ICU: {0}".format(total))
    if total == 0:
        # No denominator: percentages are undefined, skip instead of dividing by zero.
        continue

    pct_curr_min = np.round(flaged_curr_min / total * 100.0, decimals=2)
    pct_prop_min = np.round(flaged_prop_min / total * 100.0, decimals=2)
    pct_curr_max = np.round(flaged_curr_max / total * 100.0, decimals=2)
    pct_prop_max = np.round(flaged_prop_max / total * 100.0, decimals=2)

    print("Flaged by current min: {0}, {1}%".format(flaged_curr_min, pct_curr_min))
    print("Flaged by proposed min: {0}, {1}%".format(flaged_prop_min, pct_prop_min))
    # Round the difference as well so float noise (e.g. 0.5999999999999999) is not printed.
    print("Difference proposed - current: {0}".format(np.round(pct_prop_min - pct_curr_min, decimals=2)))
    print("Flaged by current max: {0}, {1}%".format(flaged_curr_max, pct_curr_max))
    print("Flaged by proposed max: {0}, {1}%".format(flaged_prop_max, pct_prop_max))
    print("Difference proposed - current: {0}".format(np.round(pct_prop_max - pct_curr_max, decimals=2)))


PHOSPHATE
Total patients transfered to ICU: 360.0
Flaged by current min: 9.0, 2.5%
Flaged by proposed min: 4.0, 1.11%
Difference proposed - current: -1.39
Flaged by current max: 2.0, 0.56%
Flaged by proposed max: 2.0, 0.56%
Difference proposed - current: 0.0

CHLORIDE
Total patients transfered to ICU: 668.0
Flaged by current min: 7.0, 1.05%
Flaged by proposed min: 11.0, 1.65%
Difference proposed - current: 0.5999999999999999
Flaged by current max: 37.0, 5.54%
Flaged by proposed max: 23.0, 3.44%
Difference proposed - current: -2.1

LACTATE
Total patients transfered to ICU: 16.0
Flaged by current min: 0.0, 0.0%
Flaged by proposed min: 0.0, 0.0%
Difference proposed - current: 0.0
Flaged by current max: 0.0, 0.0%
Flaged by proposed max: 2.0, 12.5%
Difference proposed - current: 12.5

SERUM CREAT
Total patients transfered to ICU: 667.0
Flaged by current min: 0.0, 0.0%
Flaged by proposed min: 0.0, 0.0%
Difference proposed - current: 0.0
Flaged by current max: 20.0, 3.0%
Flaged by proposed m

In [11]:
# Compare the current and the proposed reference ranges on ALL rows of `data`
# (no data['included'] filter).
#
# For every lab test in valid_labs this prints how many distinct Research.IDs
# have a <lab>_min at or below the lower limit, or a <lab>_max at or above the
# upper limit, for both the current and the proposed range, as a count and as
# a percentage of the patients with complete data for that lab.
# A limit of False means "no limit on that side": nothing is flagged for it.
no_flags_mask = np.zeros(len(data), dtype=bool)

for lab, ((curr_min, curr_max), (prop_min, prop_max)) in valid_labs.items():
    _min = lab + "_min"
    _max = lab + "_max"

    # Each selection is guarded by its OWN limit. Guarding the proposed limits
    # with curr_max (as before) compared against False (== 0) when a proposed
    # limit was absent, and ignored a proposed limit when curr_max was absent.
    select_curr_max = (data[_max] >= curr_max).values if curr_max else no_flags_mask
    select_curr_min = (data[_min] <= curr_min).values if curr_min else no_flags_mask
    select_prop_max = (data[_max] >= prop_max).values if prop_max else no_flags_mask
    select_prop_min = (data[_min] <= prop_min).values if prop_min else no_flags_mask

    # Denominator: distinct patients with no missing value in these columns.
    complete_cols = ['Research.ID', 'age', location, 'Collected', lab, 'included']
    total = float(data.loc[:, complete_cols].dropna()['Research.ID'].unique().shape[0])

    # Numerators: distinct patients flagged by each limit.
    flaged_curr_min = float(data.loc[select_curr_min, 'Research.ID'].unique().shape[0])
    flaged_curr_max = float(data.loc[select_curr_max, 'Research.ID'].unique().shape[0])
    flaged_prop_min = float(data.loc[select_prop_min, 'Research.ID'].unique().shape[0])
    flaged_prop_max = float(data.loc[select_prop_max, 'Research.ID'].unique().shape[0])

    print("\n"+lab)
    print("Total patients: {0}".format(total))
    if total == 0:
        # No denominator: percentages are undefined, skip instead of dividing by zero.
        continue

    pct_curr_min = np.round(flaged_curr_min / total * 100.0, decimals=2)
    pct_prop_min = np.round(flaged_prop_min / total * 100.0, decimals=2)
    pct_curr_max = np.round(flaged_curr_max / total * 100.0, decimals=2)
    pct_prop_max = np.round(flaged_prop_max / total * 100.0, decimals=2)

    print("Flaged by current min: {0}, {1}%".format(flaged_curr_min, pct_curr_min))
    print("Flaged by proposed min: {0}, {1}%".format(flaged_prop_min, pct_prop_min))
    # Round the difference as well so float noise (e.g. 0.22999999999999998) is not printed.
    print("Difference proposed - current: {0}".format(np.round(pct_prop_min - pct_curr_min, decimals=2)))
    print("Flaged by current max: {0}, {1}%".format(flaged_curr_max, pct_curr_max))
    print("Flaged by proposed max: {0}, {1}%".format(flaged_prop_max, pct_prop_max))
    print("Difference proposed - current: {0}".format(np.round(pct_prop_max - pct_curr_max, decimals=2)))



PHOSPHATE
Total patients: 7823.0
Flaged by current min: 54.0, 0.69%
Flaged by proposed min: 17.0, 0.22%
Difference proposed - current: -0.47
Flaged by current max: 9.0, 0.12%
Flaged by proposed max: 27.0, 0.35%
Difference proposed - current: 0.22999999999999998

CHLORIDE
Total patients: 18296.0
Flaged by current min: 36.0, 0.2%
Flaged by proposed min: 47.0, 0.26%
Difference proposed - current: 0.06
Flaged by current max: 139.0, 0.76%
Flaged by proposed max: 102.0, 0.56%
Difference proposed - current: -0.19999999999999996

LACTATE
Total patients: 1739.0
Flaged by current min: 0.0, 0.0%
Flaged by proposed min: 0.0, 0.0%
Difference proposed - current: 0.0
Flaged by current max: 2.0, 0.12%
Flaged by proposed max: 22.0, 1.27%
Difference proposed - current: 1.15

SERUM CREAT
Total patients: 18276.0
Flaged by current min: 0.0, 0.0%
Flaged by proposed min: 0.0, 0.0%
Difference proposed - current: 0.0
Flaged by current max: 124.0, 0.68%
Flaged by proposed max: 100.0, 0.55%
Difference proposed 