In [1]:
import pandas as pd
import numpy as np
import os
import sys

sys.path.append('./../src/')
from manuscript import sankey_side_by_side as sankey
from manuscript import datasets, inout, export, stats

pd.set_option('display.max_columns', None)

#xgboost
from xgboost import XGBClassifier
from xgboost import plot_importance

# Evaluation of models
from sklearn.model_selection import train_test_split
from sklearn.metrics import roc_curve, roc_auc_score, auc
from sklearn.calibration import calibration_curve
from sklearn.metrics import accuracy_score
import matplotlib.pyplot as plt
from sklearn.metrics import precision_recall_curve
from sklearn.metrics import f1_score


  from pandas import MultiIndex, Int64Index


In [2]:
# Top level of the output-folder hierarchy; passed as the first argument to
# the `export` helpers used by dump_table / dump_figure below.
user = 'CAG'
# Sub-folder that prefixes every exported file name; named after this notebook.
outfolder = '31_worst_two'

def dump_table(df, name):
    """Export a table into this notebook's output folder.

    Forwards `df` to `export.full_frame`, using the module-level `user` as
    the top-level location and `'{outfolder}/{name}'` as the relative target.

    Parameters
    ----------
    df : table to export (the callers in this notebook pass pandas DataFrames).
    name : file name, appended to `outfolder` to build the target path.

    Notes
    -----
    `index=True` and `date=True` are always passed through.
    NOTE(review): presumably these keep the index column and add a date
    stamp to the output name -- confirm against `manuscript.export`.
    """
    target = f'{outfolder}/{name}'
    export.full_frame(user, target, df, index=True, date=True)

def dump_figure(name):
    """Export a figure into this notebook's output folder.

    Calls `export.image` with the module-level `user` and the relative
    target `'{outfolder}/{name}'`.

    Parameters
    ----------
    name : file name, appended to `outfolder` to build the target path.

    Notes
    -----
    No figure object is passed here.
    NOTE(review): presumably `export.image` saves the currently active
    matplotlib figure -- confirm against `manuscript.export`.
    """
    target = f'{outfolder}/{name}'
    export.image(user, target)

In [3]:
# Load the VAP-flag table (presumably written by the 30_VAP_flags notebook --
# confirm). The first CSV column is used as the index.
data = pd.read_csv(
    filepath_or_buffer=inout.get_material_path(
        'general/30_VAP_flags/data_vap_flags_220919_1403.csv.gz'
    ),
    index_col=0,
)

In [4]:
# Take the worst value over each patient's first two days.

In [5]:
# Columns whose per-patient MAXIMUM is taken as the "worst" value.
# 'Patient_id' is the grouping key; 'Binary_outcome' is kept for models later.
# NOTE(review): confirm the worst-case direction of each column with the
# clinical owners (e.g. Lung_Compliance and Lymphocytes are treated here as
# higher-is-worse).
highbad = [
    'Patient_id', 'Binary_outcome',
    'ECMO_flag', 'Intubation_flag', 'Hemodialysis_flag', 'CRRT_flag',
    'Temperature', 'Heart_rate',
    'Norepinephrine_flag', 'Norepinephrine_rate',
    'Respiratory_rate', 'PEEP', 'FiO2',
    'Plateau_Pressure', 'Lung_Compliance',
    'PEEP_changes', 'Respiratory_rate_changes', 'FiO2_changes',
    'ABG_PaCO2',
    'WBC_count', 'Lymphocytes', 'Neutrophils',
    'Creatinine', 'Bilirubin', 'CRP', 'D_dimer', 'Ferritin', 'LDH',
    'Lactic_acid', 'Procalcitonin',
    'adjudicated', 'had_nonviral_vap', 'vap_indeterminate_notcured',
]

# Columns whose per-patient MINIMUM is taken as the "worst" value.
# 'Patient_id' is repeated here because it is the grouping / merge key.
lowbad = [
    'Patient_id',
    'Systolic_blood_pressure', 'Diastolic_blood_pressure',
    'Mean_arterial_pressure', 'Oxygen_saturation', 'Urine_output',
    'GCS_eye_opening', 'GCS_motor_response', 'GCS_verbal_response',
    'RASS_score',
    'ABG_pH', 'ABG_PaO2', 'PaO2FIO2_ratio',
    'Hemoglobin', 'Platelets', 'Bicarbonate', 'Albumin',
]

In [5]:
# Keep the first two rows of every patient, in the order they appear in `data`.
# NOTE(review): assumes rows are already in chronological order within each
# patient -- TODO confirm; nothing here sorts them.
firsttwodays = (
    data
    .groupby('Patient_id', as_index=False)
    .head(n=2)
)

In [7]:
# Worst value per patient over the first two rows:
# maximum for the `highbad` columns, minimum for the `lowbad` columns.
firsttwohigh = (
    firsttwodays.loc[:, highbad]
    .groupby('Patient_id', as_index=False)
    .max()
)
firsttwolow = (
    firsttwodays.loc[:, lowbad]
    .groupby('Patient_id', as_index=False)
    .min()
)

In [8]:
# One row per patient: join the max-aggregated and min-aggregated columns.
worsttwo = firsttwohigh.merge(firsttwolow, how='inner', on='Patient_id')

In [9]:
# Restrict to patients who have adjudication data (adjudicated == 1).
worsttwo_adjudicated = worsttwo.loc[worsttwo['adjudicated'] == 1]

In [23]:
# Write a plain CSV copy into the current working directory.
# NOTE(review): the file name carries a hard-coded date stamp and the same
# frame is also exported via dump_table in this notebook -- confirm whether
# this ad-hoc copy is still needed.
worsttwo_adjudicated.to_csv(path_or_buf="worsttwo_adjudicated8-8-22.csv")

In [11]:
# Export the first-two-rows worst-value table to the notebook's output folder.
dump_table(
    df=worsttwo_adjudicated,
    name='data_worsttwo_adjudicated.csv.gz',
)

In [7]:
# Keep the last two rows of every patient, in the order they appear in `data`.
# NOTE(review): assumes rows are already in chronological order within each
# patient -- TODO confirm; nothing here sorts them.
lasttwodays = (
    data
    .groupby('Patient_id', as_index=False)
    .tail(n=2)
)

In [8]:
# Worst value per patient over the last two rows:
# maximum for the `highbad` columns, minimum for the `lowbad` columns.
lasttwohigh = (
    lasttwodays.loc[:, highbad]
    .groupby('Patient_id', as_index=False)
    .max()
)
lasttwolow = (
    lasttwodays.loc[:, lowbad]
    .groupby('Patient_id', as_index=False)
    .min()
)

In [9]:
# One row per patient: join the max-aggregated and min-aggregated columns.
last_worsttwo = lasttwohigh.merge(lasttwolow, how='inner', on='Patient_id')

In [10]:
# Restrict to patients who have adjudication data (adjudicated == 1).
last_worsttwo_adjudicated = last_worsttwo.loc[last_worsttwo['adjudicated'] == 1]

In [11]:
# Export the last-two-rows worst-value table to the notebook's output folder.
dump_table(
    df=last_worsttwo_adjudicated,
    name='data_last_worsttwo_adjudicated.csv.gz',
)

In [8]:
def middle(x):
    """Return the (at most) two central rows of ``x``, selected by position.

    With an even number of rows these are the two rows on either side of the
    midpoint. With an odd number they are the central row and the row just
    before it. A single-row input is returned as-is, and an empty input gives
    an empty selection.

    Parameters
    ----------
    x : pandas.DataFrame or pandas.Series
        Any object supporting ``len`` and positional ``.iloc`` slicing.

    Returns
    -------
    Same type as ``x``
        The positional slice ``x.iloc[start:stop]`` containing at most two rows.
    """
    half = len(x) // 2
    # The even and odd cases both reduce to the window [half - 1, half + 1).
    # Clamp the start so a single-row input does not rely on a negative slice
    # start wrapping around to position 0.
    start = max(half - 1, 0)
    return x.iloc[start:half + 1]

# Stack each patient's central rows into one frame.
# NOTE(review): this rebinds `middle` from the function to a DataFrame, so
# the function is no longer callable after this line; the following cell
# reads `middle` as the DataFrame.
middle = pd.concat(
    [middle(patient_rows) for _patient_id, patient_rows in data.groupby('Patient_id')]
)

In [10]:
# Worst value per patient over the middle rows:
# maximum for the `highbad` columns, minimum for the `lowbad` columns.
middlehigh = (
    middle.loc[:, highbad]
    .groupby('Patient_id', as_index=False)
    .max()
)
middlelow = (
    middle.loc[:, lowbad]
    .groupby('Patient_id', as_index=False)
    .min()
)
# One row per patient: join the max-aggregated and min-aggregated columns.
middletwo = middlehigh.merge(middlelow, how='inner', on='Patient_id')
# Restrict to patients who have adjudication data (adjudicated == 1).
middletwo_adjudicated = middletwo.loc[middletwo['adjudicated'] == 1]
dump_table(
    df=middletwo_adjudicated,
    name='middletwo_adjudicated.csv.gz',
)