In [28]:
import pandas as pd
import plotly.graph_objects as go
from plotly.subplots import make_subplots
import numpy as np

In [3]:
def load_drug(drug_path, patient):
    drug = pd.read_csv(drug_path)
    patients_weight = patient[["patientunitstayid", "admissionweight"]]
    drug = drug.merge(patients_weight, on="patientunitstayid")
    # add hospitalID and unitType and wardID to the drug table corelated to patientunitstayid
    drug["hospitalid"] = drug["patientunitstayid"].map(patient.set_index("patientunitstayid")["hospitalid"])
    drug["unittype"] = drug["patientunitstayid"].map(patient.set_index("patientunitstayid")["unittype"])
    drug["wardid"] = drug["patientunitstayid"].map(patient.set_index("patientunitstayid")["wardid"])
    # filter drug from NA in drugname, patientunitstayid, hospitalid, unittype, wardid
    drug = drug[~drug['drugname'].isna()]
    drug = drug[~drug['patientunitstayid'].isna()]
    drug = drug[~drug['hospitalid'].isna()]
    drug = drug[~drug['unittype'].isna()]
    drug = drug[~drug['wardid'].isna()]

    #filter drugs with other drug names
    other_drugs = drug[drug['drugname'].str.startswith(("Epinephrine", "Dopamine", "Vesopressin"))]

    #filter drug to include only drugnames that start with "Norepinephrine"
    drug = drug[drug['drugname'].str.startswith("Norepinephrine")]

    #remove patients who recieved other vasopressors
    drug = drug[~drug['patientunitstayid'].isin(other_drugs['patientunitstayid'])]
    # sort drug by patientunitstayid and drugstartoffset
    drug = drug.sort_values(by=["patientunitstayid", "infusionoffset"])
    return drug

In [4]:
def load_bp(bp_path, drug,patient):
    bp = pd.read_csv("../preprocess/filtered_bp_eicu.csv")
    bp = bp.sort_values(by=["stay_id", "cur_bp_time"])
    bp = bp[bp["stay_id"].isin(drug["patientunitstayid"])]
    bp["next_bp_time"] = bp.groupby("stay_id")["cur_bp_time"].shift(-1)
    bp["interval"] = bp["next_bp_time"] - bp["cur_bp_time"]
    drug["drugrate"] = pd.to_numeric(drug["drugrate"], errors='coerce')
    bp["age"] = pd.to_numeric(bp["stay_id"].map(patient.set_index("patientunitstayid")["age"]), errors='coerce')
    bp["hospitalid"] = bp["stay_id"].map(patient.set_index("patientunitstayid")["hospitalid"])
    bp["unittype"] = bp["stay_id"].map(patient.set_index("patientunitstayid")["unittype"])
    bp["wardid"] = bp["stay_id"].map(patient.set_index("patientunitstayid")["wardid"])
    # create a new column for the number of patients per hospital
    # bp["pat_per_hos"] = bp["hospitalid"].map(bp.groupby(["hospitalid"]).agg({"stay_id": ["nunique"]}).sort_values(by=("stay_id", "nunique")))
    pat_per_hos = bp.groupby(["hospitalid"]).agg({"stay_id": ["nunique"]}).sort_values(by=("stay_id", "nunique"))
    bp["pat_per_hos"] = bp["hospitalid"].map(pat_per_hos[("stay_id", "nunique")])  
    return bp


In [5]:
def load_data(drug_path, patient_path, diagnosis_path, bp_path):
    patient = pd.read_csv("../data/eICU/patient.csv")
    drug = load_drug("../data/eICU/infusiondrug.csv", patient)
    diagnosis = pd.read_csv("../data/eICU/diagnosis.csv")
    bp = load_bp("../preprocess/filtered_bp_eicu.csv", drug, patient)
    return drug, diagnosis, patient, bp

In [6]:
# create a plot of bp over time for each patient using plotly subplot and add to each patient plot the norepinephrine infusion from drug table as scatter plot
def bp_and_nor_over_time(n_patients, bp_big, drug):
    for i in range(n_patients):
        # assign random patient to pat
        pat = bp_big["stay_id"].value_counts()[bp_big["stay_id"].value_counts() > 0].sample(5).index[0]
        fig = go.Figure()
        fig.add_trace(go.Scatter(x=bp_big[bp_big["stay_id"] == pat]["cur_bp_time"], y=bp_big[bp_big["stay_id"] == pat]["cur_bp"], name="MAP"))
        # scatter plot of norepinephrine infusion, not line plot
        # fig.add_trace(go.Scatter(x=drug[drug["patientunitstayid"] == pat]["infusionoffset"], y=drug[drug["patientunitstayid"] == pat]["drugrate"], name="drug", mode="markers", marker=dict(color="red")), row=i+1, col=1)
        x = drug[drug["patientunitstayid"] == pat]["infusionoffset"]
        z = drug[drug["patientunitstayid"] == pat]["drugrate"]
        # make z values between 0 and 10
        z2 = (z - z.min()) / (z.max() - z.min()) * 10
        # set y as a liitle under the min of bp and higher drugrate be higher
        y = bp_big[bp_big["stay_id"] == pat]["cur_bp"].min() - 10 + z2
        fig.add_trace(go.Scatter(x=x, y=y * np.ones(x.size), name="NOR", mode="markers", marker=dict(color=z,colorscale='Viridis',
            colorbar=dict(title='NOR')
        ),
        )) # TODO added drug amount as color
        fig.update_layout(title_text="bp over time for 5 patients")
        fig.write_image(f"bp{i}.png")
        # create a histogram of bp values for the patient
        fig = go.Figure()
        fig.add_trace(go.Histogram(x=bp_big[bp_big["stay_id"] == pat]["cur_bp"], name="bp"))
        fig.update_layout(title_text="bp over time for patient")
        fig.write_image(f"bp_hist{i}.png")



In [35]:
# create plot of raw bp rolling median and residuals
def rolling_median_graphs(n_patients, bp_big, n_measurments, window_size, start_measurment):
    for i in range(n_patients):

        # assign random patient to pat
        pat = bp_big["stay_id"].value_counts()[bp_big["stay_id"].value_counts() > 0].index[i]
        fig = make_subplots(rows=5, cols=1)

        time = bp_big[bp_big["stay_id"] == pat]["cur_bp_time"]
        time = time[start_measurment[i]:start_measurment[i] + n_measurments]
        raw_bp = bp_big[bp_big["stay_id"] == pat]["cur_bp"]
        raw_bp = raw_bp[start_measurment[i]:start_measurment[i] + n_measurments]
        data_ = pd.DataFrame(data={'time': time, 'raw_bp': raw_bp})

        for k, j in enumerate(window_size):

            # Apply rolling median
            data_[f'window_{j}'] = data_['raw_bp'].rolling(j, center=True).median()
            data_[f'dist_{j}'] = data_['raw_bp']-data_[f'window_{j}']
            data_[f'abs_dist{j}'] = np.abs(data_[f'dist_{j}'])
            dist_median = np.nanmedian(data_[f'abs_dist{j}'])
            data_[f'dist_median{j}'] = dist_median

            #raw data
            fig.add_trace(go.Scatter(x=data_['time'], y=data_['raw_bp'], mode='lines', marker=dict(color='blue'), name="raw_bp"), row=1, col=1)

            #raw data vs smooth data
            fig.add_trace(go.Scatter(x=data_['time'], y=data_['raw_bp'], mode='lines', marker=dict(color='blue'), name="raw_bp"), row=k+2, col=1)
            fig.add_trace(go.Scatter(x=data_['time'], y=data_[f'window_{j}'], mode='lines', name=f'window_{j}'), row=k+2, col=1)

            # dist with threshhold
            fig.add_trace(go.Scatter(x=data_['time'], y=data_[f'abs_dist{j}'], mode='lines', name=f'dist{j}'), row=5, col=1)
            # fig.add_trace(go.Scatter(x=data_['time'], y=data_['dist_median']*filter_size, mode='lines', name=f'threshold: {dist_median * filter_size}'), row=3, col=1)
            
            #raw with filtered
            # fig.add_trace(go.Scatter(x=data_['time'], y=data_['raw_bp'], mode='lines', name="raw_bp"), row=4, col=1)
            # data_ = data_[data_['abs_dist'] <= dist_median * filter_size]
            # fig.add_trace(go.Scatter(x=data_['time'], y=data_['raw_bp'], mode='lines', name="filtered_bp"), row=4, col=1)


        fig.update_layout(title_text=f"bp over time for patient {i}")
        fig.write_image(f"bp{i}.png")


In [22]:
# run
def run(bp, n_patients, n_measurments, window_size, start_measurment):
    bp_big  = bp[bp['pat_per_hos'] >= 50 ]
    print('unique satyId in bp_big: ', bp_big["stay_id"].nunique())
    # bp_and_nor_over_time(n_patients, bp_big, drug)
    rolling_median_graphs(n_patients, bp_big, n_measurments, window_size, start_measurment)

In [12]:
drug_path = "../data/eICU/infusiondrug.csv"
patient_path = "../data/eICU/patient.csv"
diagnosis_path = "../data/eICU/diagnosis.csv"
bp_path = "../preprocess/filtered_bp_eicu.csv"
drug, diagnosis, patient, bp = load_data(drug_path, patient_path, diagnosis_path, bp_path)
    


Columns (4) have mixed types.Specify dtype option on import or set low_memory=False.



In [36]:
seed_value = 48
np.random.seed(seed_value)
n_patients = 50
n_measurments = 200
start_measurment = np.random.randint(0, 1001, size=n_patients)
print(start_measurment)
window_size = [3,5,10]
run(bp, n_patients, n_measurments, window_size, start_measurment)


[512 563 337 452 347 944 832 966 454 290 348 208 236 669   6 214 927 814
 944 731 796 231 826 616 655 542 578 530 437 530 234  21 281 604 418 118
 795 828 162 429 127 810 988 177 917 390  47 272 293 492]
unique satyId in bp_big:  1951
