In [171]:
import pandas as pd
import numpy as np
import cloudpickle

## Load models

In [172]:
# Directory (relative path) holding the pickled per-table models; the file names are appended to it below.
model_path = "../temp_sets_100/100_models/"

In [173]:
# Load the pickled patients model.
# NOTE(review): unpickling can execute arbitrary code -- only load model files from a trusted source.
with open(model_path + 'patients_model.pkl', 'rb') as f:
    patients_model = cloudpickle.load(f)

In [174]:
# Load the pickled admissions model.
# NOTE(review): unpickling can execute arbitrary code -- only load model files from a trusted source.
with open(model_path + 'admissions_model.pkl', 'rb') as f:
    admissions_model = cloudpickle.load(f)

In [175]:
# Load the pickled callout model.
# NOTE(review): unpickling can execute arbitrary code -- only load model files from a trusted source.
with open(model_path + 'callout_model.pkl', 'rb') as f:
    callout_model = cloudpickle.load(f)

In [176]:
# Load the pickled icustays model.
# NOTE(review): unpickling can execute arbitrary code -- only load model files from a trusted source.
with open(model_path + 'icustays_model.pkl', 'rb') as f:
    icustays_model = cloudpickle.load(f)

---

## Generate data

In [177]:
# Draw 100 rows from the patients model.
# NOTE(review): no random seed is set in the visible cells, so results presumably differ between runs -- confirm.
patients_df = patients_model.sample(100)

Sampling rows: 100%|██████████| 100/100 [00:00<00:00, 6200.83it/s]


In [178]:
# Draw 130 rows from the admissions model (more admissions than the 100 sampled patients).
admissions_df = admissions_model.sample(130)

Sampling rows: 100%|██████████| 130/130 [00:00<00:00, 648.54it/s] 


In [179]:
# Draw 80 rows from the callout model.
callout_df = callout_model.sample(80)

In [180]:
# Draw 140 rows from the icustays model.
icustays_df = icustays_model.sample(140)

In [181]:
import random

def add_subject_id(df, subject_ids, live_ids):
    """Choose the ``subject_id`` to attach to a single admission row.

    Intended for ``DataFrame.apply(..., axis=1)`` on a frame whose row position
    has been exposed as an ``'index'`` column (e.g. via ``reset_index()``).

    Parameters
    ----------
    df : row-like mapping
        One row; must provide the keys ``'index'`` and ``'deathtime'``.
    subject_ids : sequence
        Every candidate subject id. Not modified.
    live_ids : list
        Ids that have not yet been matched to a death row. It is mutated in
        place: one id is popped from the end for every row that has a
        ``deathtime``, so the same id is never used for two deaths.

    Returns
    -------
    The chosen subject id.

    Raises
    ------
    IndexError
        If the row has a ``deathtime`` but ``live_ids`` is empty, or if a
        random pick is needed and ``subject_ids`` is empty.
    """
    if pd.isna(df['deathtime']):
        # No death on this admission: hand ids out one-to-one by row position
        # for as long as there are ids left (including the last one), then
        # fall back to a random id for the surplus rows.
        row_position = df['index']
        if row_position < len(subject_ids):
            return subject_ids[row_position]
        return random.choice(subject_ids)

    # The admission carries a death time: consume an id from the live pool so
    # that no patient can die more than once.
    if not live_ids:
        raise IndexError(
            "no live subject ids left to assign to an admission with a deathtime"
        )
    return live_ids.pop()

## Post-process data

In [182]:
# Turn a relative offset (in seconds) into an absolute timestamp
def reset_time(df, early_col_name, late_col_name, second_early_col_name=None):
    '''
    Convert the offset stored in `late_col_name` into an absolute time.

    The value in `late_col_name` is read as a number of seconds (truncated to an
    integer) and added to a base time taken from the same row:

    * `early_col_name` if that value is present;
    * otherwise `second_early_col_name`, when one is given and its value is present.

    Parameters
    ----------
    df : row-like mapping
        A single row (e.g. as passed by `DataFrame.apply(..., axis=1)`).
    early_col_name : str
        Column holding the preferred base time.
        NOTE(review): assumed to hold Timestamp-like values -- confirm against the callers.
    late_col_name : str
        Column holding the offset in seconds relative to the base time.
    second_early_col_name : str, optional
        Fallback base-time column used when `early_col_name` is missing.

    Returns
    -------
    The base time plus the offset, or `pd.NaT` when the offset is missing or
    when no usable base time is available.
    '''
    offset_seconds = df[late_col_name]
    if pd.isna(offset_seconds):
        return pd.NaT

    base_time = df[early_col_name]
    if pd.isna(base_time):
        # Without a fallback column there is nothing to anchor the offset to;
        # looking up `df[None]` would raise a KeyError, so report a missing time.
        if second_early_col_name is None:
            return pd.NaT
        base_time = df[second_early_col_name]
        if pd.isna(base_time):
            return pd.NaT

    return base_time + pd.Timedelta(seconds=int(offset_seconds))

### Admissions

In [183]:
# Pool of patients that have not yet been matched to a death record. `add_subject_id` pops an id
# from this list whenever an admission row has a `deathtime`, so each patient is used for at most one death.
# The first element of `args` is a separate list of all subject ids that is not modified.
live_ids = patients_df['subject_id'].tolist()
admissions_df['subject_id'] = admissions_df.reset_index().apply(add_subject_id, args=(patients_df['subject_id'].tolist(), live_ids), axis=1)

In [184]:
# Cast `subject_id` to int, then move it so it becomes the first column.
admissions_df['subject_id'] = admissions_df['subject_id'].astype(int)
admissions_df.insert(0, 'subject_id', admissions_df.pop('subject_id'))

In [185]:
# Attach each patient's `dob` to their admissions (inner join on `subject_id`).
admissions_df = patients_df[['subject_id', 'dob']].merge(
    admissions_df,
    on='subject_id',
    how='inner',
)

In [186]:
# Resolve the relative offsets into absolute times. Order matters: each step
# uses a base column that an earlier step has already converted.
for reset_args in (
    ('dob', 'admittime'),
    ('admittime', 'dischtime'),
    ('admittime', 'edregtime'),
    ('edregtime', 'edouttime'),
):
    admissions_df[reset_args[1]] = admissions_df.apply(reset_time, args=reset_args, axis=1)

In [187]:
# `dob` was only needed as the base time for `admittime`; remove it again.
admissions_df.drop(columns=['dob'], inplace=True)

### Callout

In [188]:
# Preview the sampled callout rows; the time columns still hold numeric offsets at this point.
callout_df.head()

Unnamed: 0,submit_wardid,curr_wardid,curr_careunit,callout_wardid,callout_service,request_tele,request_resp,request_cdiff,request_mrsa,request_vre,callout_status,callout_outcome,discharge_wardid,acknowledge_status,createtime,updatetime,acknowledgetime,outcometime
0,37,2,MICU,1,TRAUM,0,0,0,0,0,Inactive,Discharged,21.0,Acknowledged,718131.0,3846.0,6422.0,35979.0
1,10,37,MICU,55,MED,0,0,0,0,0,Inactive,Discharged,15.0,Acknowledged,1492046.0,4747.0,,49017.0
2,7,24,SICU,31,SURG,1,0,0,0,0,Inactive,Discharged,24.0,Acknowledged,170205.0,12909.0,3002.0,54913.0
3,43,2,CSRU,23,NSURG,1,0,0,0,0,Inactive,Discharged,40.0,Acknowledged,90691.0,11819.0,48765.0,31978.0
4,40,15,SICU,33,MED,1,0,0,0,0,Inactive,Cancelled,28.0,Acknowledged,631033.0,9276.0,57804.0,17233.0


In [189]:
# Keep only the admission keys and `admittime`; rows are sampled from this frame in later cells.
hadm_ids_df = admissions_df[['subject_id', 'hadm_id', 'admittime']]

In [190]:
# Give every callout row a ('subject_id', 'hadm_id', 'admittime') triple drawn
# with replacement from the admissions, placed in front of the callout columns.
callout_df = pd.concat(
    [
        hadm_ids_df.sample(n=callout_df.shape[0], replace=True).reset_index(drop=True),
        callout_df,
    ],
    axis=1,
)

In [191]:
# Resolve the relative offsets into absolute times. Order matters: each step
# builds on a column converted by an earlier step. `outcometime` falls back to
# `updatetime` when `acknowledgetime` is missing.
for reset_args in (
    ('admittime', 'createtime'),
    ('createtime', 'updatetime'),
    ('updatetime', 'acknowledgetime'),
    ('acknowledgetime', 'outcometime', 'updatetime'),
):
    callout_df[reset_args[1]] = callout_df.apply(reset_time, args=reset_args, axis=1)

In [192]:
# `admittime` was only needed as the base time for `createtime`; remove it again.
callout_df.drop(columns=['admittime'], inplace=True)

### Icustays

In [193]:
# Inspect the sampled icustays rows; `intime`/`outtime` still hold numeric offsets at this point.
# NOTE(review): this displays the whole frame -- consider `.head()` to keep the output short.
icustays_df

Unnamed: 0,icustay_id,dbsource,first_careunit,last_careunit,first_wardid,last_wardid,intime,outtime
0,251662,carevue,CCU,SICU,21,37,107598.0,322519.0
1,230664,carevue,NICU,CSRU,19,57,155378.0,71211.0
2,255981,carevue,NICU,CCU,57,44,118678.0,3120.0
3,255271,metavision,NICU,MICU,7,31,48276.0,3120.0
4,201891,carevue,CSRU,MICU,32,57,73617.0,476375.0
...,...,...,...,...,...,...,...,...
135,224816,carevue,SICU,CSRU,12,57,31.0,3120.0
136,235640,carevue,SICU,MICU,19,57,11422.0,121297.0
137,254375,carevue,NICU,MICU,57,26,31.0,3120.0
138,259200,carevue,SICU,SICU,33,22,78915.0,277144.0


In [194]:
# Give every icustays row a ('subject_id', 'hadm_id', 'admittime') triple drawn
# with replacement from the admissions, placed in front of the icustays columns.
icustays_df = pd.concat(
    [
        hadm_ids_df.sample(n=icustays_df.shape[0], replace=True).reset_index(drop=True),
        icustays_df,
    ],
    axis=1,
)

In [195]:
# Resolve the relative offsets into absolute times: `intime` is anchored on
# `admittime`, then `outtime` is anchored on the converted `intime`.
for reset_args in (
    ('admittime', 'intime'),
    ('intime', 'outtime'),
):
    icustays_df[reset_args[1]] = icustays_df.apply(reset_time, args=reset_args, axis=1)

In [197]:
# `admittime` was only needed as the base time for `intime`; remove it again.
icustays_df.drop(columns=['admittime'], inplace=True)