In [None]:
# Shell escape: run the `nvidia-smi` command-line tool and show its output in the notebook.
!nvidia-smi

In [None]:
import tensorflow as tf
import numpy as np
import pandas as pd
import warnings
from sklearn.model_selection import train_test_split
import os
from tableone import TableOne, load_dataset
import pandas as pd

 
# Report the TensorFlow build in use so results can be tied to a version.
print(tf.__version__)

# NOTE: this silences every warning for the rest of the session, including
# deprecation notices raised by the libraries used below.
warnings.filterwarnings("ignore")

# Restrict TensorFlow to the first physical GPU when one exists. Indexing
# gpus[0] unconditionally raises IndexError on a CPU-only machine, so in that
# case leave TensorFlow's default device configuration untouched.
gpus = tf.config.list_physical_devices(device_type='GPU')
if gpus:
    tf.config.set_visible_devices(devices=gpus[0], device_type='GPU')
else:
    print('No GPU detected; TensorFlow will use its default (CPU) devices.')

In [None]:
# Seed every random number generator this notebook relies on so that a
# Restart & Run All reproduces the same outputs.
import random  # local import: the notebook-level `import random` only appears in a later cell

seed = 42
# NOTE(review): PYTHONHASHSEED is read at interpreter start-up, so setting it
# here presumably only affects child processes, not this kernel - confirm.
os.environ['PYTHONHASHSEED'] = str(seed)
tf.random.set_seed(seed)
np.random.seed(seed)
# `most_common` (defined further down) falls back to random.choice, which
# draws from the stdlib generator; without this line that fallback is unseeded.
random.seed(seed)

In [None]:
# Load the ICU cohort table and substitute 0 for every missing cell.
df_icu = pd.read_csv('Data/ICU_subject.csv').fillna(0)
df_icu

In [None]:
# Load the ED cohort table and substitute 0 for every missing cell.
df_ed = pd.read_csv('Data/ED_subject.csv').fillna(0)
df_ed

In [None]:
# Distinct patient identifiers of each cohort (one entry per subject,
# however many rows the subject has in the cohort table).
unique_subject_id_icu = list({sid for sid in df_icu['icu_subject']})
unique_subject_id_ed = list({sid for sid in df_ed['ed_subject']})

In [None]:
# Load the demographics table; later cells read its 'subject_id', 'gender',
# 'anchor_age' and 'race' columns.
demographic = pd.read_csv('Data/demographic.csv')
import random
from statistics import mode, StatisticsError

def _mode_or_random(values):
    """Return the mode of ``values``, or a random element when ``mode`` cannot decide."""
    try:
        return mode(values)
    except StatisticsError:
        # statistics.mode raises on ties in Python < 3.8; fall back to a random pick.
        return random.choice(values)


def most_common(l):
    """Pick a single representative code from a list of codes.

    The most frequent code wins. If the winner is code 3 (string ``'3'`` or
    integer ``3``) and the list also contains other codes, the most frequent of
    those other codes is returned instead.

    The original comparison ``most_common == 3`` only matched integers, so the
    override never triggered for the single-character strings that the caller
    in this notebook passes in; comparing on ``str(...)`` handles both forms.

    Parameters
    ----------
    l : list
        Codes to choose from (characters such as ``'0'``..``'4'`` or ints).

    Returns
    -------
    The selected element of ``l``.

    Raises
    ------
    IndexError
        If ``l`` is empty (there is nothing to choose from).
    """
    if len(l) == 1:
        return l[0]

    winner = _mode_or_random(l)

    # Code 3 is only kept when no alternative code is present.
    if str(winner) == '3':
        alternatives = [x for x in l if x != winner]
        if alternatives:
            winner = _mode_or_random(alternatives)
    return winner

In [None]:
from IPython import display
# Collapse each patient's 'race' entry to one code: iterate the entry
# character by character, discard the comma separators, and let
# most_common choose among the remaining characters.
new_race_list = [
    most_common([ch for ch in race_entry if ch != ','])
    for race_entry in demographic['race']
]

In [None]:
# Swap the raw 'race' column for the collapsed single-code version
# (the new column is appended as the last column).
demographic = demographic.drop(columns=['race'])
demographic['race'] = new_race_list

In [None]:
# Persist the demographics table with the collapsed 'race' column.
# The DataFrame index is written too, since `index` is left at its default.
demographic.to_csv('Data/Processed_demo.csv')

In [None]:
# Build a dataframe with exactly one row per ICU patient: demographics
# (gender, age, race) plus the COPD flag and ventilation status taken from the
# patient's first row in df_icu.
gender_array = []
age_array = []
races_array = []
copd_array = []
vs_array = []
miss = 0  # ICU patients without a usable demographic record
for subject_id in unique_subject_id_icu:
    # Select the patient's demographic rows once instead of once per field.
    demo_rows = demographic.loc[demographic['subject_id'] == subject_id]
    try:
        gender = int(demo_rows['gender'].values[0])
        age = int(demo_rows['anchor_age'].values[0])
        race = int(demo_rows['race'].values[0])
    except (IndexError, ValueError, TypeError):
        # IndexError: the patient has no demographic row.
        # ValueError / TypeError: a value could not be converted to int.
        # The original `miss += 0` never counted anything.
        miss += 1
        gender = np.nan
        age = np.nan
        race = np.nan

    icu_rows = df_icu.loc[df_icu['icu_subject'] == subject_id]
    copd_array.append(icu_rows['COPD'].values[0])
    vs_array.append(icu_rows['ventilation_status'].values[0])
    gender_array.append(gender)
    age_array.append(age)
    races_array.append(race)

unique_df_icu = pd.DataFrame({'subject_id': unique_subject_id_icu, 'gender': gender_array,
                              'age': age_array, 'race': races_array,
                              'COPD': copd_array, 'ventilation_status': vs_array
                             })
print(miss)
unique_df_icu

In [None]:
# Build a dataframe with exactly one row per ED patient: demographics
# (gender, age, race) plus the COPD flag and ventilation status taken from the
# patient's first row in df_ed.
gender_array = []
age_array = []
races_array = []
copd_array = []
vs_array = []
miss = 0  # ED patients without a usable demographic record
for subject_id in unique_subject_id_ed:
    # Select the patient's demographic rows once instead of once per field.
    demo_rows = demographic.loc[demographic['subject_id'] == subject_id]
    try:
        gender = int(demo_rows['gender'].values[0])
        age = int(demo_rows['anchor_age'].values[0])
        race = int(demo_rows['race'].values[0])
    except (IndexError, ValueError, TypeError):
        # IndexError: the patient has no demographic row.
        # ValueError / TypeError: a value could not be converted to int.
        # The original `miss += 0` never counted anything.
        miss += 1
        gender = np.nan
        age = np.nan
        race = np.nan

    ed_rows = df_ed.loc[df_ed['ed_subject'] == subject_id]
    copd_array.append(ed_rows['COPD'].values[0])
    vs_array.append(ed_rows['ventilation_status'].values[0])
    gender_array.append(gender)
    age_array.append(age)
    races_array.append(race)

unique_df_ed = pd.DataFrame({'subject_id': unique_subject_id_ed, 'gender': gender_array,
                             'age': age_array, 'race': races_array,
                             'COPD': copd_array, 'ventilation_status': vs_array
                            })
print(miss)
unique_df_ed

In [None]:
# Recode ventilation_status into integer categories.
ventilation_codes = {np.nan: 0, 'Oxygen': 1, 'HighFlow': 2, 'Oxygen,HighFlow': 3}
unique_df_icu = unique_df_icu.replace({'ventilation_status': ventilation_codes})
unique_df_icu

In [None]:
# Recode ventilation_status into integer categories.
ventilation_codes = {np.nan: 0, 'Oxygen': 1, 'HighFlow': 2, 'Oxygen,HighFlow': 3}
unique_df_ed = unique_df_ed.replace({'ventilation_status': ventilation_codes})
unique_df_ed

In [None]:
# Keep only the cohort patients that actually have a TFRecord entry.
#
# The previous version grew df_ed / df_icu with DataFrame.append inside the
# loop, wrapped in a bare `except`. DataFrame.append was removed in pandas 2.0,
# so on current pandas every call raised AttributeError, the error was
# swallowed, and both frames silently stayed empty. Collecting the ids first
# and joining once avoids that and the quadratic row-by-row growth.
NUM_TFRECORD_SHARDS = 24  # shard files are numbered 0 .. NUM_TFRECORD_SHARDS - 1
filename = ['TFrecords/mimic-tf-record{i}.tfrecords'.format(i=i)
            for i in range(NUM_TFRECORD_SHARDS)]
raw_dataset = tf.data.TFRecordDataset(filename)

# subject_id of every record, in file order (a subject may occur repeatedly).
record_subject_ids = []
for raw_record in raw_dataset:
    # Each serialized record is a tf.train.Example; per the original author's
    # note one record presumably holds a subject with several chest X-rays.
    example = tf.train.Example()
    example.ParseFromString(raw_record.numpy())
    subject_id = example.features.feature['subject_id'].int64_list.value[0]
    record_subject_ids.append(int(subject_id))

records = pd.DataFrame({'subject_id': pd.Series(record_subject_ids, dtype='int64')})

# An inner merge keeps the record order of `records` and yields one output row
# per matching record, which is what the per-record append produced.
# Duplicates are removed by the following cells.
df_ed = records.merge(unique_df_ed, on='subject_id', how='inner', validate='many_to_one')
df_icu = records.merge(unique_df_icu, on='subject_id', how='inner', validate='many_to_one')

# Number of records whose subject belongs to neither cohort.
known_subject_ids = set(unique_df_ed['subject_id']) | set(unique_df_icu['subject_id'])
miss = sum(1 for sid in record_subject_ids if sid not in known_subject_ids)


In [None]:
# Drop fully identical rows so each matched ICU patient appears once
# (the first occurrence is kept, with its original index label).
df_icu = df_icu.drop_duplicates()
df_icu

In [None]:
# Drop fully identical rows so each matched ED patient appears once
# (the first occurrence is kept, with its original index label).
df_ed = df_ed.drop_duplicates()
df_ed

In [None]:
# Summary table ("Table 1") of the ICU cohort, stratified by COPD status,
# with p-values requested for the between-group comparison.
groupby = 'COPD'
columns = ['gender', 'age', 'race', 'COPD', 'ventilation_status']
categorical = columns  # every listed column is passed as categorical
mytable = TableOne(
    df_icu,
    columns=columns,
    categorical=categorical,
    groupby=groupby,
    pval=True,
)

In [None]:
# Display the ICU summary table built in the previous cell.
mytable

In [None]:
# Summary table ("Table 1") of the ED cohort, stratified by COPD status,
# with p-values requested for the between-group comparison.
groupby = 'COPD'
columns = ['gender', 'age', 'race', 'COPD', 'ventilation_status']
categorical = columns  # every listed column is passed as categorical
mytable = TableOne(
    df_ed,
    columns=columns,
    categorical=categorical,
    groupby=groupby,
    pval=True,
)
mytable
# Category codes noted by the original author (presumably gender, race and
# age band, in that order - the encoding itself is defined upstream):
# 0: Female, 1: Male
# 0: White, 1: Black, 2: Latino, 3: others, 4: Asian
# 0: -20, 1: 20-40, 2:40-60, 3: 60-80, 4: 80-

In [None]:
# Sorted, de-duplicated union of the subject ids present in either cohort.
a, b = (np.array(frame['subject_id']) for frame in (df_icu, df_ed))
concat = np.unique(np.concatenate([a, b]))

In [None]:
# Patient-level split: hold out 20% of subjects for test, then 20% of the
# remainder for validation (64% / 16% / 20% overall).
train_val, test = train_test_split(concat, test_size=0.2, random_state=42)
train, val = train_test_split(train_val, test_size=0.2, random_state=42)

In [None]:
# Write each split's subject ids to its own CSV file (np.savetxt's default
# number format is used, as no `fmt` is given).
for split_path, split_ids in (
    ('Data/COPD_val_list.csv', val),
    ('Data/COPD_test_list.csv', test),
    ('Data/COPD_train_list.csv', train),
):
    np.savetxt(split_path, split_ids, delimiter=",")