In [0]:
import tarfile, gzip
import pandas as pd
import numpy as np
import os as os
import json, glob
from collections import Counter
from collections import defaultdict
from itertools import chain
from datetime import datetime
from dateutil.relativedelta import relativedelta

# Notebook-wide pandas display setting: 'display.max_rows' is set to 30.
pd.set_option('display.max_rows', 30)

In [0]:
if (os.path.exists("synthea-data-chf.tar.gz") == False):
    !wget http://public.gi.ucsc.edu/~rcurrie/synthea-data-chf.tar.gz
if (os.path.exists("synthea-data-myocardial-infarction.tar.gz") == False):
    !wget http://public.gi.ucsc.edu/~rcurrie/synthea-data-myocardial-infarction.tar.gz

In [0]:
class FHIRDataCHF:
    """Reads FHIR patient bundles from a gzipped tar archive and collects,
    per patient, a dated list of codes plus congestive-heart-failure labels.

    After construction ``self.data`` maps patient id -> dict with the keys
    (in this order) ``chf_first_discharge``, ``chf_rehosp`` and ``codes``.
    The two labels are ``YYYY-MM-DD`` strings or None; ``codes`` is a
    one-element list wrapping the list of ``[date, display]`` pairs.
    """

    # Encounter class codes that count as a hospitalization.
    _HOSPITAL_CLASSES = ("IMP", "EMER")
    # Encounter reason that marks a CHF hospitalization.
    _CHF_REASON = "Chronic congestive heart failure (disorder)"
    # resourceType -> (key path to the date string, key of the coded concept)
    _RESOURCE_FIELDS = {
        "Observation": (("effectiveDateTime",), "code"),
        "Procedure": (("performedPeriod", "start"), "code"),
        "Condition": (("onsetDateTime",), "code"),
        "Immunization": (("occurrenceDateTime",), "vaccineCode"),
        "MedicationRequest": (("authoredOn",), "medicationCodeableConcept"),
    }

    def __init__(self, path, sample_size=None):
        """Parse the archive at ``path``; see ``extract_codes`` for arguments."""
        self.data = self.extract_codes(path, sample_size)

    def extract_codes(self, path, sample_size=None):
        """Build and return the patient dictionary for the archive at ``path``.

        Args:
            path: path of a ``.tar.gz`` archive with one JSON bundle per file.
            sample_size: maximum number of patient files to parse (expected
                to be non-negative); None parses every file in the archive.

        Returns:
            dict mapping patient id to its label/codes record.
        """
        fhircodes = {}
        processed = 0
        next_report = 1  # progress is printed at 1, 2, 4, 8, ... files

        with tarfile.open(path, "r:gz") as tfile:
            for member in tfile:
                if member.isdir():
                    continue
                # Check the limit BEFORE parsing so that exactly `sample_size`
                # files are read (checking afterwards read one file too many).
                if sample_size is not None and processed >= sample_size:
                    break

                self.extract_patient(fhircodes, pd.read_json(tfile.extractfile(member)))
                processed += 1

                if processed == next_report:
                    print("Processed " + str(processed) + " files")
                    next_report *= 2

        return fhircodes

    def extract_patient(self, fhircodes, patient):
        """Add one patient's record to ``fhircodes`` (mutated in place).

        Args:
            fhircodes: dict to update; keyed by patient id.
            patient: the parsed bundle; ``patient['entry']`` must yield the
                entry dicts and its first entry must carry the patient id.

        Collection stops at the first CHF hospitalization that starts at
        least 29 days after the first CHF discharge; that encounter is stored
        as ``chf_rehosp`` and is not itself added to the code list.
        """
        patient_id = patient['entry'][0]['resource']['id']
        patient_data = []  # [date, display] pairs in bundle order
        patient_label = {'chf_first_discharge': None, 'chf_rehosp': None}

        for entry in patient['entry']:
            resource = entry['resource']
            resource_type = resource['resourceType']

            if resource_type == "Encounter":
                start_date = resource['period']['start'][0:10]
                end_date = resource['period']['end'][0:10]
                try:
                    reason_code = resource['reasonCode'][0]['coding'][0]['display']
                except (KeyError, IndexError, TypeError):
                    # No usable reasonCode: fall back to the encounter type.
                    reason_code = resource['type'][0]['coding'][0]['display']

                class_code = resource['class']['code']
                if (class_code in FHIRDataCHF._HOSPITAL_CLASSES
                        and reason_code == FHIRDataCHF._CHF_REASON):
                    first_discharge = patient_label['chf_first_discharge']
                    if first_discharge is None:
                        patient_label['chf_first_discharge'] = end_date
                    elif patient_label['chf_rehosp'] is None:
                        # Same threshold as counting more than 29 calendar
                        # days in the inclusive range discharge..start.
                        gap = pd.Timestamp(start_date) - pd.Timestamp(first_discharge)
                        if gap.days >= 29:
                            patient_label['chf_rehosp'] = start_date
                            break
                patient_data.append([start_date, reason_code])
                continue

            fields = FHIRDataCHF._RESOURCE_FIELDS.get(resource_type)
            if fields is None:
                continue  # resource types outside the table are ignored
            date_path, concept_key = fields
            date_value = resource
            for key in date_path:
                date_value = date_value[key]
            start_date = date_value[0:10]
            reason_code = resource[concept_key]['coding'][0]['display']
            patient_data.append([start_date, reason_code])

        # Labels first, then codes: keeps the key order of the stored record.
        record = dict(patient_label)
        record['codes'] = [patient_data]
        fhircodes[patient_id] = record


class FramesCHF:
    """Builds the CHF training set from a patient dictionary.

    ``self.chf_trainingset`` is the list returned by ``DFUtil.normalize`` for
    the monthly bucket frames of every patient that has a
    ``chf_first_discharge`` value.
    """

    # Value passed as `window_range` to DFUtil.df_to_buckets, which applies
    # it as a number of months.
    WINDOW_MONTHS = 24

    def __init__(self, dict_patients, label):
        """
        Args:
            dict_patients: patient id -> record holding at least the keys
                ``chf_first_discharge``, ``codes`` and ``label``.
            label: name of the record key used as the training label.
        """
        # One row per patient, the patient id exposed as column 'id'.
        df = pd.DataFrame.from_dict(dict_patients).T.reset_index().rename(columns={'index':'id'})
        # Keep only patients with a first CHF discharge. Columns are selected
        # by name rather than position so the result does not depend on the
        # order in which pandas lays out the record keys.
        has_first_discharge = df['chf_first_discharge'].notnull()
        df_chf_codes = df.loc[has_first_discharge, ['id', 'codes']]
        # Convert the code lists to monthly bucket frames.
        df_buckets = DFUtil.df_to_buckets(df_chf_codes, dict_patients, label, FramesCHF.WINDOW_MONTHS)

        self.chf_trainingset = DFUtil.normalize(df_buckets, label)


In [0]:
class FHIRDataMYINF:
    """Reads FHIR patient bundles from a gzipped tar archive and collects,
    per patient, a dated list of codes plus a myocardial-infarction label.

    After construction ``self.data`` maps patient id -> dict with the keys
    (in this order) ``myinf_hosp`` and ``codes``. ``myinf_hosp`` is a
    ``YYYY-MM-DD`` string or None; ``codes`` is a one-element list wrapping
    the list of ``[date, display]`` pairs.
    """

    # Encounter class codes that count as a hospitalization.
    _HOSPITAL_CLASSES = ("IMP", "EMER")
    # Encounter reason that marks a myocardial-infarction hospitalization.
    _MYINF_REASON = "Myocardial Infarction"
    # resourceType -> (key path to the date string, key of the coded concept)
    _RESOURCE_FIELDS = {
        "Observation": (("effectiveDateTime",), "code"),
        "Procedure": (("performedPeriod", "start"), "code"),
        "Condition": (("onsetDateTime",), "code"),
        "Immunization": (("occurrenceDateTime",), "vaccineCode"),
        "MedicationRequest": (("authoredOn",), "medicationCodeableConcept"),
    }

    def __init__(self, path, sample_size=None):
        """Parse the archive at ``path``; see ``extract_codes`` for arguments."""
        self.data = self.extract_codes(path, sample_size)

    def extract_codes(self, path, sample_size=None):
        """Build and return the patient dictionary for the archive at ``path``.

        Args:
            path: path of a ``.tar.gz`` archive with one JSON bundle per file.
            sample_size: maximum number of patient files to parse (expected
                to be non-negative); None parses every file in the archive.

        Returns:
            dict mapping patient id to its label/codes record.
        """
        fhircodes = {}
        processed = 0
        next_report = 1  # progress is printed at 1, 2, 4, 8, ... files

        with tarfile.open(path, "r:gz") as tfile:
            for member in tfile:
                if member.isdir():
                    continue
                # Check the limit BEFORE parsing so that exactly `sample_size`
                # files are read (checking afterwards read one file too many).
                if sample_size is not None and processed >= sample_size:
                    break

                self.extract_patient(fhircodes, pd.read_json(tfile.extractfile(member)))
                processed += 1

                if processed == next_report:
                    print("Processed " + str(processed) + " files")
                    next_report *= 2

        return fhircodes

    def extract_patient(self, fhircodes, patient):
        """Add one patient's record to ``fhircodes`` (mutated in place).

        Args:
            fhircodes: dict to update; keyed by patient id.
            patient: the parsed bundle; ``patient['entry']`` must yield the
                entry dicts and its first entry must carry the patient id.

        Collection stops at the first hospitalization whose reason is
        myocardial infarction; its start date is stored as ``myinf_hosp`` and
        the encounter itself is not added to the code list.
        """
        patient_id = patient['entry'][0]['resource']['id']
        patient_data = []  # [date, display] pairs in bundle order
        patient_label = {'myinf_hosp': None}

        for entry in patient['entry']:
            resource = entry['resource']
            resource_type = resource['resourceType']

            if resource_type == "Encounter":
                # Only the start date is used here, so encounters without a
                # period end are accepted.
                start_date = resource['period']['start'][0:10]
                try:
                    reason_code = resource['reasonCode'][0]['coding'][0]['display']
                except (KeyError, IndexError, TypeError):
                    # No usable reasonCode: fall back to the encounter type.
                    reason_code = resource['type'][0]['coding'][0]['display']

                class_code = resource['class']['code']
                if (class_code in FHIRDataMYINF._HOSPITAL_CLASSES
                        and reason_code == FHIRDataMYINF._MYINF_REASON
                        and patient_label['myinf_hosp'] is None):
                    patient_label['myinf_hosp'] = start_date
                    # Stop collecting patient data
                    break

                patient_data.append([start_date, reason_code])
                continue

            fields = FHIRDataMYINF._RESOURCE_FIELDS.get(resource_type)
            if fields is None:
                continue  # resource types outside the table are ignored
            date_path, concept_key = fields
            date_value = resource
            for key in date_path:
                date_value = date_value[key]
            start_date = date_value[0:10]
            reason_code = resource[concept_key]['coding'][0]['display']
            patient_data.append([start_date, reason_code])

        # Label first, then codes: keeps the key order of the stored record.
        record = dict(patient_label)
        record['codes'] = [patient_data]
        fhircodes[patient_id] = record


class FramesMYINF:
    """Builds the myocardial-infarction training set from a patient dictionary.

    ``self.myinf_trainingset`` is the list returned by ``DFUtil.normalize``
    for the bucket frames of every patient that has a ``myinf_hosp`` value,
    generated once per step size.
    """

    # Value passed as `window_range` to DFUtil.df_to_buckets, which applies
    # it as a number of months.
    WINDOW_MONTHS = 6
    # Step sizes 0 .. NUM_STEPS-1 are passed to DFUtil.df_to_buckets; each
    # step moves the end of the window one month earlier.
    NUM_STEPS = 6

    def __init__(self, dict_patients):
        """
        Args:
            dict_patients: patient id -> record holding at least the keys
                ``myinf_hosp`` and ``codes``.
        """
        # One row per patient, the patient id exposed as column 'id'.
        df = pd.DataFrame.from_dict(dict_patients).T.reset_index().rename(columns={'index':'id'})
        # Keep only hospitalized patients. Columns are selected by name
        # rather than position so the result does not depend on the order in
        # which pandas lays out the record keys.
        was_hospitalized = df['myinf_hosp'].notnull()
        df_myinf_codes = df.loc[was_hospitalized, ['id', 'codes']]

        # One list of bucket frames per step size.
        dfs_buckets = [
            DFUtil.df_to_buckets(df_myinf_codes, dict_patients, 'myinf_hosp',
                                 FramesMYINF.WINDOW_MONTHS, step)
            for step in range(FramesMYINF.NUM_STEPS)
        ]

        # Flatten the per-step lists into a single list of frames.
        flatten_dfs_buckets = list(chain.from_iterable(dfs_buckets))
        # Normalize so all codes across patients appear in every dataframe.
        self.myinf_trainingset = DFUtil.normalize(flatten_dfs_buckets, 'myinf_hosp')


In [0]:
class DFUtil:
    """Static helpers that turn per-patient code lists into monthly one-hot
    DataFrames and then align or shuffle the columns of those frames."""

    @staticmethod
    def df_to_buckets(df_codes, dict_patients, label, window_range, step_size=0):
        """Convert each patient's code list into a frame of monthly buckets.

        Args:
            df_codes: DataFrame with columns ``id`` and ``codes``; each
                ``codes`` cell is a one-element list wrapping the list of
                ``[date, code]`` pairs.
            dict_patients: patient id -> record; ``record[label]`` is a
                ``YYYY-MM-DD`` string or None.
            label: record key used both to anchor the window and as the name
                of the label column.
            window_range: length of the window in months.
            step_size: number of months the window end is moved earlier.

        Returns:
            list of DataFrames, one per patient that has events with
            parseable dates. Rows are the month-ends inside the window
            (renumbered from 0), one column per code seen (1.0 when the code
            occurs in that month) plus the ``label`` column, which is 1.0
            only when the patient has a label value and ``step_size`` is 0.
        """
        # Offset object instead of the 'M' string alias: the alias is
        # deprecated in recent pandas, the offset class is accepted by all.
        month_end = pd.offsets.MonthEnd()
        frames = []

        for _, row in df_codes.iterrows():
            patient_id = row['id']
            events = row['codes'][0]
            if not events:
                # Nothing to bucket; also avoids indexing the last event of
                # an empty list below.
                continue

            # The window ends one month after the label date, or after the
            # patient's last event when there is no usable label date.
            try:
                anchor = dict_patients[patient_id][label]
                end_range = pd.to_datetime(datetime.strptime(anchor, '%Y-%m-%d').date())
            except (KeyError, TypeError, ValueError):
                end_range = pd.to_datetime(datetime.strptime(events[-1][0], '%Y-%m-%d').date())
            end_range = end_range + relativedelta(months=1) - relativedelta(months=step_size)
            start_range = end_range - relativedelta(months=window_range)

            # Restrict the events to the window.
            df = pd.DataFrame(events).rename(columns={0: 'date', 1: 'codes'})
            try:
                df['date'] = pd.to_datetime(df['date'])
            except (KeyError, TypeError, ValueError):
                continue  # skip patients whose dates cannot be parsed
            df = df[df['date'].between(start_range, end_range)].set_index('date')

            # Group codes by month.
            df = df.groupby(pd.Grouper(freq=month_end))
            df = df.aggregate(lambda x: tuple(x)).reset_index()

            # One {code: 1.0} dict per month -> one-hot columns.
            arr_codes = [{code: 1.0 for code in codes} for codes in df['codes']]
            df = df.join(pd.DataFrame.from_dict(arr_codes).fillna(0)).drop(columns=['codes'])

            # Fill in missing months, then replace the dates with 0..n-1.
            df = df.set_index('date')
            df = df.reindex(pd.date_range(start_range, end_range, freq=month_end), fill_value=0)
            df = df.reset_index(drop=True)

            # Add label
            if dict_patients[patient_id][label] is not None and step_size == 0:
                df[label] = 1.0
            else:
                df[label] = 0.0

            frames.append(df)

        return frames

    @staticmethod
    def normalize(bucket_frames, training_label):
        """Give every frame the same columns in the same order.

        The result columns are the sorted union of all frames' columns with
        ``training_label`` moved to the end; columns a frame lacked are
        filled with 0. The input frames are not modified.

        Raises:
            KeyError: if frames are given but none has ``training_label``.
        """
        if not bucket_frames:
            return []

        # A set keeps the union linear in the total number of columns.
        all_columns = set()
        for frame in bucket_frames:
            all_columns.update(frame.columns)
        if training_label not in all_columns:
            raise KeyError(training_label)

        ordered = sorted(all_columns - {training_label}) + [training_label]
        return [frame.reindex(columns=ordered, fill_value=0) for frame in bucket_frames]

    @staticmethod
    def shuffleColumns(dfs, training_label, num_shuffled):
        """Return ``num_shuffled`` copies of ``dfs`` with permuted columns.

        For each copy a random order of the columns of ``dfs[0]`` is drawn
        (numpy's global random state), ``training_label`` is kept as the last
        column, and every frame is reindexed to that order. Only the column
        labels are permuted, so values and dtypes are preserved. The input
        frames are not modified.

        Raises:
            KeyError: if ``dfs[0]`` has no ``training_label`` column.
        """
        if not dfs:
            return [[] for _ in range(num_shuffled)]
        if training_label not in dfs[0].columns:
            raise KeyError(training_label)

        feature_columns = [c for c in dfs[0].columns if c != training_label]
        dfs_shuffled = []
        for _ in range(num_shuffled):
            order = np.random.permutation(len(feature_columns))
            columns = [feature_columns[i] for i in order] + [training_label]
            dfs_shuffled.append([df.reindex(columns=columns) for df in dfs])

        return dfs_shuffled

In [0]:
class OSUtil:
    """Filesystem helpers for writing, reloading, zipping and deleting the
    exported training frames. Implemented with the standard library instead
    of shell commands so paths containing spaces or shell characters work."""

    @staticmethod
    def trainingset_from_csv(path):
        """Load every readable CSV in ``path`` into a list of DataFrames.

        Files are read in natural name order (``patient2`` before
        ``patient10``) using the ``Unnamed: 0`` column as index. A file that
        cannot be read is reported with ``print`` and skipped.
        """
        import re

        def natural_key(name):
            # re.split with a capture group alternates text / digit runs,
            # so the odd positions are the numbers.
            return [int(part) if i % 2 else part
                    for i, part in enumerate(re.split(r'(\d+)', name))]

        dfs_trainingset = []
        for file in sorted(os.listdir(path), key=natural_key):
            try:
                # listdir yields bare names; join with `path` so files are
                # found regardless of the current working directory.
                dfs_trainingset.append(pd.read_csv(os.path.join(path, file), index_col='Unnamed: 0'))
            except Exception as e:
                print(e)
                continue
        return dfs_trainingset

    @staticmethod
    def export_csv(path, dfs, overwrite=False):
        """Write ``dfs[i]`` to ``<path>/patient<i>.csv``.

        Args:
            path: output directory; created (with parents) when missing.
            dfs: sequence of DataFrames.
            overwrite: when true, delete the ``*.csv`` files already in
                ``path`` before writing.
        """
        os.makedirs(path, exist_ok=True)
        if overwrite:
            # glob.escape keeps special characters in `path` literal.
            for stale in glob.glob(os.path.join(glob.escape(path), '*.csv')):
                os.remove(stale)

        print('exporting ' + path)
        for i, df in enumerate(dfs):
            df.to_csv(os.path.join(path, 'patient' + str(i) + '.csv'))
        print('done')

    @staticmethod
    def zip_folder(input_path, output_path):
        """Zip the folder ``input_path`` into ``<output_path>.zip``.

        Entries are stored under the folder's own name
        (``<folder>/<file>``). An existing archive is replaced.
        """
        import shutil

        print('compressing ' + input_path)
        source = os.path.abspath(input_path)
        shutil.make_archive(
            os.path.abspath(output_path), 'zip',
            root_dir=os.path.dirname(source),
            base_dir=os.path.basename(source),
        )
        print('finished')

    @staticmethod
    def delete_folder(path):
        """Delete ``path`` recursively; a missing path is not an error."""
        import shutil

        if os.path.isdir(path) and not os.path.islink(path):
            shutil.rmtree(path)
        elif os.path.lexists(path):
            os.remove(path)

Create training data from congestive heart failure patients

In [778]:
# Parse patient bundles from the CHF archive, passing 16 as sample_size to limit how many are read.
fhirdata_chf = FHIRDataCHF("synthea-data-chf.tar.gz", 16)

Processed 1 files
Processed 2 files
Processed 4 files
Processed 8 files
Processed 16 files


In [0]:
# Build the CHF training frames, using the 'chf_rehosp' field as the label column.
chf_frames = FramesCHF(fhirdata_chf.data, "chf_rehosp")

In [780]:
# Show the first frame of the CHF training set.
display(chf_frames.chf_trainingset[0])

Unnamed: 0,10 ML Furosemide 10 MG/ML Injection,120 ACTUAT Fluticasone propionate 0.044 MG/ACTUAT Metered Dose Inhaler,24 HR Metformin hydrochloride 500 MG Extended Release Oral Tablet,24 HR metoprolol succinate 100 MG Extended Release Oral Tablet [Toprol],Abuse-Deterrent 12 HR Oxycodone Hydrochloride 15 MG Extended Release Oral Tablet,Acetaminophen 300 MG / HYDROcodone Bitartrate 5 MG Oral Tablet,Acetaminophen 325 MG / HYDROcodone Bitartrate 7.5 MG Oral Tablet,Acetaminophen 325 MG / Oxycodone Hydrochloride 5 MG Oral Tablet,Acetaminophen 325 MG Oral Tablet,Acute viral pharyngitis (disorder),Admission to orthopedic department,Alanine aminotransferase [Enzymatic activity/volume] in Serum or Plasma,Albumin [Mass/volume] in Serum or Plasma,Alkaline phosphatase [Enzymatic activity/volume] in Serum or Plasma,Alzheimer's disease (disorder),Amlodipine 5 MG Oral Tablet,Amoxicillin 500 MG Oral Tablet,Anemia (disorder),Appearance of Urine,Aspartate aminotransferase [Enzymatic activity/volume] in Serum or Plasma,Asthma,Atenolol 50 MG / Chlorthalidone 25 MG Oral Tablet,Bilirubin.total [Mass/volume] in Serum or Plasma,Bilirubin.total [Mass/volume] in Urine by Test strip,Bilirubin.total [Presence] in Urine by Test strip,Blood Pressure,Body Height,Body Mass Index,Body Weight,Body temperature,Bone density scan (procedure),Bone immobilization,Brief general examination (procedure),Calcium,Carbon Dioxide,Chloride,Chronic congestive heart failure (disorder),Chronic intractable migraine without aura,Chronic kidney disease stage 1 (disorder),Chronic pain,...,Peripheral blood smear interpretation,Plain chest X-ray,Plain chest X-ray (procedure),Platelet distribution width [Entitic volume] in Blood by Automated count,Platelet mean volume [Entitic volume] in Blood by Automated count,Platelets [#/volume] in Blood by Automated count,Pneumococcal conjugate PCV 13,Potassium,Protein [Mass/volume] in Serum or Plasma,Protein [Mass/volume] in Urine by Test strip,Protein [Presence] in Urine by Test 
strip,RBC Auto (Bld) [#/Vol],RDW - Erythrocyte distribution width Auto (RBC) [Entitic vol],Respiratory rate,Review of systems (procedure),Second degree burn,Simvastatin 10 MG Oral Tablet,Simvastatin 20 MG Oral Tablet,Sodium,Specific gravity of Urine by Test strip,Suture open wound,Td (adult) preservative free,Tobacco smoking status NHIS,Total Cholesterol,Total score [MMSE],Transthoracic echocardiography,Transthoracic three dimensional ultrasonography of heart (procedure),Triglycerides,Upper arm X-ray,Urea Nitrogen,Urgent care clinic (procedure),Verapamil Hydrochloride 40 MG,Viral sinusitis (disorder),WBC Auto (Bld) [#/Vol],Warfarin Sodium 5 MG Oral Tablet,amLODIPine 5 MG / Hydrochlorothiazide 12.5 MG / Olmesartan medoxomil 20 MG Oral Tablet,"insulin human, isophane 70 UNT/ML / Regular Insulin, Human 30 UNT/ML Injectable Suspension [Humulin]",pH of Urine by Test strip,"pneumococcal polysaccharide vaccine, 23 valent",chf_rehosp
0,0.0,0,0,0,0,0,0,0,0.0,0,0,0.0,0.0,0.0,0,0,0,0,0,0.0,0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0,0,0,0,0.0,0.0,0.0,0.0,0,0,0,...,0,0.0,0.0,0,0,0,0.0,0.0,0.0,0,0,0,0,0.0,0,0,0.0,0,0.0,0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0,0,0,0,0,0,0,0,0,0.0
1,0.0,0,0,0,0,0,0,0,0.0,0,0,0.0,0.0,0.0,0,0,0,0,0,0.0,0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0,0,0,0,0.0,0.0,0.0,0.0,0,0,0,...,0,0.0,0.0,0,0,0,0.0,0.0,0.0,0,0,0,0,0.0,0,0,0.0,0,0.0,0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0,0,0,0,0,0,0,0,0,0.0
2,0.0,0,0,0,0,0,0,0,0.0,0,0,0.0,0.0,0.0,0,0,0,0,0,0.0,0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0,0,0,0,0.0,0.0,0.0,0.0,0,0,0,...,0,0.0,0.0,0,0,0,0.0,0.0,0.0,0,0,0,0,0.0,0,0,0.0,0,0.0,0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0,0,0,0,0,0,0,0,0,0.0
3,0.0,0,0,0,0,0,0,0,1.0,0,0,0.0,0.0,0.0,0,0,0,0,0,0.0,0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0,0,0,0,0.0,0.0,0.0,0.0,0,0,0,...,0,0.0,0.0,0,0,0,0.0,0.0,0.0,0,0,0,0,0.0,0,0,0.0,0,0.0,0,1.0,0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0,0,0,0,0,0,0,0,0,0.0
4,0.0,0,0,0,0,0,0,0,0.0,0,0,0.0,0.0,0.0,0,0,0,0,0,0.0,0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0,0,0,0,0.0,0.0,0.0,0.0,0,0,0,...,0,0.0,0.0,0,0,0,0.0,0.0,0.0,0,0,0,0,0.0,0,0,0.0,0,0.0,0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0,0,0,0,0,0,0,0,0,0.0
5,0.0,0,0,0,0,0,0,0,0.0,0,0,0.0,0.0,0.0,0,0,0,0,0,0.0,0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0,0,0,0,0.0,0.0,0.0,0.0,0,0,0,...,0,0.0,0.0,0,0,0,0.0,0.0,0.0,0,0,0,0,0.0,0,0,0.0,0,0.0,0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0,0,0,0,0,0,0,0,0,0.0
6,0.0,0,0,0,0,0,0,0,0.0,0,0,1.0,1.0,1.0,0,0,0,0,0,1.0,0,0.0,1.0,0,0,0.0,0.0,0.0,0.0,0,0,0,0,1.0,1.0,1.0,0.0,0,0,0,...,0,0.0,0.0,0,0,0,0.0,1.0,1.0,0,0,0,0,0.0,0,0,1.0,0,1.0,0,0.0,0,0.0,1.0,0,0.0,0.0,1.0,0,1.0,0,0,0,0,0,0,0,0,0,0.0
7,0.0,0,0,0,0,0,0,0,0.0,0,0,0.0,0.0,0.0,0,0,0,0,0,0.0,0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0,0,0,0,0.0,0.0,0.0,0.0,0,0,0,...,0,0.0,0.0,0,0,0,0.0,0.0,0.0,0,0,0,0,0.0,0,0,0.0,0,0.0,0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0,0,0,0,0,0,0,0,0,0.0
8,0.0,0,0,0,0,0,0,0,0.0,0,0,0.0,0.0,0.0,0,0,0,0,0,0.0,0,0.0,0.0,0,0,0.0,0.0,0.0,0.0,0,0,0,0,0.0,0.0,0.0,0.0,0,0,0,...,0,0.0,0.0,0,0,0,0.0,0.0,0.0,0,0,0,0,0.0,0,0,0.0,0,0.0,0,0.0,0,0.0,0.0,0,0.0,0.0,0.0,0,0.0,0,0,0,0,0,0,0,0,0,0.0
9,0.0,0,0,0,0,0,0,0,0.0,0,0,0.0,0.0,0.0,0,0,0,0,0,0.0,0,1.0,0.0,0,0,1.0,1.0,1.0,1.0,0,0,0,0,1.0,1.0,1.0,0.0,0,0,0,...,0,0.0,0.0,0,0,0,0.0,1.0,0.0,0,0,0,0,1.0,0,0,0.0,0,1.0,0,0.0,0,1.0,0.0,0,0.0,0.0,0.0,0,1.0,0,0,0,0,0,0,0,0,0,0.0


In [0]:
# arr_dfs_shuffle = DFUtil.shuffleColumns(chf_frames.chf_trainingset, 'chf_rehosp', 10)

Create training data from myocardial infarction patients

In [782]:
# Parse patient bundles from the myocardial-infarction archive, passing 16 as sample_size to limit how many are read.
fhirdata_myinf = FHIRDataMYINF("synthea-data-myocardial-infarction.tar.gz", 16)

Processed 1 files
Processed 2 files
Processed 4 files
Processed 8 files
Processed 16 files


In [0]:
# Build the myocardial-infarction training frames (label column 'myinf_hosp').
myinf_frames = FramesMYINF(fhirdata_myinf.data)

In [784]:
# Show the first frame of the myocardial-infarction training set.
display(myinf_frames.myinf_trainingset[0])

Unnamed: 0,24 HR Metformin hydrochloride 500 MG Extended Release Oral Tablet,60 ACTUAT Fluticasone propionate 0.25 MG/ACTUAT / salmeterol 0.05 MG/ACTUAT Dry Powder Inhaler,Acetaminophen 325 MG Oral Tablet,Acute bronchitis (disorder),Acute viral pharyngitis (disorder),Alanine aminotransferase [Enzymatic activity/volume] in Serum or Plasma,Albumin [Mass/volume] in Serum or Plasma,Alkaline phosphatase [Enzymatic activity/volume] in Serum or Plasma,Amlodipine 5 MG Oral Tablet,Aspartate aminotransferase [Enzymatic activity/volume] in Serum or Plasma,Atenolol 50 MG / Chlorthalidone 25 MG Oral Tablet,Bilirubin.total [Mass/volume] in Serum or Plasma,Blood Pressure,Body Height,Body Mass Index,Body Weight,Body mass index (BMI) [Percentile] Per age and gender,Body temperature,Bone density scan (procedure),Bone immobilization,Calcium,Carbon Dioxide,Chloride,Clopidogrel 75 MG Oral Tablet,Colonoscopy,Coronary Heart Disease,Creatinine,DXA [T-score] Bone density,Digoxin 0.125 MG Oral Tablet,Electrical cardioversion,Emergency Encounter,Emergency room admission (procedure),Encounter for 'check-up',Encounter for check up (procedure),Encounter for problem (procedure),Erythrocyte distribution width [Entitic volume] by Automated count,Erythrocytes [#/volume] in Blood by Automated count,Estimated Glomerular Filtration Rate,FEV1/FVC,Fracture of forearm,...,Leukocytes [#/volume] in Blood by Automated count,Low Density Lipoprotein Cholesterol,MCH [Entitic mass] by Automated count,MCHC [Mass/volume] by Automated count,MCV [Entitic volume] by Automated count,Medication Reconciliation (procedure),Meperidine Hydrochloride 50 MG Oral Tablet,Microalbumin Creatinine Ratio,Naproxen sodium 220 MG Oral Tablet,Nitroglycerin 0.4 MG/ACTUAT Mucosal Spray,Nonproliferative diabetic retinopathy due to type 2 diabetes mellitus (disorder),Pain severity - 0-10 verbal numeric rating [Score] - Reported,Patient encounter procedure,Platelet distribution width [Entitic volume] in Blood by Automated count,Platelet 
mean volume [Entitic volume] in Blood by Automated count,Platelets [#/volume] in Blood by Automated count,Potassium,Protein [Mass/volume] in Serum or Plasma,Respiratory rate,Simvastatin 10 MG Oral Tablet,Simvastatin 20 MG Oral Tablet,Sinusitis (disorder),Sodium,Spirometry (procedure),Sputum examination (procedure),Td (adult) preservative free,Throat culture (procedure),Tobacco smoking status NHIS,Total Cholesterol,Triglycerides,Upper arm X-ray,Urea Nitrogen,Urgent care clinic (procedure),Verapamil Hydrochloride 40 MG,Viral sinusitis (disorder),Warfarin Sodium 5 MG Oral Tablet,Well child visit (procedure),amLODIPine 5 MG / Hydrochlorothiazide 12.5 MG / Olmesartan medoxomil 20 MG Oral Tablet,"insulin human, isophane 70 UNT/ML / Regular Insulin, Human 30 UNT/ML Injectable Suspension [Humulin]",myinf_hosp
0,0,0,0,0,0,0,0,0,0.0,0,0,0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0.0,0,0.0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,0,0,...,0.0,0,0.0,0.0,0.0,0.0,0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0,0.0,0,0,0,0,0.0,0,0.0,0,0,0,0,0,0,0,0,0,0.0,0,1.0
1,0,0,0,0,0,0,0,0,1.0,0,0,0,1.0,1.0,1.0,1.0,0,0,0,0,0,0,0,1.0,0,1.0,0,0,0,0,0,0,0,0,0,1.0,1.0,0,0,0,...,1.0,0,1.0,1.0,1.0,1.0,0,0,0,1.0,0,1.0,0.0,1.0,1.0,1.0,0,0,1.0,0,1.0,0,0,0,0,1.0,0,1.0,0,0,0,0,0,0,0,0,0,1.0,0,1.0
2,0,0,0,0,0,0,0,0,0.0,0,0,0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0.0,0,0.0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,0,0,...,0.0,0,0.0,0.0,0.0,0.0,0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0,0.0,0,0,0,0,0.0,0,0.0,0,0,0,0,0,0,0,0,0,0.0,0,1.0
3,0,0,0,0,0,0,0,0,0.0,0,0,0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0.0,0,0.0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,0,0,...,0.0,0,0.0,0.0,0.0,0.0,0,0,0,0.0,0,0.0,1.0,0.0,0.0,0.0,0,0,0.0,0,0.0,0,0,0,0,0.0,0,0.0,0,0,0,0,0,0,0,0,0,0.0,0,1.0
4,0,0,0,0,0,0,0,0,0.0,0,0,0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0.0,0,0.0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,0,0,...,0.0,0,0.0,0.0,0.0,0.0,0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0,0.0,0,0,0,0,0.0,0,0.0,0,0,0,0,0,0,0,0,0,0.0,0,1.0
5,0,0,0,0,0,0,0,0,0.0,0,0,0,0.0,0.0,0.0,0.0,0,0,0,0,0,0,0,0.0,0,0.0,0,0,0,0,0,0,0,0,0,0.0,0.0,0,0,0,...,0.0,0,0.0,0.0,0.0,0.0,0,0,0,0.0,0,0.0,0.0,0.0,0.0,0.0,0,0,0.0,0,0.0,0,0,0,0,0.0,0,0.0,0,0,0,0,0,0,0,0,0,0.0,0,1.0


Export dataframes to csv files and zip

In [0]:
#OSUtil.export_csv("csv-myocardial-infarction", myinf_frames.myinf_trainingset)

In [0]:
#OSUtil.zip_folder('csv-myocardial-infarction', 'csv-myocardial-infarction')