## This notebook contains scripts to create a table for modeling, from data in the "poc_sandbox" database.
### First, we grab informtion about the encounters we're interested in (time of admission, patient age and gender), then set up a panads dataframe with the fields we want to collect. We do this separately for data with RRT events and data without RRT events.
### Choose a time frame for data collection: here, we use a time window of 12 hours. We use the data from 13 hours - 1 hour before the RRT event (or non-event), to predict if the RRT event will happen at hour 0. Put another way, we use data from hours 0 - 12, to predict if an rrt event will happen at hour 13.
### For both RRT and non-RRT tables, for each encounter/patient we collect statistics on the fields of interest, looking within the time interval 13 - 1 hours before the event for vitals signs, and looking at anytime during the interval or earlier for information about medications and patient status (e.g. obesity, smoking status). 
### Finally, save file for future use.

In [4]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import datetime as datetime
from impala.util import as_pandas
import cPickle as pickle
%matplotlib notebook
plt.style.use('ggplot')

In [5]:
from impala.dbapi import connect
conn = connect(host="socbddn01.sharp.com", port=21050)
cur = conn.cursor()
cur.execute("use poc_sandbox")

### Set time window of interest to pull patient information from.

In [6]:
timewindow = 12
interval_hr = timewindow + 1

### Function definitions (3)

In [7]:
def create_master_rrt_df(interval_hr):
    '''
    Fill out encounter, patient info & set up columns
    input: the interval before rrt event to look up, in hours (12 would mean data from 12 hr - 1 hr before event)
    input: list of column names
    returns pandas dataframe
    '''

    col_list_rrt = [ 'rrt_ce_id', 'encntr_id', 
                    'event_end_dt_tm', # time the event happened (NOT the end time)
                    'timestart', 'timeend', 'age', 'sex', 
                    'obese', 'smoker', # is the patient obese, or a smoker
                    'prev_rrt', 'on_iv', # has the patient had a previous rrt event during their stay? Is s/he on iv fluids?
                    'bu-nal', # boolean for if patient has taken buprenorphine or naloxone
                    'DBP_mean', 'DBP_recent', # diastolic blood pressure
                    'SBP_mean', 'SBP_recent', # systolic blood pressure
                    'HR_mean', 'HR_recent', # heart rate
                    'MAP_mean', 'MAP_recent', # mean arterial pressure
                    'temp_mean', 'temp_recent',# temperature
                    'SPO2_mean', 'SPO2_recent', # dissoled oxygen
                    'RR_mean', 'RR_recent', # respiratory rate
                    'pulse_mean', 'pulse_recent', # pulse rate
                    'CO2_mean', 'CO2_recent', # carbon dioxide
                    'GCS_mean', 'GCS_recent', #Glasgow coma score
                    'anticoagulants', 'narcotics', 'narc-ans', #narc-ans: narcotic analgesics like codeine + tylenol
                    'antipsychotics', 'chemo', 'dialysis', 'race'
           ]
    
    
    query_rrttimes = '''
    SELECT ce.clinical_event_id as RRT_ce_id, 
    enc.encntr_id, ce.event_end_dt_tm
    , (ce.event_end_dt_tm - {0}*3600000) as timestart
    , (ce.event_end_dt_tm - 1*3600000) as timeend 
    , year(now()) - year(from_unixtime(CAST(p.birth_dt_tm/1000 as bigint))) AS age 
    , CASE p.sex_cd WHEN '362' then 'F' ELSE 'M' END as sex
    , cvr.description as race
    FROM encounter enc 
    INNER JOIN clinical_event ce ON enc.encntr_id = ce.encntr_id 
    INNER JOIN person p ON p.person_id = enc.person_id
    LEFT OUTER JOIN code_value cvr ON cvr.code_value = p.race_cd
    WHERE
    enc.loc_facility_cd='633867' 
    AND enc.encntr_complete_dt_tm < 4e12  
    AND ce.event_cd='54411998' 
    AND ce.result_status_cd NOT IN ('31', '36')  
    AND ce.valid_until_dt_tm > 4e12  
    AND ce.event_class_cd not in ('654645') 
    AND enc.admit_type_cd !='0' 
    AND enc.encntr_type_class_cd='391' ORDER BY enc.encntr_id, event_end_dt_tm
    ;'''.format(interval_hr)

    cur.execute(query_rrttimes)
    masterdf = as_pandas(cur)
    masterdf = masterdf.reindex(columns=col_list_rrt)
    masterdf['age'] = pd.to_numeric(masterdf.age, errors = 'coerce')
    
    # add reason for rrt
    query_rrtreasons = '''
    SELECT ce.event_end_dt_tm, ce.event_tag as rrt_reason
    FROM encounter enc 
    INNER JOIN clinical_event ce ON enc.encntr_id = ce.encntr_id 
    INNER JOIN person p ON p.person_id = enc.person_id
    WHERE
    enc.loc_facility_cd='633867' 
    AND enc.encntr_complete_dt_tm < 4e12  
    AND ce.event_cd='54408578' 
    AND ce.result_status_cd NOT IN ('31', '36')  
    AND ce.valid_until_dt_tm > 4e12  
    AND ce.event_class_cd not in ('654645') 
    AND enc.admit_type_cd !='0' 
    AND enc.encntr_type_class_cd='391' ORDER BY enc.encntr_id, event_end_dt_tm
    ;'''.format(interval_hr)

    cur.execute(query_rrtreasons)
    df_reasons = as_pandas(cur)
    
    return pd.merge(masterdf, df_reasons, how = 'left', on = 'event_end_dt_tm')

In [8]:
def create_master_nonrrt_df(interval_hr, numrows):
    '''
    Query to pull encounters & mid-times of stay for inpatient patients from Sharp memorial
    Note, {0} - interval hour in ms; {1} is 1 hr in ms; {2} is 2*interval_hr in ms
    numrows - the number of records / people you'd like to pull from the encounters table
    interval_hr = How many hours do you want to use as the timeframe? 12 hours = [12 hr - 1 hr] before prediction value. 
        (e.g. putting in 12 will return 11 hour time interval for records)
    '''
 
    col_list_nonrrt = [ 'encntr_id', 'not_rrt_time', # have to pick a time for a non-event, from a patients record
                    'timestart', 'timeend', 'age', 'sex', 
                    'obese', 'smoker', # is the patient obese, or a smoker
                    'prev_rrt', 'on_iv', # has the patient had a previous rrt event during their stay? Is s/he on iv fluids?
                    'bu-nal', # boolean for if patient has taken buprenorphine or naloxone
                    'DBP_mean', 'DBP_recent', # diastolic blood pressure
                    'SBP_mean', 'SBP_recent', # systolic blood pressure
                    'HR_mean', 'HR_recent', # heart rate
                    'MAP_mean', 'MAP_recent', # mean arterial pressure
                    'temp_mean', 'temp_recent',# temperature
                    'SPO2_mean', 'SPO2_recent', # dissoled oxygen
                    'RR_mean', 'RR_recent', # respiratory rate
                    'pulse_mean', 'pulse_recent', # pulse rate
                    'CO2_mean', 'CO2_recent', # carbon dioxide
                    'GCS_mean', 'GCS_recent', #Glasgow coma score
                    'anticoagulants', 'narcotics', 'narc-ans', #narc-ans: narcotic analgesics like codeine + tylenol
                    'antipsychotics', 'chemo', 'dialysis', 'race'
           ]

    query_notrrt = '''
    SELECT enc.encntr_id
    , round(enc.arrive_dt_tm + (enc.depart_dt_tm-enc.arrive_dt_tm)/2) as not_rrt_time
    , round(enc.arrive_dt_tm + (enc.depart_dt_tm-enc.arrive_dt_tm)/2) - {0} as timestart
    , round(enc.arrive_dt_tm + (enc.depart_dt_tm-enc.arrive_dt_tm)/2) - {1} as timeend
    , year(now()) - year(from_unixtime(CAST(p.birth_dt_tm/1000 as bigint))) AS age 
    , CASE p.sex_cd WHEN '362' then 'F' ELSE 'M' END as sex
    , cvr.description as race

    FROM encounter enc
    INNER JOIN person p on p.person_id = enc.person_id
    LEFT OUTER JOIN code_value cvr ON cvr.code_value = p.race_cd
    
    WHERE enc.depart_dt_tm - enc.arrive_dt_tm > {2}
    AND enc.admit_type_cd != '0'
    AND enc.encntr_type_class_cd = '391'
    AND enc.loc_facility_cd='633867'
    AND encntr_complete_dt_tm < 4e12
    AND enc.encntr_id NOT IN (  
        SELECT enc.encntr_id FROM encounter enc 
        INNER JOIN clinical_event ce ON enc.encntr_id = ce.encntr_id 
        WHERE enc.loc_facility_cd='633867' AND enc.encntr_complete_dt_tm < 4e12  
        AND ce.event_cd='54411998' 
        AND ce.result_status_cd NOT IN ('31', '36')  
        AND ce.valid_until_dt_tm > 4e12  
        AND ce.event_class_cd not in ('654645') 
        AND enc.admit_type_cd !='0' 
        AND enc.encntr_type_class_cd='391'  
                            )
    ORDER BY enc.encntr_id
    LIMIT {3}
    ;'''.format(interval_hr*3600*1000, 3600*1000, 2*interval_hr*3600*1000, numrows)

    cur.execute(query_notrrt)
    masternonrrt_df = as_pandas(cur)
    masternonrrt_df = masternonrrt_df.reindex(columns=col_list_nonrrt)
    masternonrrt_df['age'] = pd.to_numeric(masternonrrt_df.age, errors = 'coerce')
    return masternonrrt_df

In [21]:
def pull_and_writedata_2query(masterdf, i):
    '''
    Input: masterdf to write,  index, pairs (of col name - code value)
    returns: masterdf with row values filled out
    Warning: takes a long time to run
    '''
    enc = masterdf.encntr_id.iloc[i]
    timestart = masterdf.timestart.iloc[i]
    timeend = masterdf.timeend.iloc[i]
    age = masterdf.age.iloc[i]
    
    query = '''
    SELECT ce.encntr_id, ce.event_cd 
    ,cv_event_cd.description AS event_description
    ,ce.event_end_dt_tm AS unix_event_end_tm 
    , ce.result_val, ce.result_units_cd
    FROM clinical_event ce 
    JOIN encounter enc ON enc.encntr_id = ce.encntr_id
    LEFT OUTER JOIN code_value   cv_event_cd 
    ON   ce.event_cd           = cv_event_cd.code_value     
    WHERE ce.encntr_id = '{0}' 
    AND ce.event_end_dt_tm < {1} 
    AND ce.result_status_cd NOT IN ('31', '36')
    AND ce.event_class_cd NOT IN ('654645')
    AND ce.valid_until_dt_tm > 4e12
    AND ce.event_cd IN ('679984', '2797130','2798305', '703306', '703501', '703511', '703516', 
    '703540', '703558', '703565', '2700541', '2700653',
    '3623994', '4674677', '4686698','4690633', '679984', '2797130','2798305', '2797129', '75144985',
    '54411998', '2700653', '4674677', '3618608', '186470117') 
    ORDER BY ce.encntr_id, ce.performed_dt_tm;
    '''.format(enc, timeend)

    cur.execute(query)
    df = as_pandas(cur)
    df['result_val'] = pd.to_numeric(df.result_val, errors = 'coerce')

    df_timebox = df[df['unix_event_end_tm']>timestart]
    
    pairs = [('DBP', '703516'), ('SBP', '703501'), ('HR', '2700541'),
          ('MAP', '703306'), ('temp', '703558'), ('RR', '703540'),
          ('SPO2', '3623994'), ('pulse', '703511'),
           ('CO2', '4690633'), ('GCS', '703565')
          ]

    for pair in pairs:       
        if (df_timebox[df_timebox['event_cd']==pair[1]]).empty: # df with this event code is empty
            masterdf.ix[i, pair[0]+"_mean"] = np.nan
            masterdf.ix[i, pair[0]+"_recent"] = np.nan
        else:
            masterdf.ix[i, pair[0]+"_mean"] = df_timebox[df_timebox['event_cd']==pair[1]]['result_val'].mean()
            masterdf.ix[i, pair[0]+"_recent"] = df_timebox[df_timebox['event_cd']==pair[1]
                                                  ].sort_values(by='unix_event_end_tm', ascending=False).iloc[0]['result_val']
    

    if (df[df['event_cd']=='679984']).empty: 
        masterdf.ix[i, 'on_iv'] = 0
    else:
        masterdf.ix[i, 'on_iv'] = 1
        
    if ( (df[df['event_cd']=='2797130']).empty &
         (df[df['event_cd']=='2798305']).empty &
         (df[df['event_cd']=='2797129']).empty ):
        masterdf.ix[i, 'bu-nal'] = 0
    else:
        masterdf.ix[i, 'bu-nal'] = 1
        
    if df[df['event_cd']=='186470117'].empty:
        masterdf.ix[i, 'dialysis'] = 0
    else:
        masterdf.ix[i, 'dialysis'] = 1
        
    # smoking status - binary, 0 for nonsmoker/former smoker /unknown, 1 for smoker
    # assuming if more than 1 of these smoking lines exist, they will say the same thing.
    if (df[df.event_cd=='75144985']).empty:
        masterdf.ix[i, 'smoker'] = 0
    elif ( (df[df.event_cd=='75144985'].result_val.get_values()[0] == 'Heavy tobacco smoker') | 
     (df[df.event_cd=='75144985'].result_val.get_values()[0] == 'Light tobacco smoker') |
     (df[df.event_cd=='75144985'].result_val.get_values()[0] == 'Current every day smoker') |
     (df[df.event_cd=='75144985'].result_val.get_values()[0] == 'Current some day smoker') |
     (df[df.event_cd=='75144985'].result_val.get_values()[0] == 'Smoker, current status unknown')  ):
        masterdf.ix[i, 'smoker'] = 1
    else: 
        masterdf.ix[i, 'smoker'] = 0
        
    # previous RRT event
    if (df[df.event_cd=='54411998']).empty:
        masterdf.ix[i, 'prev_rrt'] = 0
    else:
        masterdf.ix[i, 'prev_rrt'] = 1
        
    # Obesity status
    if (df[df['event_cd']=='2700653']).empty:
        masterdf.ix[i, 'obese'] = np.nan
    elif (df[df['event_cd']=='4674677']).empty: 
        masterdf.ix[i, 'obese'] = np.nan
    else:
        # assuming there won't be much variation -- grab first value from both of height & weight
        height = pd.to_numeric(df[df['event_cd']=='2700653']['result_val'].get_values())[0]
        if df[df['event_cd']=='2700653']['result_units_cd'].get_values()[0] == '267':
            # convert inch -> cm
            height = height * 2.54
        height = height/100.0  # convert to get height in m
        weight = pd.to_numeric(df[df['event_cd']=='4674677']['result_val'].get_values())[0]
        bmi = weight / (height*height)
        if (bmi>30) & (age>19):
            masterdf.ix[i, 'obese'] = 1
        else:
            masterdf.ix[i, 'obese'] = 0
          
    
    # querying orders for medications
    query_ords = '''SELECT ords.encntr_id, mdx.multum_category_id, orig_order_dt_tm
    FROM (SELECT encntr_id, cki, substr(cki,9) as cki_id, order_id, orig_order_dt_tm FROM orders) ords 
    LEFT OUTER JOIN mltm_category_drug_xref mdx ON ords.cki_id = mdx.drug_identifier 
    LEFT OUTER JOIN mltm_drug_categories mdc ON mdc.multum_category_id = mdx.multum_category_id 
    WHERE mdx.multum_category_id IN ('261', '262','285', '283', '60', '191', '77', '210', '251', '341', '20', '21', 
                                   '22', '23', '24', '25', '26') 
    AND ords.encntr_id = '{0}'
    AND ords.orig_order_dt_tm < {1} 
    ;'''.format(enc, timeend)
    
    cur.execute(query_ords)
    df_ords = as_pandas(cur)
        
    if df_ords[(df_ords['multum_category_id']=='261') | (df_ords['multum_category_id']=='262') 
        | (df_ords['multum_category_id']=='283') | (df_ords['multum_category_id']=='285') ].empty:
        masterdf.ix[i, 'anticoagulants'] = 0
    else:
        masterdf.ix[i, 'anticoagulants'] = 1

    if (df_ords[df_ords['multum_category_id']=='60']).empty: # df with this event code is empty
        masterdf.ix[i, 'narcotics'] = 0
    else:
        masterdf.ix[i, 'narcotics'] = 1

    if (df_ords[df_ords['multum_category_id']=='191']).empty: # df with this event code is empty
        masterdf.ix[i, 'narc-ans'] = 0
    else:
        masterdf.ix[i, 'narc-ans'] = 1

    if df_ords[ (df_ords['multum_category_id']=='77') | (df_ords['multum_category_id']=='210') 
        | (df_ords['multum_category_id']=='251') | (df_ords['multum_category_id']=='341') ].empty:
        masterdf.ix[i, 'antipsychotics'] = 0
    else:
        masterdf.ix[i, 'antipsychotics'] = 1

    if df_ords[ (df_ords['multum_category_id']=='20') | (df_ords['multum_category_id']=='21') 
        | (df_ords['multum_category_id']=='22') | (df_ords['multum_category_id']=='23') 
        | (df_ords['multum_category_id']=='24') | (df_ords['multum_category_id']=='25')
        | (df_ords['multum_category_id']=='26') ].empty:
        masterdf.ix[i, 'chemo'] = 0
    else:
        masterdf.ix[i, 'chemo'] = 1
        
                
    return masterdf

# Start with RRT event info & add columns to the right.

In [10]:
# Query to pull encounters & RRT times:
masterdf_rrt = create_master_rrt_df(interval_hr)

In [11]:
masterdf_rrt.tail().T

Unnamed: 0,2058,2059,2060,2061,2062
rrt_ce_id,6759294458,6759874515,6757389136,6757896015,6758560074
encntr_id,99903348,99917967,99943580,99958933,99986155
event_end_dt_tm,1441474440000,1441497240000,1441429500000,1441438440000,1441454520000
timestart,1441427640000,1441450440000,1441382700000,1441391640000,1441407720000
timeend,1441470840000,1441493640000,1441425900000,1441434840000,1441450920000
age,67,72,63,65,38
sex,F,M,M,F,F
obese,,,,,
smoker,,,,,
prev_rrt,,,,,


In [12]:
len(masterdf_rrt)

2063

In [13]:
## LOOP TAKES A LONG TIME. For each encounter, it runs 2 queries & collects statistics into masterdf_rrt.

count = 0
for i in xrange(len(masterdf_rrt)):
# for i in xrange(3):
    count += 1
    print "On loop: {0} of {1}".format(count, len(masterdf_rrt))
    masterdf_rrt = pull_and_writedata_2query(masterdf_rrt, i)
masterdf_rrt.to_pickle('RRT_modeling_table_12hour_interval.p') # write to file automatically

On loop: 1 of 2063
On loop: 2 of 2063
On loop: 3 of 2063


In [13]:
masterdf_rrt.tail()

Unnamed: 0,rrt_ce_id,encntr_id,event_end_dt_tm,timestart,timeend,age,sex,obese,smoker,prev_rrt,...,CO2_recent,GCS_mean,GCS_recent,anticoagulants,narcotics,narc-ans,antipsychotics,chemo,dialysis,rrt_reason
2058,6759294458,99903348,1441474440000,1441449240000,1441470840000,67,F,0.0,0.0,0.0,...,,,,1.0,0.0,0.0,0.0,0.0,0.0,"Arrythmias, Tachycardia"
2059,6759874515,99917967,1441497240000,1441472040000,1441493640000,72,M,0.0,0.0,0.0,...,29.0,15.0,15.0,1.0,1.0,0.0,0.0,0.0,0.0,"Arrythmias, Increased Oxygen Need, Low O2 Satu..."
2060,6757389136,99943580,1441429500000,1441404300000,1441425900000,63,M,0.0,0.0,0.0,...,,,,0.0,1.0,1.0,0.0,0.0,0.0,Other: CTA Cardiac study. required IVP medication
2061,6757896015,99958933,1441438440000,1441413240000,1441434840000,65,F,,0.0,0.0,...,,,,0.0,1.0,0.0,1.0,0.0,0.0,"Staff concerned/Worried about Patient, Increas..."
2062,6758560074,99986155,1441454520000,1441429320000,1441450920000,38,F,0.0,0.0,0.0,...,,15.0,15.0,0.0,1.0,1.0,0.0,0.0,0.0,Chest pain


In [14]:
masterdf_rrt.describe().T
# how many nans? --> look at count.

Unnamed: 0,count,mean,std,min,25%,50%,75%,max
event_end_dt_tm,2063.0,1445053000000.0,14785120000.0,1410347000000.0,1431889000000.0,1445573000000.0,1457776000000.0,1471205000000.0
timestart,2063.0,1445028000000.0,14785120000.0,1410322000000.0,1431864000000.0,1445548000000.0,1457750000000.0,1471180000000.0
timeend,2063.0,1445050000000.0,14785120000.0,1410343000000.0,1431885000000.0,1445570000000.0,1457772000000.0,1471201000000.0
age,2063.0,67.41154,16.60477,18.0,58.0,69.0,80.0,101.0
obese,1594.0,0.2986198,0.4577964,0.0,0.0,0.0,1.0,1.0
smoker,2063.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
prev_rrt,2063.0,0.1619001,0.3684485,0.0,0.0,0.0,0.0,1.0
on_iv,2063.0,0.758604,0.4280336,0.0,1.0,1.0,1.0,1.0
bu-nal,2063.0,0.03296171,0.1785796,0.0,0.0,0.0,0.0,1.0
DBP_mean,1986.0,70.62269,12.75968,34.6,61.0,70.09191,79.48864,127.5714


# Prepare non-RRT modeling table!

In [None]:
# Set up a table like masterdf for non-RRT.
# Find encounters that are at least 12 hours long, are inpatient, etc
# Grab 2700 encounters - plenty more than the ~2050 rrt event samples we have, so we can 
# Set a "fake RRT" time -- 13 hours - 1 hour before this time.
# Treat same as for rrt from there on.


In [22]:
numrows = 2700
masterdf_nonrrt = create_master_nonrrt_df(interval_hr, numrows)

In [23]:
masterdf_nonrrt

Unnamed: 0,encntr_id,not_rrt_time,timestart,timeend,age,sex,obese,smoker,prev_rrt,on_iv,...,CO2_recent,GCS_mean,GCS_recent,anticoagulants,narcotics,narc-ans,antipsychotics,chemo,dialysis,race
0,100002668,1441556850000,1441510050000,1441553250000,52,F,,,,,...,,,,,,,,,,Declined
1,100004071,1442085270000,1442038470000,1442081670000,56,M,,,,,...,,,,,,,,,,Other Race
2,100006104,1448472600000,1448425800000,1448469000000,69,F,,,,,...,,,,,,,,,,Black or African American
3,100006763,1453907130000,1453860330000,1453903530000,65,M,,,,,...,,,,,,,,,,White
4,100007374,1442594580000,1442547780000,1442590980000,69,F,,,,,...,,,,,,,,,,White
5,100007910,1441741350000,1441694550000,1441737750000,36,M,,,,,...,,,,,,,,,,Black or African American
6,100008474,1441511190000,1441464390000,1441507590000,43,M,,,,,...,,,,,,,,,,White
7,100009711,1445125140000,1445078340000,1445121540000,53,M,,,,,...,,,,,,,,,,Other Race
8,100011598,1442938110000,1442891310000,1442934510000,66,M,,,,,...,,,,,,,,,,Other Race
9,100011883,1441608720000,1441561920000,1441605120000,48,F,,,,,...,,,,,,,,,,White


In [25]:
count = 0
# for i in xrange(len(masterdf_nonrrt)):
# for i in xrange(217,len(masterdf_nonrrt),1):
for i in xrange(3):
    count += 1
    print "On loop: {0}".format(count)
    masterdf_nonrrt = pull_and_writedata_2query(masterdf_nonrrt, i)
# masterdf_nonrrt.to_pickle('NonRRT_modeling_table_13hr_raw_fromv6.p')
masterdf_rrt.to_pickle('NonRRT_modeling_table_12hour_interval.p') # write to file automatically

On loop: 1
On loop: 2
On loop: 3


In [26]:
masterdf_nonrrt.head()

Unnamed: 0,encntr_id,not_rrt_time,timestart,timeend,age,sex,obese,smoker,prev_rrt,on_iv,...,CO2_recent,GCS_mean,GCS_recent,anticoagulants,narcotics,narc-ans,antipsychotics,chemo,dialysis,race
0,100002668,1441556850000,1441510050000,1441553250000,52,F,,0.0,0.0,1.0,...,,,,0.0,1.0,0.0,0.0,0.0,0.0,Declined
1,100004071,1442085270000,1442038470000,1442081670000,56,M,0.0,0.0,0.0,1.0,...,,15.0,15.0,1.0,1.0,1.0,0.0,0.0,0.0,Other Race
2,100006104,1448472600000,1448425800000,1448469000000,69,F,1.0,0.0,0.0,1.0,...,,15.0,15.0,0.0,1.0,1.0,0.0,0.0,0.0,Black or African American
3,100006763,1453907130000,1453860330000,1453903530000,65,M,,,,,...,,,,,,,,,,White
4,100007374,1442594580000,1442547780000,1442590980000,69,F,,,,,...,,,,,,,,,,White
