<a href="https://colab.research.google.com/github/GUOJL23/COMP90089/blob/main/DataExtractionProject.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
project_id = 'ml-project-437512'

In [2]:
# Import libraries
from datetime import timedelta
import os
import numpy as np
import pandas as pd
import re
import matplotlib.pyplot as plt
import matplotlib.dates as mdates
from sklearn.preprocessing import LabelEncoder

# Make pandas dataframes prettier
from IPython.display import display, HTML, Image
%matplotlib inline

plt.style.use('ggplot')
plt.rcParams.update({'font.size': 20})

# Access data using Google BigQuery.
from google.colab import auth
from google.cloud import bigquery

In [3]:
# Authenticating to use bigquery
auth.authenticate_user()
print('Authenticated')

Authenticated


In [4]:
if project_id == 'CHANGE-ME':
  raise ValueError('You must change project_id to your GCP project.')
os.environ["GOOGLE_CLOUD_PROJECT"] = project_id

def run_query(query, project_id=project_id):
  return pd.io.gbq.read_gbq(
        query,
        project_id=project_id,
        dialect='standard')

In [5]:
# set the dataset
# if you want to use the demo, change this to mimic_demo
dataset = 'mimiciv'

# test it works
df = run_query("""
  SELECT subject_id
  FROM `physionet-data.mimiciv_hosp.patients`
  WHERE subject_id = 10012853
""")
assert df.shape[0] == 1, 'unable to query MIMIC-IV!'

  return pd.io.gbq.read_gbq(


# (1) Data Extraction

In [6]:
# Query to get itemid for creatinine in labitems table
creatinine_query = """
    SELECT itemid, label
    FROM `physionet-data.mimiciv_hosp.d_labitems`
    WHERE label LIKE '%Creatinine%';
"""
creatinine_df = run_query(creatinine_query)
creatinine_df

  return pd.io.gbq.read_gbq(


Unnamed: 0,itemid,label
0,52024,"Creatinine, Whole Blood"
1,50912,Creatinine
2,52546,Creatinine
3,51937,"Creatinine, Stool"
4,51067,24 hr Creatinine
5,51070,"Albumin/Creatinine, Urine"
6,51073,"Amylase/Creatinine Ratio, Urine"
7,51080,Creatinine Clearance
8,51081,"Creatinine, Serum"
9,51082,"Creatinine, Urine"


In [7]:
# query to get simple creatinine level data from patients
creatinine_query_icu = run_query("""
  SELECT subject_id, charttime, safe_cast(value AS FLOAT64) as creatinine_level
  FROM `physionet-data.mimiciv_hosp.labevents`
  WHERE itemid in (51081,50912,51977)
""")
creatinine_query_icu

  return pd.io.gbq.read_gbq(


Unnamed: 0,subject_id,charttime,creatinine_level
0,10004235,2196-02-26 06:26:00,4.0
1,10015860,2192-05-27 07:15:00,4.2
2,10041894,2143-06-29 06:50:00,3.0
3,10048244,2122-07-27 12:00:00,3.1
4,10094811,2118-06-22 07:00:00,5.3
...,...,...,...
3282898,19326654,2154-06-25 06:19:00,1.8
3282899,19612461,2159-07-19 19:45:00,1.8
3282900,19751685,2119-06-16 04:45:00,1.8
3282901,19822462,2162-09-01 04:11:00,1.8


In [8]:
# Query to get subject_id, creatinine level and previous creatinine level of patients who had a creatinine level change of more that 0.3 mg/dL within 48 hrs or increased by 1.5 times in 7 days.
# info from https://www.ncbi.nlm.nih.gov/pmc/articles/PMC10100386/
creatinine_change_query_with_icu = run_query("""
WITH creatinine_data AS (
  SELECT
    subject_id,
    charttime,
    safe_cast(value AS FLOAT64) AS creatinine_level,
    valueuom,
    LAG(safe_cast(value AS FLOAT64)) OVER (PARTITION BY subject_id ORDER BY charttime) AS previous_creatinine_level,
    DATETIME_DIFF(charttime, LAG(charttime) OVER (PARTITION BY subject_id ORDER BY charttime), HOUR) AS time_diff_hours
  FROM `physionet-data.mimiciv_hosp.labevents`
  WHERE itemid IN (51081, 50912, 51977)
)

SELECT
  subject_id,
  creatinine_level,
  previous_creatinine_level,
  (creatinine_level - previous_creatinine_level) AS creatinine_change,
  charttime,
  CASE
    WHEN (time_diff_hours <= 48 AND (creatinine_level - previous_creatinine_level) >= 0.3) THEN '0.3 mg/dL increase in 48 hours'
    WHEN (time_diff_hours <= 168 AND (creatinine_level / NULLIF(previous_creatinine_level, 0)) >= 1.5) THEN '1.5x increase in 7 days'
    ELSE 'None'
  END AS condition_satisfied
FROM creatinine_data
WHERE
  previous_creatinine_level IS NOT NULL
  AND (
    (time_diff_hours <= 48 AND (creatinine_level - previous_creatinine_level) >= 0.3)
    OR
    (time_diff_hours <= 168 AND (creatinine_level / NULLIF(previous_creatinine_level, 0)) >= 1.5)
  )

""")
creatinine_change_query_with_icu

  return pd.io.gbq.read_gbq(


Unnamed: 0,subject_id,creatinine_level,previous_creatinine_level,creatinine_change,charttime,condition_satisfied
0,10059406,7.9,6.9,1.0,2121-12-26 07:35:00,0.3 mg/dL increase in 48 hours
1,10332722,8.5,7.5,1.0,2156-11-28 07:00:00,0.3 mg/dL increase in 48 hours
2,10332722,8.9,8.5,0.4,2156-11-28 19:20:00,0.3 mg/dL increase in 48 hours
3,10401337,24.1,23.6,0.5,2179-03-05 03:37:00,0.3 mg/dL increase in 48 hours
4,10401337,24.5,24.1,0.4,2179-03-05 09:42:00,0.3 mg/dL increase in 48 hours
...,...,...,...,...,...,...
105613,17038917,1.8,1.5,0.3,2141-04-10 01:22:00,0.3 mg/dL increase in 48 hours
105614,17481354,1.8,1.5,0.3,2111-11-08 08:20:00,0.3 mg/dL increase in 48 hours
105615,18575695,1.8,1.1,0.7,2165-12-02 10:57:00,1.5x increase in 7 days
105616,18855302,1.8,1.3,0.5,2169-08-20 08:10:00,0.3 mg/dL increase in 48 hours


In [9]:
# Get subject id, and medication details of patients taking drugs that are found to cause aki
medication_query = run_query("""
  SELECT
    subject_id,
    hadm_id,
    drug,
    ndc,
    starttime,
    stoptime
FROM
    `physionet-data.mimiciv_hosp.prescriptions`
WHERE
    LOWER(drug) IN (
        'ibuprofen', 'naproxen', 'aspirin', 'diclofenac', 'indomethacin',
        'gentamicin', 'tobramycin', 'vancomycin', 'amphotericin b', 'trimethoprim-sulfamethoxazole',
        'cephalexin', 'furosemide', 'bumetanide', 'hydrochlorothiazide', 'spironolactone',
        'amiloride', 'lisinopril', 'enalapril', 'ramipril', 'captopril',
        'losartan', 'valsartan', 'irbesartan', 'candesartan', 'cisplatin',
        'carboplatin', 'methotrexate', 'ifosfamide', 'cyclosporine', 'tacrolimus',
        'sirolimus', 'acyclovir', 'foscarnet', 'tenofovir', 'cidofovir',
        'omeprazole', 'pantoprazole', 'lansoprazole', 'esomeprazole', 'allopurinol',
        'lithium'
    )
ORDER BY
    subject_id, starttime
LIMIT 1000
""")
medication_query

  return pd.io.gbq.read_gbq(


Unnamed: 0,subject_id,hadm_id,drug,ndc,starttime,stoptime
0,10000032,22595853,Furosemide,51079007220,2180-05-07 01:00:00,2180-05-07 09:00:00
1,10000032,22595853,Spironolactone,63739054410,2180-05-07 01:00:00,2180-05-07 09:00:00
2,10000032,22595853,Furosemide,51079007320,2180-05-08 08:00:00,2180-05-07 22:00:00
3,10000032,22595853,Spironolactone,63739054410,2180-05-08 08:00:00,2180-05-07 22:00:00
4,10000032,22841357,Furosemide,51079007220,2180-06-26 23:00:00,2180-06-26 22:00:00
...,...,...,...,...,...,...
995,10008924,27441295,Furosemide,00182117089,2139-04-17 20:00:00,2139-04-18 09:00:00
996,10008924,27441295,Pantoprazole,00008084199,2139-04-18 16:00:00,2139-04-22 19:00:00
997,10008924,27441295,Furosemide,00182116189,2139-04-18 20:00:00,2139-04-22 19:00:00
998,10008924,23676183,Omeprazole,00093521193,2139-07-08 20:00:00,2139-07-09 23:00:00


In [10]:
# Medication details of patinets who satify aki criteria using result of last query
subject_ids = tuple(creatinine_change_query_with_icu['subject_id'].unique())

# New query to fetch medication details based on subject_id from previous query
medication_query_with_creatinine_change = run_query(f"""
SELECT
  subject_id,
  hadm_id,
  drug,
  ndc,
  starttime,
  stoptime
FROM
  `physionet-data.mimiciv_hosp.prescriptions`
WHERE
  subject_id IN {subject_ids}
  AND LOWER(drug) IN (
    'ibuprofen', 'naproxen', 'aspirin', 'diclofenac', 'indomethacin',
    'gentamicin', 'tobramycin', 'vancomycin', 'amphotericin b', 'trimethoprim-sulfamethoxazole',
    'cephalexin', 'furosemide', 'bumetanide', 'hydrochlorothiazide', 'spironolactone',
    'amiloride', 'lisinopril', 'enalapril', 'ramipril', 'captopril',
    'losartan', 'valsartan', 'irbesartan', 'candesartan', 'cisplatin',
    'carboplatin', 'methotrexate', 'ifosfamide', 'cyclosporine', 'tacrolimus',
    'sirolimus', 'acyclovir', 'foscarnet', 'tenofovir', 'cidofovir',
    'omeprazole', 'pantoprazole', 'lansoprazole', 'esomeprazole', 'allopurinol',
    'lithium'
  )
ORDER BY
  subject_id, starttime
""")

  return pd.io.gbq.read_gbq(


In [11]:
# Assuming medication_query_with_creatinine_change contains the new medication data
medication_data = medication_query_with_creatinine_change

# Merge the creatinine change data with medication data
merged_data = pd.merge(
    medication_data,
    creatinine_change_query_with_icu,
    on='subject_id',  # You can join on subject_id
    how='inner'
)

# Ensure that 'charttime' and 'starttime' are in datetime format
merged_data['charttime'] = pd.to_datetime(merged_data['charttime'])
merged_data['starttime'] = pd.to_datetime(merged_data['starttime'])

# Calculate the difference in days and add a new column 'days_prescribed_ahead'
merged_data['days_prescribed_ahead'] = (merged_data['charttime'] - merged_data['starttime']).dt.days

# Filter medications prescribed before the creatinine charttime
filtered_data = merged_data[merged_data['starttime'] < merged_data['charttime']]

In [12]:
filtered_data.info()

<class 'pandas.core.frame.DataFrame'>
Index: 3369055 entries, 0 to 5797282
Data columns (total 12 columns):
 #   Column                     Dtype         
---  ------                     -----         
 0   subject_id                 Int64         
 1   hadm_id                    Int64         
 2   drug                       object        
 3   ndc                        object        
 4   starttime                  datetime64[us]
 5   stoptime                   datetime64[us]
 6   creatinine_level           float64       
 7   previous_creatinine_level  float64       
 8   creatinine_change          float64       
 9   charttime                  datetime64[us]
 10  condition_satisfied        object        
 11  days_prescribed_ahead      float64       
dtypes: Int64(2), datetime64[us](3), float64(4), object(3)
memory usage: 340.6+ MB


In [13]:
demographics_query = run_query(f"""
SELECT
    subject_id,
    gender,
    anchor_age
FROM
    `physionet-data.mimiciv_hosp.patients`
WHERE
    subject_id IN {subject_ids}
ORDER BY
    subject_id
""")
demographics_query

  return pd.io.gbq.read_gbq(


Unnamed: 0,subject_id,gender,anchor_age
0,10000032,F,52
1,10000935,F,52
2,10000980,F,73
3,10001338,F,43
4,10002013,F,53
...,...,...,...
28861,19997538,M,53
28862,19997752,F,66
28863,19998330,F,71
28864,19998497,F,82


In [14]:
merged_data = pd.merge(
    merged_data,           # Use the DataFrame that contains medication and creatinine change data
    demographics_query,      # New demographics data
    on='subject_id',        # Join on subject_id
    how='left'              # Use left join to keep all entries from filtered_data
)
merged_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 5797287 entries, 0 to 5797286
Data columns (total 14 columns):
 #   Column                     Dtype         
---  ------                     -----         
 0   subject_id                 Int64         
 1   hadm_id                    Int64         
 2   drug                       object        
 3   ndc                        object        
 4   starttime                  datetime64[us]
 5   stoptime                   datetime64[us]
 6   creatinine_level           float64       
 7   previous_creatinine_level  float64       
 8   creatinine_change          float64       
 9   charttime                  datetime64[us]
 10  condition_satisfied        object        
 11  days_prescribed_ahead      float64       
 12  gender                     object        
 13  anchor_age                 Int64         
dtypes: Int64(3), datetime64[us](3), float64(4), object(4)
memory usage: 635.8+ MB


In [15]:
race_query = run_query(f"""
SELECT
    distinct(subject_id),
    race
FROM
    `physionet-data.mimiciv_hosp.admissions`
WHERE
    subject_id IN {subject_ids}
ORDER BY
    subject_id
""")
race_query

  return pd.io.gbq.read_gbq(


Unnamed: 0,subject_id,race
0,10000032,WHITE
1,10000935,BLACK/AFRICAN AMERICAN
2,10000980,BLACK/AFRICAN AMERICAN
3,10001338,WHITE
4,10002013,WHITE
...,...,...
31497,19997538,WHITE
31498,19997752,WHITE
31499,19998330,BLACK/AFRICAN AMERICAN
31500,19998497,WHITE


In [16]:
merged_data = pd.merge(
    merged_data,           # Use the DataFrame that contains medication and creatinine change data
    race_query,      # New demographics data
    on='subject_id',        # Join on subject_id
    how='left'              # Use left join to keep all entries from filtered_data
)
merged_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7188284 entries, 0 to 7188283
Data columns (total 15 columns):
 #   Column                     Dtype         
---  ------                     -----         
 0   subject_id                 Int64         
 1   hadm_id                    Int64         
 2   drug                       object        
 3   ndc                        object        
 4   starttime                  datetime64[us]
 5   stoptime                   datetime64[us]
 6   creatinine_level           float64       
 7   previous_creatinine_level  float64       
 8   creatinine_change          float64       
 9   charttime                  datetime64[us]
 10  condition_satisfied        object        
 11  days_prescribed_ahead      float64       
 12  gender                     object        
 13  anchor_age                 Int64         
 14  race                       object        
dtypes: Int64(3), datetime64[us](3), float64(4), object(5)
memory usage: 843.2+ MB


In [17]:
bmi_query = run_query(f"""
SELECT
    subject_id,
    AVG(CAST(result_value AS FLOAT64)) AS avg_bmi
FROM
    `physionet-data.mimiciv_hosp.omr`
WHERE
    LOWER(result_name) LIKE '%bmi%' AND
    subject_id IN {subject_ids}
GROUP BY
    subject_id
ORDER BY
    subject_id
""")
bmi_query

  return pd.io.gbq.read_gbq(


Unnamed: 0,subject_id,avg_bmi
0,10000032,18.537500
1,10000980,30.928767
2,10001338,25.200000
3,10002013,36.241237
4,10002430,25.900000
...,...,...
20002,19997538,35.633333
20003,19997752,27.600000
20004,19998330,28.185714
20005,19998497,19.400000


In [18]:
merged_data = pd.merge(
    merged_data,           # Use the DataFrame that contains medication and creatinine change data
    bmi_query,
    on='subject_id',        # Join on subject_id
    how='left'              # Use left join to keep all entries from filtered_data
)
merged_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7188284 entries, 0 to 7188283
Data columns (total 16 columns):
 #   Column                     Dtype         
---  ------                     -----         
 0   subject_id                 Int64         
 1   hadm_id                    Int64         
 2   drug                       object        
 3   ndc                        object        
 4   starttime                  datetime64[us]
 5   stoptime                   datetime64[us]
 6   creatinine_level           float64       
 7   previous_creatinine_level  float64       
 8   creatinine_change          float64       
 9   charttime                  datetime64[us]
 10  condition_satisfied        object        
 11  days_prescribed_ahead      float64       
 12  gender                     object        
 13  anchor_age                 Int64         
 14  race                       object        
 15  avg_bmi                    float64       
dtypes: Int64(3), datetime64[us](3), floa

In [19]:
diagnosis_query = run_query(f"""
SELECT
    subject_id,
    hadm_id,
    MAX(CASE WHEN icd_code = 'N170' THEN 1 ELSE 0 END) AS N170,
    MAX(CASE WHEN icd_code = 'N171' THEN 1 ELSE 0 END) AS N171,
    MAX(CASE WHEN icd_code = 'N172' THEN 1 ELSE 0 END) AS N172,
    MAX(CASE WHEN icd_code = 'N178' THEN 1 ELSE 0 END) AS N178,
    MAX(CASE WHEN icd_code = 'N179' THEN 1 ELSE 0 END) AS N179,
FROM
    `physionet-data.mimiciv_hosp.diagnoses_icd`
WHERE
    icd_version = 10 AND subject_id IN {subject_ids}
GROUP BY
    subject_id, hadm_id
ORDER BY
    subject_id, hadm_id;
""")
diagnosis_query

  return pd.io.gbq.read_gbq(


Unnamed: 0,subject_id,hadm_id,N170,N171,N172,N178,N179
0,10000980,20897796,0,0,0,0,0
1,10000980,25911675,0,0,0,0,0
2,10000980,29659838,0,0,0,0,0
3,10002013,21763296,0,0,0,0,0
4,10002013,25442395,0,0,0,0,1
...,...,...,...,...,...,...,...
49936,19997538,22701415,0,0,0,0,0
49937,19997538,26704044,0,0,0,0,0
49938,19997752,29452285,0,0,0,0,1
49939,19999828,25744818,0,0,0,0,0


In [20]:
merged_data = pd.merge(
    merged_data,           # Use the DataFrame that contains medication and creatinine change data
    diagnosis_query,
    on=['subject_id','hadm_id'],        # Join on subject_id
    how='left'              # Use left join to keep all entries from filtered_data
)
merged_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7188284 entries, 0 to 7188283
Data columns (total 21 columns):
 #   Column                     Dtype         
---  ------                     -----         
 0   subject_id                 Int64         
 1   hadm_id                    Int64         
 2   drug                       object        
 3   ndc                        object        
 4   starttime                  datetime64[us]
 5   stoptime                   datetime64[us]
 6   creatinine_level           float64       
 7   previous_creatinine_level  float64       
 8   creatinine_change          float64       
 9   charttime                  datetime64[us]
 10  condition_satisfied        object        
 11  days_prescribed_ahead      float64       
 12  gender                     object        
 13  anchor_age                 Int64         
 14  race                       object        
 15  avg_bmi                    float64       
 16  N170                       Int64    

In [21]:
# File path to save the CSV file
csv_file_path = 'final_patient_data.csv'

# Save the final_result DataFrame to a CSV file
merged_data.to_csv(csv_file_path, index=False)

# Optionally, you can print a confirmation message
print(f"Final result saved to {csv_file_path}")


Final result saved to final_patient_data.csv


add diabetes info, histrory of aki, change medicaiton time to duration of medication
bmi of patient 7 days within medicaiton start time


# (2) Data Processing

In [33]:
ml_data = merged_data
ml_data

Unnamed: 0,subject_id,hadm_id,drug,drug_encoded,ndc,starttime,stoptime,creatinine_level,previous_creatinine_level,creatinine_change,...,gender_encoded,anchor_age,race,race_encoded,avg_bmi,N170,N171,N172,N178,N179
0,10000032,22595853,Spironolactone,32,63739054410,2180-05-07 01:00:00,2180-05-07 09:00:00,0.6,0.3,0.3,...,0,52,WHITE,28,18.5375,,,,,
1,10000032,22595853,Spironolactone,32,63739054410,2180-05-07 01:00:00,2180-05-07 09:00:00,0.8,0.4,0.4,...,0,52,WHITE,28,18.5375,,,,,
2,10000032,22595853,Furosemide,15,51079007220,2180-05-07 01:00:00,2180-05-07 09:00:00,0.6,0.3,0.3,...,0,52,WHITE,28,18.5375,,,,,
3,10000032,22595853,Furosemide,15,51079007220,2180-05-07 01:00:00,2180-05-07 09:00:00,0.8,0.4,0.4,...,0,52,WHITE,28,18.5375,,,,,
4,10000032,22595853,Spironolactone,32,63739054410,2180-05-08 08:00:00,2180-05-07 22:00:00,0.6,0.3,0.3,...,0,52,WHITE,28,18.5375,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7188279,19999828,29734428,Omeprazole,28,60505006500,2147-07-29 08:00:00,2147-08-04 22:00:00,0.6,0.3,0.3,...,0,46,WHITE,28,25.7000,0,0,0,0,0
7188280,19999828,25744818,Vancomycin,37,00338355248,2149-01-08 20:00:00,2149-01-14 08:00:00,0.6,0.3,0.3,...,0,46,WHITE,28,25.7000,0,0,0,0,0
7188281,19999828,25744818,Omeprazole,28,00904568461,2149-01-08 21:00:00,2149-01-18 23:00:00,0.6,0.3,0.3,...,0,46,WHITE,28,25.7000,0,0,0,0,0
7188282,19999828,25744818,Aspirin,4,00536100836,2149-01-11 08:00:00,2149-01-18 23:00:00,0.6,0.3,0.3,...,0,46,WHITE,28,25.7000,0,0,0,0,0


In [34]:
label_encoder = LabelEncoder()

# Drug
ml_data['drug_encoded'] = label_encoder.fit_transform(ml_data['drug'])
drug_encoded = ml_data.pop('drug_encoded')
ml_data.insert(ml_data.columns.get_loc('drug') + 1, 'drug_encoded', drug_encoded)

# Gender
ml_data['gender_encoded'] = label_encoder.fit_transform(ml_data['gender'])
gender_encoded = ml_data.pop('gender_encoded')
ml_data.insert(ml_data.columns.get_loc('gender') + 1, 'gender_encoded', gender_encoded)

# Racen
ml_data['race_encoded'] = label_encoder.fit_transform(ml_data['race'])
race_encoded = ml_data.pop('race_encoded')
ml_data.insert(ml_data.columns.get_loc('race') + 1, 'race_encoded', race_encoded)

ml_data.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 7188284 entries, 0 to 7188283
Data columns (total 24 columns):
 #   Column                     Dtype         
---  ------                     -----         
 0   subject_id                 Int64         
 1   hadm_id                    Int64         
 2   drug                       object        
 3   drug_encoded               int64         
 4   ndc                        object        
 5   starttime                  datetime64[us]
 6   stoptime                   datetime64[us]
 7   creatinine_level           float64       
 8   previous_creatinine_level  float64       
 9   creatinine_change          float64       
 10  charttime                  datetime64[us]
 11  condition_satisfied        object        
 12  days_prescribed_ahead      float64       
 13  gender                     object        
 14  gender_encoded             int64         
 15  anchor_age                 Int64         
 16  race                       object   

In [39]:
ml_data = ml_data[['subject_id', 'hadm_id', 'drug_encoded', 'ndc', 'creatinine_level', 'previous_creatinine_level', 'creatinine_change', 'days_prescribed_ahead', 'gender_encoded', 'anchor_age', 'race_encoded', 'avg_bmi', 'N170', 'N171',	'N172',	'N178',	'N179']]

csv_file_path = '/content/drive/My Drive/final_ml_data.csv'
ml_data.to_csv(csv_file_path, index=False)

ml_data

Unnamed: 0,subject_id,hadm_id,drug_encoded,ndc,creatinine_level,previous_creatinine_level,creatinine_change,days_prescribed_ahead,gender_encoded,anchor_age,race_encoded,avg_bmi,N170,N171,N172,N178,N179
0,10000032,22595853,32,63739054410,0.6,0.3,0.3,50.0,0,52,28,18.5375,,,,,
1,10000032,22595853,32,63739054410,0.8,0.4,0.4,95.0,0,52,28,18.5375,,,,,
2,10000032,22595853,15,51079007220,0.6,0.3,0.3,50.0,0,52,28,18.5375,,,,,
3,10000032,22595853,15,51079007220,0.8,0.4,0.4,95.0,0,52,28,18.5375,,,,,
4,10000032,22595853,32,63739054410,0.6,0.3,0.3,49.0,0,52,28,18.5375,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
7188279,19999828,29734428,28,60505006500,0.6,0.3,0.3,0.0,0,46,28,25.7000,0,0,0,0,0
7188280,19999828,25744818,37,00338355248,0.6,0.3,0.3,-529.0,0,46,28,25.7000,0,0,0,0,0
7188281,19999828,25744818,28,00904568461,0.6,0.3,0.3,-530.0,0,46,28,25.7000,0,0,0,0,0
7188282,19999828,25744818,4,00536100836,0.6,0.3,0.3,-532.0,0,46,28,25.7000,0,0,0,0,0
