In [1]:
import pandas as pd 
import numpy as np 
import plotly.express as px
from pathlib import Path

In [2]:
# Subset of patients to explore, 3 prediabetic and 3 not, 3 female and 3 male
patients = [1, 2, 4, 9, 14]

In [3]:
# Load in demographic data
demo = pd.read_csv(Path('data') / 'Demographics.csv')
key = demo[demo["ID"].isin([1,2,4,9,14])]
key

Unnamed: 0,ID,Gender,HbA1c
1,1,FEMALE,5.5
3,4,FEMALE,6.4
5,2,MALE,5.6
10,9,MALE,6.1
13,14,MALE,5.5


In [4]:
key[key["ID"] == 4]["HbA1c"].iloc[0]

np.float64(6.4)

In [5]:
# Each element of the list is glucose data for a specfic patient
dex = []
for i, v in enumerate(patients):    
    dex.append(pd.read_csv(Path('data') / 'dex' / f'Dexcom_{str(v).zfill(3)}.csv'))

# Clean the data 
for i, v in enumerate(dex):
    dex[i] = dex[i].dropna(subset=["Timestamp (YYYY-MM-DDThh:mm:ss)"])
    dex[i] = dex[i][["Timestamp (YYYY-MM-DDThh:mm:ss)", "Glucose Value (mg/dL)"]]
    dex[i]["Timestamp (YYYY-MM-DDThh:mm:ss)"] = pd.to_datetime(dex[i]["Timestamp (YYYY-MM-DDThh:mm:ss)"])

In [6]:
food = []
for i, v in enumerate(patients):    
    food.append(pd.read_csv(Path('data') / 'food' / f'Food_Log_{str(v).zfill(3)}.csv'))

for i, v in enumerate(food):
    food[i]["time_begin"] = pd.to_datetime(food[i]["time_begin"])

In [7]:
hr = []
for i, v in enumerate(patients):    
    hr.append(pd.read_csv(Path('data') / 'hr' / f'HR_{str(v).zfill(3)}.csv'))

for i, v in enumerate(hr):
    hr[i]["datetime"] = pd.to_datetime(hr[i]["datetime"])

  hr[i]["datetime"] = pd.to_datetime(hr[i]["datetime"])


Create a line graph showing heart rate overtime, for simplicity we will only consider the first day (different for each patient). Plot the heart rates of each patient, coloring in based on prediabetes. 

In [8]:
a = pd.to_datetime(hr[0]["datetime"], format="%m/%d/%y %H:%M")
first_day = a.iloc[0].day
first_day

13

In [9]:
a

0        2020-02-13 15:29:00
1        2020-02-13 15:29:00
2        2020-02-13 15:29:00
3        2020-02-13 15:29:00
4        2020-02-13 15:29:00
                 ...        
634183   2020-02-22 17:55:00
634184   2020-02-22 17:56:00
634185   2020-02-22 17:56:00
634186   2020-02-22 17:56:00
634187   2020-02-22 17:56:00
Name: datetime, Length: 634188, dtype: datetime64[ns]

In [None]:
# Helper function to align hours
def align_hours(df):
    df = df.copy()
    

In [91]:
# Create a new df
hr_df_1 = pd.DataFrame()
# For each patient:
for i, v in enumerate(patients):
#   Query down heart rates for only the first day
    first_day = hr[i]["datetime"].iloc[0].day
    temp = hr[i][hr[i]["datetime"].dt.day == first_day]
    #temp["datetime"] = temp["datetime"].dt.time#('%H:%M')

    temp['datetime'] = (
        (temp['datetime'] - temp['datetime'].min()) / pd.Timedelta(hours=1)
    )

#   Add column for prediabeticness
    prediabetic = False
    if key[key["ID"] == v]["HbA1c"].iloc[0] > 5.7:
        prediabetic = True

    prediabetic_col = [prediabetic] * temp.shape[0]
    id_col = [v] * temp.shape[0]
    temp["prediabetic"] = prediabetic_col
    temp["ID"] = id_col
#   Concatenate with df
    hr_df_1 = pd.concat([hr_df_1, temp])

    #break

hr_df_1 = hr_df_1.sort_values(by=["ID", "datetime"])




A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/

In [92]:
hr_df_1

Unnamed: 0,datetime,hr,prediabetic,ID
0,0.000000,94.00,False,1
1,0.000000,99.00,False,1
2,0.000000,94.00,False,1
3,0.000000,82.50,False,1
4,0.000000,82.60,False,1
...,...,...,...,...
5144,1.428889,68.65,False,14
5145,1.429167,68.67,False,14
5146,1.429444,68.68,False,14
5147,1.429722,68.70,False,14


In [93]:
# Plot
fig = px.line(
    hr_df_1,
    x='datetime',
    y=' hr',
    color='prediabetic',      # color based on diabetes status
    line_group='ID',  # separate line per patient
    title='Heart Rate Over Time by Patient and Prediabetes Status',
    labels={'prediabetic': 'Prediabetic Status'}
)

fig.show()

In [70]:
dex[0].head()

Unnamed: 0,Timestamp (YYYY-MM-DDThh:mm:ss),Glucose Value (mg/dL)
12,2020-02-13 17:23:32,61.0
13,2020-02-13 17:28:32,59.0
14,2020-02-13 17:33:32,58.0
15,2020-02-13 17:38:32,59.0
16,2020-02-13 17:43:31,63.0


In [90]:
# Create plotting df
gc_df = pd.DataFrame()
# For each patient  
for i, v in enumerate(patients):
    # Find their highest carb meal and time of meal
    highest_carb = food[i]["total_carb"].idxmax()
    time_start = pd.to_datetime(food[i].loc[highest_carb]["time_begin"])
    time_end = time_start + pd.Timedelta(hours=2)
    
    # Query df to have only glucose from this period
    temp_dex = dex[i].copy()
    temp_dex = temp_dex[(dex[i]["Timestamp (YYYY-MM-DDThh:mm:ss)"] > time_start) & 
                      (dex[i]["Timestamp (YYYY-MM-DDThh:mm:ss)"] < time_end)]
    
    # Normalize the times
    temp_dex["normalized"] = (
        (temp_dex['Timestamp (YYYY-MM-DDThh:mm:ss)'] - temp_dex['Timestamp (YYYY-MM-DDThh:mm:ss)'].min()) / pd.Timedelta(hours=1)
    )
    
    # Add marker for prediabeticness
    prediabetic = False
    if key[key["ID"] == v]["HbA1c"].iloc[0] > 5.7:
        prediabetic = True

    prediabetic_col = [prediabetic] * temp_dex.shape[0]
    id_col = [v] * temp_dex.shape[0]
    temp_dex["prediabetic"] = prediabetic_col
    temp_dex["ID"] = id_col
    
    # Append to df
    gc_df = pd.concat([gc_df, temp_dex])

gc_df.head()

Unnamed: 0,Timestamp (YYYY-MM-DDThh:mm:ss),Glucose Value (mg/dL),normalized,prediabetic,ID
2220,2020-02-21 12:33:24,115.0,0.0,False,1
2221,2020-02-21 12:38:24,123.0,0.083333,False,1
2222,2020-02-21 12:43:24,131.0,0.166667,False,1
2223,2020-02-21 12:48:24,138.0,0.25,False,1
2224,2020-02-21 12:53:24,142.0,0.333333,False,1


In [94]:
# Plot
fig = px.line(
    gc_df,
    x='normalized',
    y='Glucose Value (mg/dL)',
    color='prediabetic',      # color based on diabetes status
    line_group='ID',  # separate line per patient
    title='Glucose 2 hours after high carb meal',
    labels={'prediabetic': 'Prediabetic Status'}
)

fig.show()

In [96]:
hr[0].head()

Unnamed: 0,datetime,hr
0,2020-02-13 15:29:00,94.0
1,2020-02-13 15:29:00,99.0
2,2020-02-13 15:29:00,94.0
3,2020-02-13 15:29:00,82.5
4,2020-02-13 15:29:00,82.6


In [104]:
# Create plotting df
hr_df = pd.DataFrame()
# For each patient  
for i, v in enumerate(patients):
    # Find their highest carb meal and time of meal
    highest_carb = food[i]["total_carb"].idxmax()
    time_start = pd.to_datetime(food[i].loc[highest_carb]["time_begin"])
    time_end = time_start + pd.Timedelta(hours=2)
    
    # Query df to have only glucose from this period
    temp_hr = hr[i].copy()
    temp_hr = temp_hr[(hr[i]["datetime"] > time_start) & 
                      (hr[i]["datetime"] < time_end)]
    
    # Normalize the times
    temp_hr["normalized"] = (
        (temp_hr['datetime'] - temp_hr['datetime'].min()) / pd.Timedelta(hours=1)
    )
    
    # Add marker for prediabeticness
    prediabetic = False
    if key[key["ID"] == v]["HbA1c"].iloc[0] > 5.7:
        prediabetic = True

    prediabetic_col = [prediabetic] * temp_hr.shape[0]
    id_col = [v] * temp_hr.shape[0]
    temp_hr["prediabetic"] = prediabetic_col
    temp_hr["ID"] = id_col
    
    # Append to df
    hr_df = pd.concat([hr_df, temp_hr])

hr_df.head()

Unnamed: 0,datetime,hr,normalized,prediabetic,ID
536960,2020-02-21 12:31:00,119.52,0.0,False,1
536961,2020-02-21 12:31:00,119.25,0.0,False,1
536962,2020-02-21 12:31:00,119.0,0.0,False,1
536963,2020-02-21 12:31:00,118.88,0.0,False,1
536964,2020-02-21 12:31:00,118.93,0.0,False,1


In [105]:
# Plot
fig = px.line(
    hr_df,
    x='normalized',
    y=' hr',
    color='prediabetic',      # color based on diabetes status
    line_group='ID',  # separate line per patient
    title='Heart Rate Over Time by Patient and Prediabetes Status after high carb meal',
    labels={'prediabetic': 'Prediabetic Status'}
)

fig.show()