In [None]:
# Ideas
# Carb-induced glucose spikes after eating a meal
# Compute blood glucose level based on food eaten, aggregate to get blood sugar spikes
# Perhaps take the highest increase in blood sugar over an hour, and take percentile measurements etc.

# Compute the spike more accurately: include largest within 1 hour, largest within 2 hours, after 2 hours etc.




In [10]:
import pandas as pd
import numpy as np

In [11]:
from pathlib import Path

In [12]:
import plotly.express as px

In [13]:
# Location of the combined meal/glucose JSON export, relative to the notebook's working directory.
COMBINED_JSONS = Path("../") / "jsons" / "combined_meal.json"


In [16]:
# Load the combined meal / glucose-reading records into a DataFrame.
data = pd.read_json(path_or_buf=COMBINED_JSONS)

In [17]:
# Display the loaded frame: each row pairs one meal (food_time plus its
# nutrition columns) with one glucose reading (glucose_time, glucose).
data

Unnamed: 0,food_time,calorie,total_carb,dietary_fiber,sugar,protein,total_fat,patient_id,glucose_time,glucose,time_after_meal
0,2020-02-13 18:00:00,456.0,85.0,1.7,83.0,16.0,3.3,1,2020-02-13 17:43:31,63,-989
1,2020-02-13 18:00:00,456.0,85.0,1.7,83.0,16.0,3.3,1,2020-02-13 17:48:31,67,-689
2,2020-02-13 18:00:00,456.0,85.0,1.7,83.0,16.0,3.3,1,2020-02-13 17:53:31,68,-389
3,2020-02-13 18:00:00,456.0,85.0,1.7,83.0,16.0,3.3,1,2020-02-13 17:58:31,63,-89
4,2020-02-13 18:00:00,456.0,85.0,1.7,83.0,16.0,3.3,1,2020-02-13 18:03:32,59,212
...,...,...,...,...,...,...,...,...,...,...,...
14732,2020-06-13 09:30:00,654.0,82.0,5.6,40.0,26.0,26.0,14,2020-06-13 10:42:56,170,4376
14733,2020-06-13 09:30:00,654.0,82.0,5.6,40.0,26.0,26.0,14,2020-06-13 10:47:55,172,4675
14734,2020-06-13 09:30:00,654.0,82.0,5.6,40.0,26.0,26.0,14,2020-06-13 10:52:56,170,4976
14735,2020-06-13 09:30:00,654.0,82.0,5.6,40.0,26.0,26.0,14,2020-06-13 10:57:55,166,5275


In [18]:
def get_latest_glucose_before_food(group):
    """Return the most recent glucose reading taken strictly before the meal.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows for one meal; must contain the columns ``glucose_time``,
        ``food_time`` and ``glucose``.

    Returns
    -------
    The ``glucose`` value of the row with the latest ``glucose_time`` among
    rows where ``glucose_time < food_time``, or ``None`` when no reading
    precedes the meal.
    """
    is_before_meal = group["glucose_time"] < group["food_time"]
    readings_before_meal = group[is_before_meal]
    if not readings_before_meal.empty:
        # After an ascending sort on time, the last row is the latest reading.
        latest_reading = readings_before_meal.sort_values("glucose_time").iloc[-1]
        return latest_reading["glucose"]
    return None

In [19]:
# One row per meal time: the last glucose reading strictly before the meal
# (NaN when the meal has no earlier reading, since the helper returns None).
# The columns the helper needs are selected explicitly, including the grouping
# key it compares against, so ``apply`` no longer operates on the grouping
# column implicitly -- behaviour that newer pandas versions deprecate.
# NOTE(review): meals are keyed by food_time only; if two patients log a meal
# at the same timestamp their readings are pooled -- confirm whether
# patient_id should be part of the key.
pre_meal_glucose = (
    data.groupby("food_time")[["food_time", "glucose_time", "glucose"]]
    .apply(get_latest_glucose_before_food)
    .to_frame(name="pre_meal_glucose")
)
pre_meal_glucose

  pre_meal_glucose = data.groupby("food_time").apply(get_latest_glucose_before_food).to_frame(name = 'pre_meal_glucose')


Unnamed: 0_level_0,pre_meal_glucose
food_time,Unnamed: 1_level_1
2020-02-13 18:00:00,63.0
2020-02-13 20:30:00,108.0
2020-02-14 07:10:00,93.0
2020-02-14 09:38:00,90.0
2020-02-14 12:38:00,101.0
...,...
2020-06-12 06:05:00,117.0
2020-06-12 09:10:00,109.0
2020-06-12 12:45:00,102.0
2020-06-12 17:15:00,130.0


In [None]:
# Next Steps
# Get mean glucose over 1 hr after meal, 2 hr after meal, and latest glucose measurement. Compare high protein / fat to
# high carb. Normalize: compare high sugar to high protein.

In [12]:
# Peak glucose per meal, considering only readings taken strictly after the meal time.
highest_glucose = (
    data
    .loc[lambda readings: readings['food_time'] < readings['glucose_time']]
    .groupby("food_time")['glucose']
    .max()
    .to_frame(name='highest_glucose')
)
highest_glucose

Unnamed: 0_level_0,highest_glucose
food_time,Unnamed: 1_level_1
2020-02-13 18:00:00,143
2020-02-13 20:30:00,106
2020-02-14 07:10:00,117
2020-02-14 09:38:00,110
2020-02-14 12:38:00,124
...,...
2020-06-12 06:05:00,220
2020-06-12 09:10:00,162
2020-06-12 12:45:00,175
2020-06-12 17:15:00,167


In [13]:
# Pair each meal's post-meal peak with its pre-meal baseline (index: food_time).
# The inner join only keeps meals present in both frames; it does NOT remove
# meals whose baseline is NaN (no reading before the meal), so those rows are
# dropped explicitly here to keep undefined spikes out of later steps.
glucose_range = (
    highest_glucose
    .merge(pre_meal_glucose, left_index=True, right_index=True, how='inner')
    .dropna(subset=['pre_meal_glucose'])
)
glucose_range


Unnamed: 0_level_0,highest_glucose,pre_meal_glucose
food_time,Unnamed: 1_level_1,Unnamed: 2_level_1
2020-02-13 18:00:00,143,63.0
2020-02-13 20:30:00,106,108.0
2020-02-14 07:10:00,117,93.0
2020-02-14 09:38:00,110,90.0
2020-02-14 12:38:00,124,101.0
...,...,...
2020-06-12 06:05:00,220,117.0
2020-06-12 09:10:00,162,109.0
2020-06-12 12:45:00,175,102.0
2020-06-12 17:15:00,167,130.0


In [14]:
# Attach the meal-level nutrition columns to each meal's glucose range and
# collapse to one row per meal.
# Every per-reading column has to be removed before de-duplicating, otherwise
# each glucose reading stays a distinct row. `time_after_meal` varies per
# reading just like `glucose_time` and `glucose`; it is dropped with
# errors='ignore' so the cell also works on exports that lack the column.
# NOTE(review): the join key is food_time only; meals logged by different
# patients at the same timestamp would share one glucose range -- confirm.
glucose_spikes = (
    glucose_range
    .merge(data, left_index=True, right_on='food_time', how='inner')
    .drop(columns=['glucose_time', 'glucose'])
    .drop(columns=['time_after_meal'], errors='ignore')
    .drop_duplicates()
    .reset_index(drop=True)
)
glucose_spikes

Unnamed: 0,highest_glucose,pre_meal_glucose,food_time,calorie,total_carb,dietary_fiber,sugar,patient_id
0,143,63.0,2020-02-13 18:00:00,456.0,85.0,1.7,83.0,1
1,106,108.0,2020-02-13 20:30:00,488.0,2.5,1.2,0.8,1
2,117,93.0,2020-02-14 07:10:00,230.0,35.0,0.0,18.0,1
3,110,90.0,2020-02-14 09:38:00,280.0,30.0,0.0,22.0,1
4,124,101.0,2020-02-14 12:38:00,358.0,14.4,0.0,8.7,1
...,...,...,...,...,...,...,...,...
528,220,117.0,2020-06-12 06:05:00,280.0,56.5,1.0,24.0,14
529,162,109.0,2020-06-12 09:10:00,888.0,147.0,2.7,122.0,14
530,175,102.0,2020-06-12 12:45:00,824.0,148.0,1.3,123.0,14
531,167,130.0,2020-06-12 17:15:00,820.0,85.2,4.4,18.5,14


In [15]:
# Post-meal rise: highest glucose after the meal minus the last reading before it.
# A negative value means the post-meal peak stayed below the pre-meal level.
glucose_spikes['glucose_spike'] = glucose_spikes['highest_glucose'].sub(
    glucose_spikes['pre_meal_glucose']
)
glucose_spikes

Unnamed: 0,highest_glucose,pre_meal_glucose,food_time,calorie,total_carb,dietary_fiber,sugar,patient_id,glucose_spike
0,143,63.0,2020-02-13 18:00:00,456.0,85.0,1.7,83.0,1,80.0
1,106,108.0,2020-02-13 20:30:00,488.0,2.5,1.2,0.8,1,-2.0
2,117,93.0,2020-02-14 07:10:00,230.0,35.0,0.0,18.0,1,24.0
3,110,90.0,2020-02-14 09:38:00,280.0,30.0,0.0,22.0,1,20.0
4,124,101.0,2020-02-14 12:38:00,358.0,14.4,0.0,8.7,1,23.0
...,...,...,...,...,...,...,...,...,...
528,220,117.0,2020-06-12 06:05:00,280.0,56.5,1.0,24.0,14,103.0
529,162,109.0,2020-06-12 09:10:00,888.0,147.0,2.7,122.0,14,53.0
530,175,102.0,2020-06-12 12:45:00,824.0,148.0,1.3,123.0,14,73.0
531,167,130.0,2020-06-12 17:15:00,820.0,85.2,4.4,18.5,14,37.0


In [22]:
# Write the per-meal spike table to the working directory, without the row index.
glucose_spikes.to_csv(path_or_buf='glucose_spikes.csv', index=False)

In [44]:
# Fraction of meals with a missing carbohydrate value.
# Comparing the numeric column to the string 'nan' is always False, so missing
# values must be detected with isna() instead.
glucose_spikes['total_carb'].isna().mean()

np.float64(0.0)

In [47]:
# Carbohydrate bin edges; with right=False every bin is closed on the left
# and open on the right, and the last bin is unbounded above.
bins = [0, 10, 20, 35, 50, 75, float('inf')]
# The interval labels are stored as plain strings, e.g. "[10.0, 20.0)".
glucose_spikes['carb_bin'] = (
    pd.cut(glucose_spikes['total_carb'], bins=bins, right=False)
    .astype(str)
)


In [75]:
# Bar chart of the mean glucose spike within each carbohydrate bin.
px.bar(
    glucose_spikes.groupby('carb_bin')['glucose_spike'].mean(),
    title='Higher Carbohydrate Content is Correlated With Larger Glucose Spikes',
    labels={'carb_bin': 'Carbohydrate Bin', 'value': 'Maximum Glucose Increase'},
    color_discrete_sequence=['darkorange'],
)

In [67]:
# Sugar bin edges and their display labels; with right=False each bin is
# closed on the left and open on the right, and the last bin is unbounded above.
sugar_bins = [0, 3, 10, 20, 35, float('inf')]
sugar_labels = ['0-3', '3-10', '10-20', '20-35', '35+']
# Ordered categorical so the bins keep their numeric order rather than sorting as text.
glucose_spikes['sugar_bin'] = pd.cut(
    glucose_spikes['sugar'],
    bins=sugar_bins,
    labels=sugar_labels,
    right=False,
    ordered=True,
)
glucose_spikes


Unnamed: 0,highest_glucose,pre_meal_glucose,food_time,calorie,total_carb,dietary_fiber,sugar,patient_id,glucose_spike,carb_bin,sugar_bin
0,143,63.0,2020-02-13 18:00:00,456.0,85.0,1.7,83.0,1,80.0,"[75.0, inf)",35+
1,106,108.0,2020-02-13 20:30:00,488.0,2.5,1.2,0.8,1,-2.0,"[0.0, 10.0)",0-3
2,117,93.0,2020-02-14 07:10:00,230.0,35.0,0.0,18.0,1,24.0,"[35.0, 50.0)",10-20
3,110,90.0,2020-02-14 09:38:00,280.0,30.0,0.0,22.0,1,20.0,"[20.0, 35.0)",20-35
4,124,101.0,2020-02-14 12:38:00,358.0,14.4,0.0,8.7,1,23.0,"[10.0, 20.0)",3-10
...,...,...,...,...,...,...,...,...,...,...,...
528,220,117.0,2020-06-12 06:05:00,280.0,56.5,1.0,24.0,14,103.0,"[50.0, 75.0)",20-35
529,162,109.0,2020-06-12 09:10:00,888.0,147.0,2.7,122.0,14,53.0,"[75.0, inf)",35+
530,175,102.0,2020-06-12 12:45:00,824.0,148.0,1.3,123.0,14,73.0,"[75.0, inf)",35+
531,167,130.0,2020-06-12 17:15:00,820.0,85.2,4.4,18.5,14,37.0,"[75.0, inf)",10-20


In [68]:
# Number of meals in each sugar bin, most frequent first.
glucose_spikes['sugar_bin'].value_counts()

sugar_bin
0-3      151
3-10     124
20-35    100
35+       85
10-20     73
Name: count, dtype: int64

In [None]:
# Bar chart of the mean glucose spike within each sugar bin.
# `sugar_bin` is categorical; `observed=False` is passed explicitly so every
# bin (including empty ones) keeps its bar and the result does not depend on
# pandas' changing default for categorical groupers.
px.bar(
    glucose_spikes.groupby('sugar_bin', observed=False)['glucose_spike'].mean(),
    labels={'value': 'Maximum Glucose Increase', 'sugar_bin': 'Sugar Bin'},
    title='Higher Sugar Content is Correlated With Larger Glucose Spikes',
)



