In [25]:
# Ideas
# Carb spikes after eating a meal:
# compute blood glucose levels based on the food eaten, then aggregate to get blood sugar spikes.
# Perhaps take the highest increase in blood sugar over an hour, then take percentile measurements, etc.

# Compute the spike more accurately: include the largest within 1 hour, the largest within 2 hours, after 2 hours, etc.




In [26]:
import pandas as pd
import numpy as np

In [27]:
from pathlib import Path

In [28]:
import plotly.express as px

In [29]:
# Location of the combined meal/glucose JSON export, relative to this notebook.
COMBINED_JSONS = Path("../") / "jsons" / "combined_meal.json"


In [30]:
# Read the combined meal/glucose records into a DataFrame.
data = pd.read_json(path_or_buf=COMBINED_JSONS)

In [31]:
# Display the loaded table (one row per glucose reading paired with a meal).
data

Unnamed: 0,food_time,calorie,total_carb,dietary_fiber,sugar,protein,total_fat,patient_id,glucose_time,glucose,time_after_meal
0,2020-02-13 18:00:00,456.0,85.0,1.7,83.0,16.0,3.3,1,2020-02-13 17:43:31,63,-989
1,2020-02-13 18:00:00,456.0,85.0,1.7,83.0,16.0,3.3,1,2020-02-13 17:48:31,67,-689
2,2020-02-13 18:00:00,456.0,85.0,1.7,83.0,16.0,3.3,1,2020-02-13 17:53:31,68,-389
3,2020-02-13 18:00:00,456.0,85.0,1.7,83.0,16.0,3.3,1,2020-02-13 17:58:31,63,-89
4,2020-02-13 18:00:00,456.0,85.0,1.7,83.0,16.0,3.3,1,2020-02-13 18:03:32,59,212
...,...,...,...,...,...,...,...,...,...,...,...
17880,2020-06-13 09:30:00,654.0,82.0,5.6,40.0,26.0,26.0,14,2020-06-13 10:42:56,170,4376
17881,2020-06-13 09:30:00,654.0,82.0,5.6,40.0,26.0,26.0,14,2020-06-13 10:47:55,172,4675
17882,2020-06-13 09:30:00,654.0,82.0,5.6,40.0,26.0,26.0,14,2020-06-13 10:52:56,170,4976
17883,2020-06-13 09:30:00,654.0,82.0,5.6,40.0,26.0,26.0,14,2020-06-13 10:57:55,166,5275


In [32]:
def get_mean_glucose_before_food(group):
    """Average the glucose readings taken strictly before the meal time.

    Parameters
    ----------
    group : pandas.DataFrame
        Rows carrying 'glucose_time', 'food_time' and 'glucose' columns.

    Returns
    -------
    float or None
        Mean of 'glucose' over rows where 'glucose_time' < 'food_time',
        or None when no row satisfies that condition.
    """
    is_pre_meal = group["glucose_time"].lt(group["food_time"])
    # Guard clause: nothing was recorded before the meal.
    if not is_pre_meal.any():
        return None
    return group.loc[is_pre_meal, "glucose"].mean()

In [33]:
# Mean glucose over the readings logged strictly before each meal, one row per
# (food_time, patient_id) pair.
#
# Readings at or after the meal time are masked to NaN rather than filtered out,
# so every meal keeps a row: meals with no earlier reading end up with NaN
# because mean() skips NaN values. This avoids groupby.apply with a function
# that reads the grouping column 'food_time', a pattern pandas has deprecated.
pre_meal_glucose = (
    data.assign(
        pre_meal_glucose=data["glucose"].where(data["glucose_time"] < data["food_time"])
    )
    .groupby(["food_time", "patient_id"])["pre_meal_glucose"]
    .mean()
    .reset_index()
)
pre_meal_glucose





Unnamed: 0,food_time,patient_id,pre_meal_glucose
0,2020-02-13 18:00:00,1,65.25
1,2020-02-13 20:30:00,1,114.50
2,2020-02-14 07:10:00,1,95.50
3,2020-02-14 09:38:00,1,87.75
4,2020-02-14 12:38:00,1,96.25
...,...,...,...
530,2020-06-12 06:05:00,14,119.00
531,2020-06-12 09:10:00,14,118.75
532,2020-06-12 12:45:00,14,101.50
533,2020-06-12 17:15:00,14,132.75


In [34]:
def get_highest_glucose_grouped(df):
    """Return the maximum glucose reading recorded after each meal.

    Only rows whose 'glucose_time' is strictly later than 'food_time' are
    considered. The result has one row per (food_time, patient_id) pair with
    the columns 'food_time', 'patient_id' and 'highest_glucose'.
    """
    after_meal = df.loc[df['glucose_time'] > df['food_time']]
    peak_per_meal = after_meal.groupby(["food_time", "patient_id"])['glucose'].max()
    return peak_per_meal.rename('highest_glucose').reset_index()

In [35]:
# Elapsed time from the logged meal to each glucose reading.
diff = data['glucose_time'].sub(data['food_time'])

# Mean glucose over readings taken 120 to 150 minutes (both ends inclusive)
# after each meal, one row per (food_time, patient_id).
ending_glucose = (
  data.loc[diff.between(pd.Timedelta(minutes=120), pd.Timedelta(minutes=150))]
  .groupby(["food_time", "patient_id"])['glucose']
  .agg(mean_glucose_well_after='mean')
  .reset_index()
)

In [36]:
# Peak glucose among readings taken from the meal time up to two hours later
# (both ends inclusive), one row per (food_time, patient_id).
highest_glucose = (
    data.loc[diff.between(pd.Timedelta(0), pd.Timedelta(hours=2))]
    .groupby(["food_time", "patient_id"], as_index=False)['glucose']
    .max()
    .rename(columns={'glucose': 'highest_glucose'})
)
highest_glucose

Unnamed: 0,food_time,patient_id,highest_glucose
0,2020-02-13 18:00:00,1,143
1,2020-02-13 20:30:00,1,106
2,2020-02-14 07:10:00,1,122
3,2020-02-14 09:38:00,1,110
4,2020-02-14 12:38:00,1,124
...,...,...,...
530,2020-06-12 06:05:00,14,220
531,2020-06-12 09:10:00,14,162
532,2020-06-12 12:45:00,14,175
533,2020-06-12 17:15:00,14,167


In [37]:
# Combine the three per-meal summaries. Both joins are inner joins on
# (patient_id, food_time), so a meal that is absent from any one of the tables
# is dropped from the result.
# NOTE(review): a meal whose pre_meal_glucose value is NaN is still present in
# that table, so it survives these joins -- confirm whether that is intended.
glucose_range = pd.merge(
  pd.merge(highest_glucose, pre_meal_glucose,
           how = 'inner', on = ['patient_id', 'food_time']),
  ending_glucose,
  how = 'inner', on = ['patient_id', 'food_time'],
)
glucose_range


Unnamed: 0,food_time,patient_id,highest_glucose,pre_meal_glucose,mean_glucose_well_after
0,2020-02-13 18:00:00,1,143,65.25,117.333333
1,2020-02-13 20:30:00,1,106,114.50,104.333333
2,2020-02-14 07:10:00,1,122,95.50,87.166667
3,2020-02-14 09:38:00,1,110,87.75,91.833333
4,2020-02-14 12:38:00,1,124,96.25,88.666667
...,...,...,...,...,...
523,2020-06-11 18:15:00,14,167,99.25,149.500000
524,2020-06-12 06:05:00,14,220,119.00,138.333333
525,2020-06-12 09:10:00,14,162,118.75,111.333333
526,2020-06-12 12:45:00,14,175,101.50,132.166667


In [38]:
# Attach the remaining columns of `data` to every meal in glucose_range, then
# discard the per-reading columns and collapse the repeated rows so each
# distinct meal record appears once.
glucose_spikes = (
    pd.merge(glucose_range, data, how = 'inner', on = ['patient_id', 'food_time'])
    .drop(columns = ['glucose_time', 'glucose', 'time_after_meal'])
    .drop_duplicates(ignore_index = True)
)
glucose_spikes

Unnamed: 0,food_time,patient_id,highest_glucose,pre_meal_glucose,mean_glucose_well_after,calorie,total_carb,dietary_fiber,sugar,protein,total_fat
0,2020-02-13 18:00:00,1,143,65.25,117.333333,456.0,85.0,1.7,83.0,16.0,3.3
1,2020-02-13 20:30:00,1,106,114.50,104.333333,488.0,2.5,1.2,0.8,63.4,23.1
2,2020-02-14 07:10:00,1,122,95.50,87.166667,230.0,35.0,0.0,18.0,13.0,0.0
3,2020-02-14 09:38:00,1,110,87.75,91.833333,280.0,30.0,0.0,22.0,4.0,0.0
4,2020-02-14 12:38:00,1,124,96.25,88.666667,358.0,14.4,0.0,8.7,13.9,0.0
...,...,...,...,...,...,...,...,...,...,...,...
523,2020-06-11 18:15:00,14,167,99.25,149.500000,732.0,85.7,4.0,29.5,26.5,30.8
524,2020-06-12 06:05:00,14,220,119.00,138.333333,280.0,56.5,1.0,24.0,8.0,2.5
525,2020-06-12 09:10:00,14,162,118.75,111.333333,888.0,147.0,2.7,122.0,6.5,33.8
526,2020-06-12 12:45:00,14,175,101.50,132.166667,824.0,148.0,1.3,123.0,5.8,26.8


In [39]:
# Display the per-meal table again.
glucose_spikes

Unnamed: 0,food_time,patient_id,highest_glucose,pre_meal_glucose,mean_glucose_well_after,calorie,total_carb,dietary_fiber,sugar,protein,total_fat
0,2020-02-13 18:00:00,1,143,65.25,117.333333,456.0,85.0,1.7,83.0,16.0,3.3
1,2020-02-13 20:30:00,1,106,114.50,104.333333,488.0,2.5,1.2,0.8,63.4,23.1
2,2020-02-14 07:10:00,1,122,95.50,87.166667,230.0,35.0,0.0,18.0,13.0,0.0
3,2020-02-14 09:38:00,1,110,87.75,91.833333,280.0,30.0,0.0,22.0,4.0,0.0
4,2020-02-14 12:38:00,1,124,96.25,88.666667,358.0,14.4,0.0,8.7,13.9,0.0
...,...,...,...,...,...,...,...,...,...,...,...
523,2020-06-11 18:15:00,14,167,99.25,149.500000,732.0,85.7,4.0,29.5,26.5,30.8
524,2020-06-12 06:05:00,14,220,119.00,138.333333,280.0,56.5,1.0,24.0,8.0,2.5
525,2020-06-12 09:10:00,14,162,118.75,111.333333,888.0,147.0,2.7,122.0,6.5,33.8
526,2020-06-12 12:45:00,14,175,101.50,132.166667,824.0,148.0,1.3,123.0,5.8,26.8


In [40]:
# Derived per-meal metrics:
#   stabilize     -- peak glucose minus the later mean (mean_glucose_well_after)
#   glucose_spike -- peak glucose minus the pre-meal mean
#   2hour_change  -- later mean minus the pre-meal mean
# '2hour_change' is not a valid Python identifier, so it is passed via ** unpacking.
glucose_spikes = glucose_spikes.assign(
    stabilize=glucose_spikes['highest_glucose'] - glucose_spikes['mean_glucose_well_after'],
    glucose_spike=glucose_spikes['highest_glucose'] - glucose_spikes['pre_meal_glucose'],
    **{'2hour_change': glucose_spikes['mean_glucose_well_after'] - glucose_spikes['pre_meal_glucose']},
)


In [41]:
# It's probably easiest to bucket meals into high-protein, medium-protein, and low-protein groups.
# I need to plot it out: high-protein spike, high-carb spike, high-... (TODO: finish this list)

In [42]:
# Left-closed carbohydrate ranges [lo, hi); the final bin is open-ended.
bins = [0, 10, 20, 35, 50, 75, np.inf]
# Store each interval as its string form (e.g. '[0.0, 10.0)').
glucose_spikes['carb_bin'] = (
    pd.cut(x = glucose_spikes['total_carb'], bins = bins, right = False)
    .astype(str)
)


In [43]:
# Bar chart of the mean 'stabilize' value within each carbohydrate bin.
# NOTE(review): the axis label and title say "spike", but the plotted column is
# 'stabilize' rather than 'glucose_spike' -- confirm which metric is intended.
px.bar(
    data_frame = glucose_spikes.groupby('carb_bin')['stabilize'].mean(),
    title = 'Higher Carbohydrate Content is Correlated With More Inflated Blood Sugar Spikes',
    labels = dict(value = 'Average Blood Sugar Spike', carb_bin = 'Carbohydrate Bin'),
    color_discrete_sequence = ['darkorange'],
)

In [44]:
# Left-closed protein ranges [lo, hi), paired one-to-one with their display
# labels; the final bin is open-ended.
protein_bins = [0, 3, 10, 15, 25, 40, float('inf')]
protein_labels = ['0-3g', '3-10g', '10-15g', '15-25g', '25-40g', '40+g']

glucose_spikes['protein_bin'] = pd.cut(glucose_spikes['protein'], 
                                       bins=protein_bins, 
                                       labels=protein_labels,
                                       right=False)

# Bar chart of the mean 'stabilize' value within each protein bin.
# - observed=False is spelled out because 'protein_bin' is categorical: it keeps
#   every labelled bin in the result (empty bins show as NaN) regardless of the
#   pandas default for this option.
# - The x-axis label is keyed on 'protein_bin', the name of the grouped Series'
#   index; a key of 'index' would not match that axis.
px.bar(glucose_spikes.groupby('protein_bin', observed=False)['stabilize'].mean(),
       labels={'value': 'Average Blood Sugar Spike', 'protein_bin': 'Protein Bin'},
       title='Higher Protein Content is Correlated With Slightly Less Blood Sugar Spikes',
       color_discrete_sequence=['green'])





In [45]:
# Scatter of each meal's carbohydrate content against its 'stabilize' value
# (highest glucose in the two hours after the meal minus the mean glucose
# 120-150 minutes after the meal). Column names are passed with the frame so
# the axes carry descriptive labels instead of the generic "x" / "y".
fig = px.scatter(
    glucose_spikes,
    x = 'total_carb',
    y = 'stabilize',
    labels = {
        'total_carb': 'Total Carbohydrates',
        'stabilize': 'Peak Glucose Minus Mean Glucose 120-150 min After Meal',
    },
    title = 'Total Carbohydrates vs. Post-Peak Glucose Drop',
)
fig


In [46]:
# Pairwise correlation matrix of protein, total carbohydrates and 'stabilize'.
glucose_spikes.loc[:, ['protein', 'total_carb', 'stabilize']].corr()

Unnamed: 0,protein,total_carb,stabilize
protein,1.0,0.431019,-0.020041
total_carb,0.431019,1.0,0.09401
stabilize,-0.020041,0.09401,1.0


In [47]:
# Scatter of each meal's protein content against its 'stabilize' value
# (highest glucose in the two hours after the meal minus the mean glucose
# 120-150 minutes after the meal). Column names are passed with the frame so
# the axes carry descriptive labels instead of the generic "x" / "y".
px.scatter(
    glucose_spikes,
    x = 'protein',
    y = 'stabilize',
    labels = {
        'protein': 'Protein',
        'stabilize': 'Peak Glucose Minus Mean Glucose 120-150 min After Meal',
    },
    title = 'Protein vs. Post-Peak Glucose Drop',
)

In [48]:
# TODO: create a scatter plot with an LSRL (least-squares regression line).
# Write the per-meal table to 'glucose_spikes.csv' without the row index.
glucose_spikes.to_csv(path_or_buf = 'glucose_spikes.csv', index = False)