In [1]:
import pandas as pd
import plotly.express as px

In [2]:
from pathlib import Path

In [3]:
# Directory holding the JSON inputs; the relative path is resolved against the
# current working directory.
INPUT_DIR = Path('..') / 'jsons'
# Fail fast, with a hint about the fix, when the input directory is missing.
assert INPUT_DIR.exists(), f"Directory {INPUT_DIR} does not exist. Check path (or load in JSONs from Project 3)"

In [4]:
# Load the tagged food log JSON from INPUT_DIR into a DataFrame.
food_log_tagged = pd.read_json(INPUT_DIR / 'food_log_tagged_grouped.json')

In [18]:
# Drop the free-text columns, which are removed before the merge below.
# errors='ignore' keeps this cell idempotent: it reassigns its own input, so a
# second run would otherwise raise KeyError because the columns are already gone.
food_log_tagged = food_log_tagged.drop(
    columns=['logged_food', 'searched_food', 'tags'],
    errors='ignore',
)

In [19]:
# Load the flattened glucose readings JSON from INPUT_DIR and display the frame.
glucose_data = pd.read_json(INPUT_DIR / 'flattened_glucose.json')
glucose_data

Unnamed: 0,patient_id,timestamp,glucose
0,1,2020-02-13 17:23:32,61
1,1,2020-02-13 17:28:32,59
2,1,2020-02-13 17:33:32,58
3,1,2020-02-13 17:38:32,59
4,1,2020-02-13 17:43:31,63
...,...,...,...
36893,16,2020-07-24 09:58:05,108
36894,16,2020-07-24 10:03:05,108
36895,16,2020-07-24 10:08:05,106
36896,16,2020-07-24 10:13:05,102


In [55]:

def create_combined_df(food_df=None, glucose_df=None, seconds_before=1200, seconds_after=7200):
    """Pair each logged meal with the glucose readings taken around it.

    Every row of the food log is joined to every glucose reading of the same
    patient (food ``ID`` == glucose ``patient_id``); only readings that fall
    inside the time window around the meal are kept.

    Parameters
    ----------
    food_df : pandas.DataFrame, optional
        Food log with at least ``ID`` and ``datetime`` columns. Defaults to
        the notebook-level ``food_log_tagged``.
    glucose_df : pandas.DataFrame, optional
        Glucose readings with at least ``patient_id`` and ``timestamp``
        columns. Defaults to the notebook-level ``glucose_data``.
    seconds_before : int or float, default 1200
        How far before the meal a reading may be (1200 s = 20 minutes).
    seconds_after : int or float, default 7200
        How far after the meal a reading may be (7200 s = 2 hours).

    Returns
    -------
    pandas.DataFrame
        One row per (meal, reading) pair inside the window, with ``datetime``
        renamed to ``food_time``, ``timestamp`` renamed to ``glucose_time``,
        the ``ID`` column dropped (``patient_id`` is kept), an added
        ``time_after_meal`` column in seconds (negative = before the meal),
        and a fresh 0..n-1 index.

    Notes
    -----
    Assumes ``datetime`` and ``timestamp`` are datetime-typed so that their
    difference supports ``.dt.total_seconds()`` -- TODO confirm for inputs
    that are not loaded through ``pd.read_json``.
    """
    # Fall back to the notebook globals so the zero-argument call keeps working.
    if food_df is None:
        food_df = food_log_tagged
    if glucose_df is None:
        glucose_df = glucose_data

    # Per-patient pairing of every meal with every glucose reading.
    paired = food_df.merge(glucose_df, left_on='ID', right_on='patient_id')

    # Signed offset of the reading relative to the meal, in seconds.
    paired['time_after_meal'] = (paired['timestamp'] - paired['datetime']).dt.total_seconds()

    # Keep readings from `seconds_before` before the meal up to `seconds_after`
    # after it; both bounds are inclusive.
    in_window = paired['time_after_meal'].between(-seconds_before, seconds_after)

    return (
        paired[in_window]
        .rename(columns={'datetime': 'food_time', 'timestamp': 'glucose_time'})
        .drop(columns=['ID'])
        .reset_index(drop=True)
    )


In [56]:
# Build the meal/glucose frame from the notebook-level food and glucose frames.
combined_df = create_combined_df()
# Bare last expression so the notebook renders the frame as the cell output.
combined_df

Unnamed: 0,food_time,calorie,total_carb,dietary_fiber,sugar,protein,total_fat,patient_id,glucose_time,glucose,time_after_meal
0,2020-02-13 18:00:00,456.0,85.0,1.7,83.0,16.0,3.3,1,2020-02-13 17:43:31,63,-989.0
1,2020-02-13 18:00:00,456.0,85.0,1.7,83.0,16.0,3.3,1,2020-02-13 17:48:31,67,-689.0
2,2020-02-13 18:00:00,456.0,85.0,1.7,83.0,16.0,3.3,1,2020-02-13 17:53:31,68,-389.0
3,2020-02-13 18:00:00,456.0,85.0,1.7,83.0,16.0,3.3,1,2020-02-13 17:58:31,63,-89.0
4,2020-02-13 18:00:00,456.0,85.0,1.7,83.0,16.0,3.3,1,2020-02-13 18:03:32,59,212.0
...,...,...,...,...,...,...,...,...,...,...,...
14732,2020-06-13 09:30:00,654.0,82.0,5.6,40.0,26.0,26.0,14,2020-06-13 10:42:56,170,4376.0
14733,2020-06-13 09:30:00,654.0,82.0,5.6,40.0,26.0,26.0,14,2020-06-13 10:47:55,172,4675.0
14734,2020-06-13 09:30:00,654.0,82.0,5.6,40.0,26.0,26.0,14,2020-06-13 10:52:56,170,4976.0
14735,2020-06-13 09:30:00,654.0,82.0,5.6,40.0,26.0,26.0,14,2020-06-13 10:57:55,166,5275.0


In [57]:
# Number of distinct meal times that have at least one paired reading for patient 1.
combined_df.loc[combined_df['patient_id'].eq(1), 'food_time'].nunique()

44

In [58]:
# Directory the combined JSON is written to; the relative path is resolved
# against the current working directory.
OUTPUT_DIR = Path('..') / 'jsons'

In [59]:
# Write the combined frame as an indented JSON array with one object per row.
combined_df.to_json(OUTPUT_DIR / 'combined_meal.json', orient='records', indent=2)
