In [1]:
import json
import pandas as pd

## Example JSON input that the bio model takes

In [2]:
# Read the example bio-model payload from disk and tabulate its
# "readingsForGroupId" records, ordered by their "date" field.
path = "../../src/data/gluroo/chris/data.json"
with open(path, "r") as json_file:
    data = json.load(json_file)

example_readings = data["readingsForGroupId"]
df = pd.DataFrame(example_readings).sort_values(by="date", ascending=True)

## Load Jax's CSV data

In [3]:
from src.data.gluroo.gluroo import Gluroo
import pandas as pd

# Source CSV plus the options handed to the Gluroo loader.
file_path = "../../src/data/gluroo/2024/jax.csv"

# NOTE(review): the key "coerse_time_interval" is spelled exactly as in the
# original notebook; presumably the loader looks it up under this spelling —
# confirm before "correcting" it.
config = {
    "max_consecutive_nan_values_per_day": 36,
    # The model is set to 5-minute intervals.
    "coerse_time_interval": pd.Timedelta("5min"),
    "day_start_time": pd.Timedelta("4h"),
    "min_carbs": 5,
    "meal_length": pd.Timedelta("2h"),
    "n_top_carb_meals": 5,
}

# Columns to retain from the raw CSV ("dose_type" is deliberately left out).
keep_cols = [
    "date",
    "bgl",
    "msg_type",  # believed to be the message-type column — unconfirmed
    "dose_units",  # can be converted to IOB
    "food_g",  # can be converted to COB
    "affects_fob",
    "affects_iob",
    "food_protein",
    "dose_automatic",
    "fp_bgl",
    "trend",
    "rx_entries_json",
    "event_type",
]

# Constructing the loader reads and processes the file; the result is read
# back from `loader.processed_data` in a later cell.
loader = Gluroo(file_path=file_path, config=config, keep_columns=keep_cols)

Columns after coercing time: ['bgl', 'msg_type', 'dose_units', 'food_g', 'affects_fob', 'affects_iob', 'food_protein', 'dose_automatic', 'fp_bgl', 'trend', 'rx_entries_json', 'event_type', 'food_g_keep']


In [5]:
import src.data.data_transforms as data_transforms

# The loader's processed frame is the data that gets converted into the bio
# model's input below.
# To snapshot it to disk instead, run this as a standalone statement:
#   loader.processed_data.to_csv("gluroo_data_jax.csv")
processed_df = loader.processed_data

# Subset of rows whose "rx_entries_json" value is not null.
has_rx_entries = processed_df["rx_entries_json"].notna()
rx_entries_rows = processed_df[has_rx_entries]

## Add cob and carb availability

In [6]:
# Step 1: pass the frame through the regular-time-interval transform.
processed_df_regular = data_transforms.ensure_regular_time_intervals(
    processed_df,
)
# Step 2: feed the regularised frame to the COB / carb-availability transform.
processed_df_cob = data_transforms.create_cob_and_carb_availability_cols(processed_df_regular)

## Add iob and ins availability

In [None]:
# Feed the COB-augmented frame to the IOB / insulin-availability transform,
# then discard the "id" column.
#
# `.drop(...)` is chained without `inplace=True` so the result is always a new
# frame: if the transform happens to return its input object, an in-place drop
# would silently strip "id" from `processed_df_cob` as well.
processed_df_iob = data_transforms.create_iob_and_ins_availability_cols(
    processed_df_cob
).drop(columns=["id"])

In [17]:
# The validation split looks for a 'date' column, so expose the existing
# 'datetime' values under that name as well.
datetime_values = processed_df_iob["datetime"]
processed_df_iob["date"] = datetime_values

In [18]:
# Persist the fully processed frame (COB/IOB columns included) as the cached CSV.
# The stray `processed_df_iob.set_datetime` expression that used to sit here was
# removed: it was a bare attribute access that did nothing useful and fails with
# AttributeError when the frame has no attribute or column of that name.
file_path = "../../src/data/gluroo/gluroo_cached.csv"
processed_df_iob.to_csv(file_path)

In [5]:
import dateutil.parser

# Skeleton of the JSON payload handed to the bio model. "messages" and
# "readingsForGroupId" are filled in by later cells; "rxDetails" is filled in
# at the bottom of this cell.
prop = {
    "messages": [],
    "readingsForGroupId": [],
    "rxDetails": {},
    "dtNow": "2025-03-14T17:18:56.974+00:00",
    "dtUntil": "2025-03-14T17:18:56.969+00:00",
    "bgl": 69,
    "trend": "FORTYFIVE_DOWN",
    "offsetMin": 0,
    "isLooping": True,
    "timezone": "GMT",  # Data is converted to UTC in the data cleaner so need to set this to GMT from Los Angeles time
    "rapidInsulinDiaHours": -4,  # Jax's is -4 in the database
}

# Values shared by every prescription record below.
RX_GROUP_ID = 10
RX_DATE_CREATED = "2025-01-30T00:25:47.118865+00:00"

# One row per prescription segment, in offsetIndex order:
# (name, startOffsetTt, carbRatio, isf, target).
# NOTE(review): startOffsetTt looks like seconds since midnight
# (14400 == 4 h) — confirm.
RX_SCHEDULE = [
    ("Early morn", 14400, 13.5, 85, 100),
    ("Breakfast", 23400, 11.5, 85, 105),
    # NOTE(review): isf is 18 here but 85 in every other segment — confirm this
    # is the real value and not a typo. It is kept as found.
    ("Lunch", 37800, 13.5, 18, 110),
    ("Aft&Dinner", 52200, 13.5, 85, 110),
    ("Night", 79200, 13.5, 85, 110),
]


def _build_rx_entries():
    """Return a fresh list of prescription-entry dicts built from RX_SCHEDULE.

    A new list (and new dicts) is created on every call so the two places that
    embed the entries in the payload never share mutable objects.
    """
    return [
        {
            "offsetIndex": offset_index,
            "startOffsetTt": start_offset_tt,
            "name": name,
            "carbRatio": carb_ratio,
            "isf": isf,
            "insulinResistance": 1,
            "target": target,
            "dateCreated": RX_DATE_CREATED,
            "groupId": RX_GROUP_ID,
        }
        for offset_index, (name, start_offset_tt, carb_ratio, isf, target) in enumerate(
            RX_SCHEDULE
        )
    ]


# This needs to be set dynamically based on the date of each window.
# Assigned as a plain dict: the previous `({...},)` form (parentheses plus a
# trailing comma) produced a one-element tuple, which json.dump serialises as
# an array rather than an object.
prop["rxDetails"] = {
    "rxfd": {
        "groupId": RX_GROUP_ID,
        "dateCreated": RX_DATE_CREATED,
        "splitOffsets": None,
        "rxEntriesByRxfdIdList": _build_rx_entries(),  # Not used in the model
        "rxBasalsByRxfdIdList": [],
    },
    "rxEntries": _build_rx_entries(),
    "rxBasals": [],
}

## Processed data should be at 5-minute intervals, matching what the bio model expects

In [6]:
# Build the "readingsForGroupId" list: one record per row of `processed_df`.
#
# Changes from the previous version of this cell:
# * "millis" is taken straight from the row's timestamp. Formatting it with
#   strftime dropped the UTC offset, and calling .timestamp() on the re-parsed
#   naive datetime interpreted it in the machine's local timezone, so "millis"
#   disagreed with "date" whenever the notebook ran outside UTC.
# * Missing "bgl"/"trend" values become None (JSON null) instead of NaN, which
#   is not valid JSON; this mirrors what the messages cell does.
# * The list is named `readings` so it no longer overwrites `data`, the example
#   payload loaded at the top of the notebook.
#
# NOTE(review): the sample "bg-0:00" values shown in this notebook (e.g. 12.72)
# look like mmol/L while prop["bgl"] = 69 looks like mg/dL — confirm which unit
# the bio model expects.
readings = []
for _, row in processed_df.iterrows():
    reading_time = row["datetime"]
    bgl_value = row["bg-0:00"]
    trend_value = row["trend"]
    readings.append(
        {
            "bgl": None if pd.isna(bgl_value) else bgl_value,
            # Epoch milliseconds; round() guards against float truncation.
            "millis": int(round(reading_time.timestamp() * 1000)),
            "date": reading_time.isoformat(),
            "trend": None if pd.isna(trend_value) else trend_value,
        }
    )
prop["readingsForGroupId"] = readings

In [7]:
# Only rows whose msg_type is one of these values are exported as messages.
message_types = ["ANNOUNCE_MEAL", "DOSE_INSULIN", "INTERVENTION_SNACK"]

is_exported_message = processed_df["msg_type"].isin(message_types)
messages_df = processed_df[is_exported_message]
messages_df

Unnamed: 0_level_0,bg-0:00,msg_type,dose_units,carbs-0:00,affects_fob,affects_iob,food_protein,dose_automatic,fp_bgl,trend,rx_entries_json,event_type,food_g_keep,day_start_shift,datetime,p_num
date,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1
2025-02-06 08:05:00+00:00,12.72,ANNOUNCE_MEAL,1.05,12.0,False,True,,False,,FLAT,,cgm_reading,12.0,2025-02-06,2025-02-06 08:05:00+00:00,glu001
2025-02-06 13:15:00+00:00,8.11,DOSE_INSULIN,0.05,,False,True,,True,,FLAT,,cgm_reading,,2025-02-06,2025-02-06 13:15:00+00:00,glu001
2025-02-06 13:25:00+00:00,8.28,DOSE_INSULIN,0.15,,False,True,,True,,FLAT,,cgm_reading,,2025-02-06,2025-02-06 13:25:00+00:00,glu001
2025-02-06 13:40:00+00:00,6.33,DOSE_INSULIN,0.05,,False,True,,True,,FORTYFIVE_DOWN,,cgm_reading,,2025-02-06,2025-02-06 13:40:00+00:00,glu001
2025-02-06 14:40:00+00:00,5.78,ANNOUNCE_MEAL,7.45,86.0,False,True,,False,,FLAT,,cgm_reading,86.0,2025-02-06,2025-02-06 14:40:00+00:00,glu001
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
2025-05-06 13:35:00+00:00,5.06,ANNOUNCE_MEAL,6.95,63.0,False,True,,False,,FLAT,,cgm_reading,63.0,2025-05-06,2025-05-06 13:35:00+00:00,glu001
2025-05-06 18:00:00+00:00,4.83,ANNOUNCE_MEAL,5.30,127.0,False,True,,False,,FLAT,,cgm_reading,53.0,2025-05-06,2025-05-06 18:00:00+00:00,glu001
2025-05-06 20:40:00+00:00,5.06,ANNOUNCE_MEAL,,15.0,,,,,,,,,15.0,2025-05-06,2025-05-06 20:40:00+00:00,glu001
2025-05-06 22:45:00+00:00,8.06,ANNOUNCE_MEAL,0.10,94.0,False,True,,True,,FLAT,,cgm_reading,23.0,2025-05-06,2025-05-06 22:45:00+00:00,glu001


In [8]:
"""
`originalDate` is the timestamp assigned to the message when it was first entered (iff the message later had its date/time changed).  
`date` is the timestamp that the message is currently assigned to and is the one used for predictions.
"""

# Build one message dict per row of `messages_df`. JSON has no NaN, so every
# value that pandas considers missing is replaced by None before the message
# is stored.
messages = []
for _, record in messages_df.iterrows():
    # 1 for intervention snacks (pure sugar) and 0.5 for everything else.
    if record["msg_type"] == "INTERVENTION_SNACK":
        glycemic_index = 1
    else:
        glycemic_index = 0.5

    raw_message = {
        "bgl": record["bg-0:00"],
        "bglDate": None,  # Not used in the model
        "fpBgl": record["fp_bgl"],  # Not used in the model
        "type": record["msg_type"],
        "date": record["datetime"].isoformat(),
        "originalDate": None,  # Not used in the model
        "actionMins": None,  # Not used in the model
        "exerciseMins": None,  # Not used in the model
        "exerciseLevel": None,  # Not used in the model
        "foodG": record["carbs-0:00"],
        "foodGlycemicIndex": glycemic_index,
        "foodFat": None,  # Not used in the model
        "foodFiber": None,  # Not used in the model
        "foodProtein": record["food_protein"],
        # row['dose_type'] is probably the right source, but the current CSV
        # does not carry it.
        "doseType": "HUMALOG",
        "doseUnits": record["dose_units"],
        "doseAutomatic": record["dose_automatic"],
        "affectsFob": record["affects_fob"],
        "affectsIob": record["affects_iob"],
        "cancelledDate": None,  # A column for this?
    }
    messages.append(
        {
            field: (None if pd.isna(value) else value)
            for field, value in raw_message.items()
        }
    )

prop["messages"] = messages

In [9]:
import json

# Serialise the assembled payload with 4-space indentation and write it next
# to the notebook as data.json.
with open("./data.json", "w") as out_file:
    out_file.write(json.dumps(prop, indent=4))

## JSON TO CSV (NOT NEEDED AS WE ARE DOING THE OTHER WAY AROUND)

In [None]:
# def integrate_message_data(df: pd.DataFrame, messages: list) -> pd.DataFrame:
#     """
#     Integrates message data into the dataframe by matching to the closest timestamp.

#     Args:
#         df (pd.DataFrame): The input dataframe with datetime index
#         messages (list): List of message dictionaries containing type, foodG, doseUnits, and date

#     Returns:
#         pd.DataFrame: Updated dataframe with new message columns
#     """
#     # Create a copy to avoid modifying the original
#     result_df = df.copy()

#     # Initialize new columns if they don't exist
#     if 'type' not in result_df.columns:
#         result_df['type'] = None
#     if 'foodG' not in result_df.columns:
#         result_df['foodG'] = None
#     if 'doseUnits' not in result_df.columns:
#         result_df['doseUnits'] = None

#     # Convert message dates to datetime
#     message_dates = []
#     for msg in messages:
#         try:
#             date = pd.to_datetime(msg['date'])
#             message_dates.append(date)
#         except (KeyError, ValueError) as e:
#             print(f"Warning: Could not parse date for message: {msg}. Error: {e}")
#             continue

#     # For each message, find the closest timestamp in the dataframe
#     for msg, msg_date in zip(messages, message_dates):
#         try:
#             # Find the closest timestamp by comparing with the date column
#             time_diffs = abs(result_df['date'] - msg_date)
#             closest_idx = time_diffs.idxmin()

#             # Update the values at the closest timestamp
#             result_df.loc[closest_idx, 'type'] = msg.get('type')
#             result_df.loc[closest_idx, 'foodG'] = msg.get('foodG')
#             result_df.loc[closest_idx, 'doseUnits'] = msg.get('doseUnits')
#         except Exception as e:
#             print(f"Warning: Could not process message: {msg}. Error: {e}")
#             continue

#     return result_df

# # Usage:
# msg = data['messages']
# df_cpy = df.copy()
# df_cpy['date'] = pd.to_datetime(df_cpy['date'])
# df_cpy = integrate_message_data(df_cpy, msg)
# df_cpy = df_cpy.rename(columns={'type': 'msg_type'})
# df_cpy.drop(columns=['millis'], inplace=True)
# ins_df = df_cpy[df_cpy['msg_type'] == 'DOSE_INSULIN']
# df_cpy.to_csv('gluroo_data.csv')