# Change Point Index Creator and Day Splitter

## Dataset Creator

In [1]:
import sys
import os

# Make the project's dataset helpers importable. The path is relative, so
# os.path.abspath resolves it against the kernel's current working directory.
sys.path.append(os.path.abspath("../meal_identification/datasets"))

# NOTE(review): this star import hides where names come from. Later cells use
# dataset_creator, plot_announce_meal_histogram and `pd` without importing them
# explicitly -- presumably they all arrive through this line; confirm.
from dataset_generator import *

### Dataset Creator Demo

In [2]:
# Run the dataset creator with its default settings and keep the returned data.
result_df = dataset_creator(return_data=True)

# Pull out the first two returned entries, one per patient.
# NOTE(review): presumably the entries follow the order of the loaded files
# reported in the cell output -- confirm which patient ID maps to which name.
walker_df, christopher_df = result_df[0], result_df[1]

Loaded DataFrames: ['500030_2024-07-01_2024-09-30.csv', '679372_2024-07-01_2024-09-30.csv']

Processing: 500030
Columns after coercing time: ['bgl', 'msg_type', 'affects_fob', 'affects_iob', 'dose_units', 'food_g', 'food_glycemic_index', 'food_g_keep', 'date']
Erasing meal overlap with minCarb 10g and 3hr meal window
Data saved successfully in: 0_meal_identification/meal_identification/data/interim

 	 Dataset label: 2024-11-07_500030__timeInter5mins_dayStart4hrs_minCarb10g_3hrMealW.csv

Processing: 679372
Columns after coercing time: ['bgl', 'msg_type', 'affects_fob', 'affects_iob', 'dose_units', 'food_g', 'food_glycemic_index', 'food_g_keep', 'date']
Erasing meal overlap with minCarb 10g and 3hr meal window
No 'ANNOUNCE_MEAL' events to process for top N meals.
Data saved successfully in: 0_meal_identification/meal_identification/data/interim

 	 Dataset label: 2024-11-07_679372__timeInter5mins_dayStart4hrs_minCarb10g_3hrMealW.csv


All data saved successfully in: 0_meal_identificatio

### Explanation

Below we see Walker's and Christopher's data for their first 21 meal announcements in the dataset. Notice that these should typically be larger meals, and there should be no more than 3 meals per day.

In [None]:
# First 21 rows of Walker's data whose msg_type is 'ANNOUNCE_MEAL'.
walker_df.loc[walker_df['msg_type'] == 'ANNOUNCE_MEAL'].head(21)

In [None]:
# First 21 rows of Christopher's data whose msg_type is 'ANNOUNCE_MEAL'.
# NOTE(review): the dataset_creator log in this notebook reports "No
# 'ANNOUNCE_MEAL' events to process" for the second patient processed; if that
# is this frame, the result here may be empty -- confirm.
christopher_df.loc[christopher_df['msg_type'] == 'ANNOUNCE_MEAL'].head(21)

### Meal Announcement Histograms

### Meal Distribution Times

In [None]:
# Plot Walker's meal-announcement histogram twice, once per granularity setting.
# NOTE(review): the keyword is named hours_or_15minutes but the second call
# passes 'minutes' -- presumably this selects 15-minute bins; confirm the
# accepted values in dataset_generator.
plot_announce_meal_histogram(walker_df, hours_or_15minutes='hours')
plot_announce_meal_histogram(walker_df, hours_or_15minutes='minutes')

In [None]:
# Plot Christopher's meal-announcement histogram twice, once per granularity setting.
# NOTE(review): the keyword is named hours_or_15minutes but the second call
# passes 'minutes' -- presumably this selects 15-minute bins; confirm the
# accepted values in dataset_generator.
plot_announce_meal_histogram(christopher_df, hours_or_15minutes='hours')
plot_announce_meal_histogram(christopher_df, hours_or_15minutes='minutes')

## Dataset Creation Loop - Hyperparameters



In [None]:
from sklearn.model_selection import ParameterGrid

# Candidate values for each dataset-creation hyperparameter.
# meal_length_window is later passed to pd.Timedelta(hours=...), so it is in hours.
param_grid = dict(
    meal_length_window=[2, 3, 5],
    n_top_carb_meals=[3, 4],
    min_carbs=[5, 10],
)

# Expand the candidates into every combination (3 x 2 x 2 = 12 settings).
grid = ParameterGrid(param_grid)

# Show each combination on its own line.
for params in grid:
    print(params)

In [None]:
# `pd` is not imported explicitly anywhere else in this notebook; import it here
# so this cell does not depend on `from dataset_generator import *` happening
# to expose pandas under that name.
import pandas as pd

# Run the dataset creator once per hyperparameter combination in `grid`.
# return_data=False is passed, so nothing is kept in memory here; the demo run's
# log suggests each call saves its dataset to the interim data directory.
# NOTE(review): the dataset labels logged by the demo run encode minCarb and the
# meal window but show no n_top_carb_meals field -- runs that differ only in
# n_top_carb_meals may overwrite each other's files; confirm in dataset_creator.
for ds_config in grid:
    print(ds_config)
    dataset_creator(
        # The grid stores the window as a number of hours; the creator is given a Timedelta.
        meal_length=pd.Timedelta(hours=ds_config['meal_length_window']),
        n_top_carb_meals=ds_config['n_top_carb_meals'],
        min_carbs=ds_config['min_carbs'],
        return_data=False,
    )