In [None]:
from __future__ import division, print_function

import collections
import csv
import datetime
import xml.etree.ElementTree as ET

import numpy as np
import pandas as pd

from datetime import datetime, timedelta
from scipy.interpolate import CubicSpline
import matplotlib.pyplot as plt

import torch
from torch.utils.data import DataLoader, TensorDataset
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error
from sklearn.metrics import root_mean_squared_error
import pickle
# Prefer the GPU when CUDA is usable; otherwise fall back to the CPU.
if torch.cuda.is_available():
    device = torch.device('cuda')
else:
    device = torch.device('cpu')

# Implementation

In [None]:
from LSTM_functions import *

In [None]:
# Build the processed segments for one file of the *training* split
# (the 559 training XML). The names bound here stay in the notebook
# namespace and are read by later cells.
filepath = "../OhioT1DM/2018/train/559-ws-training.xml"

# Glucose readings -> table -> segments. `segments` is kept for inspection
# only; it is not passed to any of the later steps in this cell.
glucose = read_ohio(filepath, "glucose_level", True)
glucose_df = transfer_into_table(glucose)
segments = segement_data_as_15min(glucose_df)

# Meal and bolus steps are driven by the same file path.
meal = add_meal_segments(filepath)
bolus = add_bolus_segments(filepath, meal)

# Keep only the first element of every step entry before tabulating it.
steps = read_ohio(filepath, "basis_steps", True)
flattened_steps_data = [item[0] for item in steps]
step_df = pd.DataFrame(flattened_steps_data)
step_updated_segments = optimize_step_processing(bolus, step_df)

In [None]:
import glob
import os

def process_all_training_files(directory_path):
    """Run the preprocessing pipeline on every ``*-ws-training.xml`` file in a directory.

    Files are processed in sorted path order so that the position of each
    entry in the returned list is reproducible from run to run; callers that
    number the results by position therefore get stable numbering.

    Args:
        directory_path: Directory that is searched (non-recursively) for
            files whose name matches ``*-ws-training.xml``.

    Returns:
        A list with one dict per file that was processed without raising.
        Each dict has the keys ``'filepath'`` (the matched path) and
        ``'segments'`` (the value returned by ``optimize_step_processing``).
        The list is empty when no file matches.

    Notes:
        This is a best-effort batch run: an exception raised while handling
        one file is printed and that file is skipped, so the remaining files
        are still processed.
    """
    # glob.glob yields matches in arbitrary, filesystem-dependent order.
    pattern = os.path.join(directory_path, "*-ws-training.xml")
    xml_files = sorted(glob.glob(pattern))

    all_processed_data = []

    for filepath in xml_files:
        try:
            glucose = read_ohio(filepath, "glucose_level", True)
            glucose_df = transfer_into_table(glucose)
            # The return value is not used below; the call itself is kept so
            # the sequence of pipeline calls is unchanged.
            segement_data_as_15min(glucose_df)
            meal = add_meal_segments(filepath)
            bolus = add_bolus_segments(filepath, meal)

            # Keep only the first element of every step entry before tabulating it.
            steps = read_ohio(filepath, "basis_steps", True)
            flattened_steps_data = [item[0] for item in steps]
            step_df = pd.DataFrame(flattened_steps_data)
            step_updated_segments = optimize_step_processing(bolus, step_df)

            all_processed_data.append({
                'filepath': filepath,
                'segments': step_updated_segments
            })

            print(f"Successfully processed {filepath}")

        except Exception as e:
            # Deliberately broad: report the failure and continue with the next file.
            print(f"Error processing {filepath}: {e}")

    return all_processed_data

# Usage: run the batch pipeline over the 2018 training directory.
# `training_data` is a list of {'filepath': ..., 'segments': ...} dicts, one
# for each file that was processed without raising an exception.
directory_path = "../OhioT1DM/2018/train/"
training_data = process_all_training_files(directory_path)

In [68]:
# Flatten the per-file results into a single dictionary. Every key is the
# 1-based position of the file in `training_data` followed by the segment
# name (for example '1segment_1'); every value is that segment's data.
segment_dict = {}
segment_name_list = []
segment_data_list = []

# `count` is initialised up front so it still exists (as 0) when
# `training_data` is empty.
count = 0
for count, file_result in enumerate(training_data, start=1):
    for segment_name, segment_data in file_result['segments'].items():
        segment_dict[str(count) + segment_name] = segment_data


In [69]:
# Display the combined mapping; keys have the form "<file index><segment name>".
segment_dict

{'1segment_1':              timestamp  glucose_value  carb_effect  bolus_effect      steps
 0  2021-12-07 16:30:00            101            0          0.58   0.000000
 1  2021-12-07 16:35:00            100            0          0.51   0.000000
 2  2021-12-07 16:40:00            100            0          0.44   0.000000
 3  2021-12-07 16:45:00             99            0          0.37        NaN
 4  2021-12-07 16:50:00             98            0          0.30        NaN
 ..                 ...            ...          ...           ...        ...
 65 2021-12-07 21:55:00            144            1          1.13   3.000000
 66 2021-12-07 22:00:00            140            1          1.06   2.666667
 67 2021-12-07 22:05:00            139            2          0.99   2.400000
 68 2021-12-07 22:10:00            140            2          0.92   7.800000
 69 2021-12-07 22:15:00            140            3          0.70  13.600000
 
 [70 rows x 5 columns],
 '1segment_2':                timest

In [71]:
# # Save the processed data -- CAREFUL: this overwrites any existing file!
# # Specify the file name
# filename = './processed_data/BIG_training_data.pkl'
# # Save the dictionary to a file
# if not os.path.exists(filename):
#     open(filename, 'wb').close()
# # Save the dictionary to a file
# with open(filename, 'wb') as f:
#     pickle.dump(segment_dict, f)


# Test

In [80]:
# Build the processed segments for every XML file in the test split and
# pickle each result under ./processed_data/.
dirpath = "../OhioT1DM/2018/test/"

# os.listdir returns entries in arbitrary order; sorting makes the run (and
# the values left in the notebook namespace afterwards) reproducible.
for filename in sorted(os.listdir(dirpath)):
    # Skip anything that is not an XML file (hidden files, sub-directories, ...)
    # instead of letting the pipeline fail on it.
    if not filename.endswith('.xml'):
        continue
    filepath = os.path.join(dirpath, filename)

    # Same sequence of pipeline calls as for the training data.
    glucose = read_ohio(filepath, "glucose_level", True)
    glucose_df = transfer_into_table(glucose)

    segments = segement_data_as_15min(glucose_df)
    meal = add_meal_segments(filepath)
    bolus = add_bolus_segments(filepath, meal)

    # Keep only the first element of every step entry before tabulating it.
    steps = read_ohio(filepath, "basis_steps", True)
    flattened_steps_data = [item[0] for item in steps]
    step_df = pd.DataFrame(flattened_steps_data)
    step_updated_segments = optimize_step_processing(bolus, step_df)

    # The output name uses the part of the input name before the first '-'
    # (presumably the patient id). A separate variable holds the output path
    # so the loop variable `filename` is not overwritten mid-iteration.
    patient_id = filename.split('-')[0]
    output_path = './processed_data/{}_test_combined_segments_noshrink.pkl'.format(patient_id)

    # open(..., 'wb') creates (or truncates) the file itself, but it does not
    # create a missing parent directory.
    os.makedirs(os.path.dirname(output_path), exist_ok=True)
    with open(output_path, 'wb') as f:
        pickle.dump(step_updated_segments, f)

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  whole_meal_effect_df['assigned'] = False


Unequal: begin: 2021-10-29 05:49:42 end: 2021-10-29 05:50:42
Unequal: begin: 2021-10-29 07:01:28 end: 2021-10-29 07:05:28
Unequal: begin: 2021-10-29 12:14:52 end: 2021-10-29 12:15:52
Unequal: begin: 2021-10-29 14:14:18 end: 2021-10-29 14:15:18
Unequal: begin: 2021-10-29 17:11:30 end: 2021-10-29 17:15:30
Unequal: begin: 2021-10-29 20:49:26 end: 2021-10-29 20:50:26
Unequal: begin: 2021-10-29 22:21:02 end: 2021-10-29 22:25:02
Unequal: begin: 2021-10-30 01:24:01 end: 2021-10-30 01:25:01
Unequal: begin: 2021-10-30 05:43:41 end: 2021-10-30 05:45:41
Unequal: begin: 2021-10-30 06:17:27 end: 2021-10-30 06:20:27
Unequal: begin: 2021-10-30 06:41:30 end: 2021-10-30 06:45:30
Unequal: begin: 2021-10-30 14:06:31 end: 2021-10-30 14:10:31
Unequal: begin: 2021-10-30 14:33:57 end: 2021-10-30 14:35:57
Unequal: begin: 2021-10-30 15:52:45 end: 2021-10-30 15:55:45
Unequal: begin: 2021-10-30 18:22:36 end: 2021-10-30 18:25:36
Unequal: begin: 2021-10-30 21:27:17 end: 2021-10-30 21:30:17
Unequal: begin: 2021-10-

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  whole_bolus_effect_df["assigned"] = False
  segment_df.loc[closest_glucose_idx, 'bolus_effect'] = float(bolus_row['bolus_effect'])
  segment_df.loc[closest_glucose_idx, 'bolus_effect'] = float(bolus_row['bolus_effect'])
  segment_df.loc[closest_glucose_idx, 'bolus_effect'] = float(bolus_row['bolus_effect'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  whole_meal_effect_df['assigned'] = False


Unequal: begin: 2022-01-17 07:23:45 end: 2022-01-17 07:55:45
Unequal: begin: 2022-01-17 11:58:58 end: 2022-01-17 12:00:58
Unequal: begin: 2022-01-17 12:04:02 end: 2022-01-17 12:35:02
Unequal: begin: 2022-01-17 15:49:10 end: 2022-01-17 15:50:10
Unequal: begin: 2022-01-17 19:41:54 end: 2022-01-17 19:45:54
Unequal: begin: 2022-01-17 19:45:45 end: 2022-01-17 20:45:45
Unequal: begin: 2022-01-18 07:11:35 end: 2022-01-18 07:15:35
Unequal: begin: 2022-01-18 07:14:27 end: 2022-01-18 07:45:27
Unequal: begin: 2022-01-18 10:01:18 end: 2022-01-18 10:05:18
Unequal: begin: 2022-01-18 12:04:23 end: 2022-01-18 12:05:23
Unequal: begin: 2022-01-18 12:09:25 end: 2022-01-18 12:40:25
Unequal: begin: 2022-01-18 18:42:18 end: 2022-01-18 18:45:18
Unequal: begin: 2022-01-18 19:03:25 end: 2022-01-18 19:05:25
Unequal: begin: 2022-01-18 19:06:47 end: 2022-01-18 19:40:47
Unequal: begin: 2022-01-19 06:58:49 end: 2022-01-19 07:00:49
Unequal: begin: 2022-01-19 07:03:53 end: 2022-01-19 07:35:53
Unequal: begin: 2022-01-

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  whole_bolus_effect_df["assigned"] = False
  segment_df.loc[closest_glucose_idx, 'bolus_effect'] = float(bolus_row['bolus_effect'])
  segment_df.loc[closest_glucose_idx, 'bolus_effect'] = float(bolus_row['bolus_effect'])
  segment_df.loc[closest_glucose_idx, 'bolus_effect'] = float(bolus_row['bolus_effect'])
  segment_df.loc[closest_glucose_idx, 'bolus_effect'] = float(bolus_row['bolus_effect'])
  segment_df.loc[closest_glucose_idx, 'bolus_effect'] = float(bolus_row['bolus_effect'])
  segment_df.loc[closest_glucose_idx, 'bolus_effect'] = float(bolus_row['bolus_effect'])
  segment_df.loc[closest_glucose_idx, 'bolus_effect'] = float(bolus_row['bolus_effect'])
  segment_df.loc[closest_glucose_idx, 'bolus_effect'] = float(bolus_ro

Unequal: begin: 2021-10-15 18:14:17 end: 2021-10-15 18:15:17
Unequal: begin: 2021-10-15 21:24:45 end: 2021-10-15 21:25:45
Unequal: begin: 2021-10-16 09:28:15 end: 2021-10-16 09:30:15
Unequal: begin: 2021-10-16 12:08:50 end: 2021-10-16 12:10:50
Unequal: begin: 2021-10-16 14:19:38 end: 2021-10-16 14:20:38
Unequal: begin: 2021-10-16 17:58:09 end: 2021-10-16 18:00:09
Unequal: begin: 2021-10-16 22:37:11 end: 2021-10-16 22:40:11
Unequal: begin: 2021-10-17 10:28:35 end: 2021-10-17 10:30:35
Unequal: begin: 2021-10-17 11:27:36 end: 2021-10-17 11:30:36
Unequal: begin: 2021-10-17 13:23:57 end: 2021-10-17 13:25:57
Unequal: begin: 2021-10-17 18:13:27 end: 2021-10-17 18:15:27
Unequal: begin: 2021-10-18 07:41:25 end: 2021-10-18 07:45:25
Unequal: begin: 2021-10-18 11:04:04 end: 2021-10-18 11:05:04
Unequal: begin: 2021-10-19 07:54:18 end: 2021-10-19 07:55:18
Unequal: begin: 2021-10-19 11:03:57 end: 2021-10-19 11:05:57
Unequal: begin: 2021-10-19 18:09:42 end: 2021-10-19 18:10:42
Unequal: begin: 2021-10-

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  whole_bolus_effect_df["assigned"] = False
  segment_df.loc[closest_glucose_idx, 'bolus_effect'] = float(bolus_row['bolus_effect'])
  segment_df.loc[closest_glucose_idx, 'bolus_effect'] = float(bolus_row['bolus_effect'])
  segment_df.loc[closest_glucose_idx, 'bolus_effect'] = float(bolus_row['bolus_effect'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  whole_meal_effect_df['assigned'] = False


Unequal: begin: 2022-01-02 09:28:51 end: 2022-01-02 09:30:51
Unequal: begin: 2022-01-02 13:17:31 end: 2022-01-02 13:20:31
Unequal: begin: 2022-01-02 15:41:58 end: 2022-01-02 15:45:58
Unequal: begin: 2022-01-02 17:48:09 end: 2022-01-02 17:50:09
Unequal: begin: 2022-01-02 22:54:15 end: 2022-01-02 22:55:15
Unequal: begin: 2022-01-03 07:08:57 end: 2022-01-03 07:10:57
Unequal: begin: 2022-01-03 07:17:37 end: 2022-01-03 07:20:37
Unequal: begin: 2022-01-03 15:04:09 end: 2022-01-03 15:05:09
Unequal: begin: 2022-01-03 20:29:07 end: 2022-01-03 20:30:07
Unequal: begin: 2022-01-04 07:19:33 end: 2022-01-04 07:20:33
Unequal: begin: 2022-01-04 14:57:57 end: 2022-01-04 15:00:57
Unequal: begin: 2022-01-04 20:22:26 end: 2022-01-04 20:25:26
Unequal: begin: 2022-01-05 08:02:32 end: 2022-01-05 08:05:32
Unequal: begin: 2022-01-05 19:57:55 end: 2022-01-05 20:00:55
Unequal: begin: 2022-01-06 13:06:02 end: 2022-01-06 13:10:02
Unequal: begin: 2022-01-06 20:52:19 end: 2022-01-06 20:55:19
Unequal: begin: 2022-01-

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  whole_bolus_effect_df["assigned"] = False
  segment_df.loc[closest_glucose_idx, 'bolus_effect'] = float(bolus_row['bolus_effect'])
  segment_df.loc[closest_glucose_idx, 'bolus_effect'] = float(bolus_row['bolus_effect'])
  segment_df.loc[closest_glucose_idx, 'bolus_effect'] = float(bolus_row['bolus_effect'])
  segment_df.loc[closest_glucose_idx, 'bolus_effect'] = float(bolus_row['bolus_effect'])
  segment_df.loc[closest_glucose_idx, 'bolus_effect'] = float(bolus_row['bolus_effect'])
  segment_df.loc[closest_glucose_idx, 'bolus_effect'] = float(bolus_row['bolus_effect'])
  segment_df.loc[closest_glucose_idx, 'bolus_effect'] = float(bolus_row['bolus_effect'])
  segment_df.loc[closest_glucose_idx, 'bolus_effect'] = float(bolus_ro

Unequal: begin: 2022-01-14 10:07:11 end: 2022-01-14 10:10:11
Unequal: begin: 2022-01-14 13:41:11 end: 2022-01-14 13:45:11
Unequal: begin: 2022-01-14 17:06:24 end: 2022-01-14 17:10:24
Unequal: begin: 2022-01-14 22:48:43 end: 2022-01-14 22:50:43
Unequal: begin: 2022-01-15 05:43:21 end: 2022-01-15 05:45:21
Unequal: begin: 2022-01-15 07:39:18 end: 2022-01-15 07:40:18
Unequal: begin: 2022-01-15 13:14:44 end: 2022-01-15 13:15:44
Unequal: begin: 2022-01-15 17:03:18 end: 2022-01-15 17:05:18
Unequal: begin: 2022-01-15 21:39:50 end: 2022-01-15 21:40:50
Unequal: begin: 2022-01-16 07:27:52 end: 2022-01-16 07:30:52
Unequal: begin: 2022-01-16 17:37:33 end: 2022-01-16 17:40:33
Unequal: begin: 2022-01-16 20:28:57 end: 2022-01-16 20:30:57
Unequal: begin: 2022-01-16 23:19:50 end: 2022-01-16 23:20:50
Unequal: begin: 2022-01-17 07:24:39 end: 2022-01-17 07:25:39
Unequal: begin: 2022-01-17 17:33:57 end: 2022-01-17 17:35:57
Unequal: begin: 2022-01-17 18:59:23 end: 2022-01-17 19:00:23
Unequal: begin: 2022-01-

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  whole_bolus_effect_df["assigned"] = False
  segment_df.loc[closest_glucose_idx, 'bolus_effect'] = float(bolus_row['bolus_effect'])
  segment_df.loc[closest_glucose_idx, 'bolus_effect'] = float(bolus_row['bolus_effect'])
  segment_df.loc[closest_glucose_idx, 'bolus_effect'] = float(bolus_row['bolus_effect'])
  segment_df.loc[closest_glucose_idx, 'bolus_effect'] = float(bolus_row['bolus_effect'])
A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  whole_meal_effect_df['assigned'] = False


Unequal: begin: 2022-01-19 05:33:27 end: 2022-01-19 05:35:27
Unequal: begin: 2022-01-19 17:53:33 end: 2022-01-19 17:55:33
Unequal: begin: 2022-01-19 18:39:57 end: 2022-01-19 18:40:57
Unequal: begin: 2022-01-20 04:27:15 end: 2022-01-20 04:30:15
Unequal: begin: 2022-01-20 14:03:58 end: 2022-01-20 14:05:58
Unequal: begin: 2022-01-20 15:57:57 end: 2022-01-20 16:00:57
Unequal: begin: 2022-01-20 21:49:20 end: 2022-01-20 21:50:20
Unequal: begin: 2022-01-21 04:31:57 end: 2022-01-21 04:35:57
Unequal: begin: 2022-01-21 18:12:34 end: 2022-01-21 18:15:34
Unequal: begin: 2022-01-22 07:24:57 end: 2022-01-22 07:25:57
Unequal: begin: 2022-01-22 10:09:45 end: 2022-01-22 10:10:45
Unequal: begin: 2022-01-22 18:57:04 end: 2022-01-22 19:00:04
Unequal: begin: 2022-01-23 06:59:52 end: 2022-01-23 07:00:52
Unequal: begin: 2022-01-23 13:54:41 end: 2022-01-23 13:55:41
Unequal: begin: 2022-01-23 20:21:58 end: 2022-01-23 20:25:58
Unequal: begin: 2022-01-23 22:08:27 end: 2022-01-23 22:10:27
Unequal: begin: 2022-01-

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  whole_bolus_effect_df["assigned"] = False
  segment_df.loc[closest_glucose_idx, 'bolus_effect'] = float(bolus_row['bolus_effect'])
  segment_df.loc[closest_glucose_idx, 'bolus_effect'] = float(bolus_row['bolus_effect'])
  segment_df.loc[closest_glucose_idx, 'bolus_effect'] = float(bolus_row['bolus_effect'])
  segment_df.loc[closest_glucose_idx, 'bolus_effect'] = float(bolus_row['bolus_effect'])
  segment_df.loc[closest_glucose_idx, 'bolus_effect'] = float(bolus_row['bolus_effect'])
  segment_df.loc[closest_glucose_idx, 'bolus_effect'] = float(bolus_row['bolus_effect'])
  segment_df.loc[closest_glucose_idx, 'bolus_effect'] = float(bolus_row['bolus_effect'])
  segment_df.loc[closest_glucose_idx, 'bolus_effect'] = float(bolus_ro

In [None]:
# Save the processed data -- CAREFUL: this overwrites any existing pickle.
import os

# Name the output after the file that `filepath` currently points at, because
# that is the file `step_updated_segments` was computed from. A hard-coded
# '559' would store another patient's segments under 559's name whenever this
# cell is run after the loop over the whole test directory.
patient_id = os.path.basename(filepath).split('-')[0]
filename = './processed_data/{}_test_combined_segments_noshrink.pkl'.format(patient_id)

# open(..., 'wb') creates (or truncates) the file itself, but it does not
# create a missing parent directory.
os.makedirs(os.path.dirname(filename), exist_ok=True)

with open(filename, 'wb') as f:
    pickle.dump(step_updated_segments, f)