In [34]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.ensemble import RandomForestRegressor
import xml.etree.ElementTree as ET
import csv
import datetime

In [26]:
# cols: id, weight, timestamp, blood glucose level, exercise (intensity*duration), sleep (duration*quality; start and end are switched), last_meal_time, last_meal_time_carbs, last_basal_time, last_basal_value, last_bolus_time, last_bolus_value, last_bolus_carbs
# note: blood glucose level is the target variable
# note: bolus has same start and end time, so we use the start time

In [99]:
# Convert the XML export into a flat CSV with one row per glucose reading.
# Every reading is enriched with the most recent sleep, exercise, meal,
# basal and bolus information found in the same XML file.
tree = ET.parse('../data/559-ws-testing.xml')
root = tree.getroot()

date_format = '%d-%m-%Y %H:%M:%S'

fieldnames = ['timestamp', 'glucose_level', 'exercise_today', 'sleep_last_night', 'last_meal_time', 'last_meal_time_carbs', 'last_basal_time', 'last_basal_value', 'last_bolus_time', 'last_bolus_value', 'last_bolus_carbs']


def _parse_ts(text):
    """Parse a timestamp attribute written in ``date_format``."""
    return datetime.datetime.strptime(text, date_format)


def _section_events(xml_root, tag):
    """Return the <event> children of section ``tag``, or [] if the section is absent."""
    section = xml_root.find(tag)
    return [] if section is None else section.findall('event')


def _attach_last_event(frame, events, time_key, elapsed_col, value_cols):
    """Annotate each row of ``frame`` with the latest event at or before its timestamp.

    Parameters
    ----------
    frame : DataFrame with a 'timestamp' column; modified in place.
    events : list of dicts, each holding a datetime under ``time_key``.
    time_key : key of the event datetime inside each dict.
    elapsed_col : column receiving the hours elapsed since that event.
    value_cols : mapping {output column: event dict key} of values to copy.

    Rows that precede every event are left untouched (NaN).
    """
    # Sort so that consecutive events delimit half-open intervals [t_k, t_k+1).
    ordered = sorted(events, key=lambda ev: ev[time_key])
    stamps = frame['timestamp']
    for pos, ev in enumerate(ordered):
        # Inclusive lower bound: a reading taken exactly at the event time
        # belongs to that event instead of falling between two intervals.
        mask = stamps >= ev[time_key]
        if pos + 1 < len(ordered):
            mask &= stamps < ordered[pos + 1][time_key]
        if not mask.any():
            continue
        # Compute the elapsed hours only for the selected rows.
        frame.loc[mask, elapsed_col] = (stamps[mask] - ev[time_key]).dt.total_seconds() / 3600
        for column, key in value_cols.items():
            frame.loc[mask, column] = ev[key]


# --- Glucose readings: one output row per <glucose_level> event ---------------
datas = pd.DataFrame(columns=fieldnames)

timestamps = []
glucose_levels = []
for event in _section_events(root, 'glucose_level'):
    timestamps.append(_parse_ts(event.attrib['ts']))
    glucose_levels.append(event.attrib['value'])

datas['timestamp'] = timestamps
datas['glucose_level'] = glucose_levels

# --- Sleep ---------------------------------------------------------------------
sleeps = []
for sleep_event in _section_events(root, 'sleep'):
    # Switch start and end: ts_end is used as the start and ts_begin as the end.
    start = _parse_ts(sleep_event.attrib['ts_end'])
    end = _parse_ts(sleep_event.attrib['ts_begin'])

    duration = (end - start).total_seconds() / 3600  # convert to hours
    quality = int(sleep_event.attrib['quality'])

    sleeps.append({'start': start, 'end': end, 'value': duration * quality})

sleeps.sort(key=lambda s: s['start'])

# Two-pointer walk over readings and sleeps; assumes the glucose readings are
# in chronological order (the row order of the XML is kept as-is).
row_idx = 0
sleep_idx = 0
while row_idx < len(datas) and sleep_idx < len(sleeps):
    reading_ts = datas['timestamp'].iloc[row_idx]
    current_sleep = sleeps[sleep_idx]
    if reading_ts < current_sleep['start']:
        # Awake before the current sleep: use the previous sleep's score.
        # Before the very first sleep there is no previous night, so the
        # value stays missing instead of wrapping around to the last sleep.
        if sleep_idx > 0:
            datas.at[row_idx, 'sleep_last_night'] = sleeps[sleep_idx - 1]['value']
        row_idx += 1
    elif reading_ts <= current_sleep['end']:
        # Reading taken during the sleep itself.
        datas.at[row_idx, 'sleep_last_night'] = 0
        row_idx += 1
    else:
        sleep_idx += 1

# Readings after the end of the last recorded sleep refer to that last sleep.
if sleeps:
    datas.loc[datas.index[row_idx:], 'sleep_last_night'] = sleeps[-1]['value']

# --- Exercise ------------------------------------------------------------------
exercises = []
for exercise_event in _section_events(root, 'exercise'):
    start = _parse_ts(exercise_event.attrib['ts'])
    duration = int(exercise_event.attrib['duration'])
    intensity = int(exercise_event.attrib['intensity'])

    exercises.append({
        'start': start,
        # NOTE(review): duration is interpreted as hours here - confirm the unit used in the XML.
        'end': start + datetime.timedelta(hours=duration),
        'value': duration * intensity,
    })

datas['exercise_today'] = 0

for workout in exercises:
    # The exercise counts for every reading between its start and the end of
    # the next sleep that finishes after the exercise started.
    sleep_end = next((s['end'] for s in sleeps if s['end'] > workout['start']), None)

    affected = datas['timestamp'] > workout['start']
    if sleep_end is not None:
        # Without a later sleep there is no upper bound to apply.
        affected &= datas['timestamp'] < sleep_end
    datas.loc[affected, 'exercise_today'] += workout['value']

    # TODO: handle the case when the person is doing the exercise

# --- Meals ---------------------------------------------------------------------
meals = []
for meal_event in _section_events(root, 'meal'):
    meals.append({'time': _parse_ts(meal_event.attrib['ts']), 'value': int(meal_event.attrib['carbs'])})

_attach_last_event(datas, meals, 'time', 'last_meal_time', {'last_meal_time_carbs': 'value'})

# --- Basal ---------------------------------------------------------------------
basals = []
for basal_event in _section_events(root, 'basal'):
    basals.append({'time': _parse_ts(basal_event.attrib['ts']), 'value': float(basal_event.attrib['value'])})

_attach_last_event(datas, basals, 'time', 'last_basal_time', {'last_basal_value': 'value'})

# --- Bolus ---------------------------------------------------------------------
boluses = []
for bolus_event in _section_events(root, 'bolus'):
    boluses.append({
        'start': _parse_ts(bolus_event.attrib['ts_begin']),
        'end': _parse_ts(bolus_event.attrib['ts_end']),
        'dose': float(bolus_event.attrib['dose']),
        'carbs': int(bolus_event.attrib['bwz_carb_input']),
    })

_attach_last_event(datas, boluses, 'start', 'last_bolus_time', {'last_bolus_value': 'dose', 'last_bolus_carbs': 'carbs'})

# --- Output --------------------------------------------------------------------
# Write the file exactly once. Keeping a second open handle on the same path
# (e.g. a csv writer that already emitted a header) would flush its buffered
# bytes over the pandas output when closed and corrupt the first data row.
datas.to_csv('../data/559-testing.csv', mode='w', header=True, index=False)

print("CSV file generated successfully!")

CSV file generated successfully!
