In [None]:
import pandas as pd
import xml.etree.ElementTree as et
import io
import os
import numpy as np
import datetime
from dateutil.parser import parse

# Relative directory that the notebook joins with the XML file name when loading data.
path = '../data/training/'

In [None]:
def iter_element(etree, tag):
    """Yield ``(position, attribute_name, attribute_value)`` triples for a tag's children.

    The first element matching ``tag`` is located with ``etree.find`` and its
    direct children are visited in order. ``position`` is the 1-based index of
    the child. One triple is produced per attribute, so a child without
    attributes yields nothing but still consumes a position.
    """
    parent = etree.find(tag)
    for position, child in enumerate(parent, start=1):
        for attribute in child.attrib.items():
            yield (position, *attribute)
            
def compute_interval(df, threshold=5, interval_name='ts'):
    """Add an ``interval`` column with timestamps rounded to fixed-width bins.

    Parameters
    ----------
    df : pandas.DataFrame
        Frame holding the timestamp column. It is modified in place (the
        ``interval`` column is added or overwritten) and also returned.
    threshold : int or float, default 5
        Width of the timestamp bins in minutes. Must be positive.
    interval_name : str, default 'ts'
        Name of the column to round. The arithmetic assumes the column casts
        to int64 nanoseconds since the epoch (i.e. datetime64[ns]).

    Returns
    -------
    pandas.DataFrame
        The same ``df`` object, now carrying the ``interval`` column.

    Raises
    ------
    ValueError
        If ``threshold`` is not positive.
    """
    if threshold <= 0:
        raise ValueError('threshold must be a positive number of minutes')

    # Bin width converted from minutes to nanoseconds.
    threshold_ns = threshold * 60 * 1e9

    # Round each timestamp to the nearest multiple of the bin width
    # (np.round sends exact halves to the even multiple).
    timestamps_ns = df[interval_name].astype(np.int64)
    df['interval'] = pd.to_datetime(np.round(timestamps_ns / threshold_ns) * threshold_ns)

    return df

def calculate_duration(x, y):
    """Return the absolute gap between two timestamps as text.

    If either argument is the empty string, an empty string is returned.
    Otherwise the smaller value is subtracted from the larger, the result is
    truncated to whole seconds, and it is formatted the way
    ``str(datetime.timedelta)`` does (``H:MM:SS``, prefixed by a day count
    when the gap spans a day or more).
    """
    if x == '' or y == '':
        return ''
    gap = y - x if y > x else x - y
    whole_seconds = int(gap.total_seconds())
    return str(datetime.timedelta(seconds=whole_seconds))

def _parse_timestamp(text, allow_empty=False):
    """Parse a ``'%d-%m-%Y %H:%M:%S'`` string; optionally pass ``''`` through unchanged."""
    if allow_empty and text == '':
        return text
    return datetime.datetime.strptime(text, '%d-%m-%Y %H:%M:%S')


def get_data(etree, tag, interval):
    """Build a DataFrame from the attributes of the children of ``tag``.

    Each child element becomes one row (indexed by its 1-based position) and
    each attribute name becomes a column. Timestamp columns are then parsed:

    * if a ``ts`` column exists, it is parsed and used for the interval bins;
    * otherwise, if ``ts_begin`` (with ``ts_end``) or ``tbegin`` (with
      ``tend``) exists, both columns are parsed (empty strings are left as
      they are), a ``duration`` column is added, and the begin column is used
      for the interval bins.

    Parameters
    ----------
    etree : xml.etree.ElementTree.Element
        Element that is searched for ``tag``.
    tag : str
        Tag whose child elements are tabulated.
    interval : int or float
        Bin width forwarded to ``compute_interval``.

    Returns
    -------
    pandas.DataFrame
        The tabulated attributes, plus ``interval`` (and ``duration`` for
        begin/end data) when a recognised timestamp column is present.
    """
    df = pd.DataFrame(iter_element(etree, tag))
    df = df.pivot(columns=1, index=0)
    # Drop the outer "value" level so columns are named by attribute.
    # droplevel keeps the labels aligned with the actual column order, whereas
    # ``levels[1]`` only lists the level's unique values.
    df.columns = df.columns.droplevel(0)

    if 'ts' in df.columns:
        df['ts'] = df['ts'].apply(_parse_timestamp)
        return compute_interval(df, interval, 'ts')

    # Begin/end style records; the first matching pair wins, as before.
    for begin_col, end_col in (('ts_begin', 'ts_end'), ('tbegin', 'tend')):
        if begin_col not in df.columns:
            continue
        for column in (begin_col, end_col):
            df[column] = df[column].apply(_parse_timestamp, allow_empty=True)
        df['duration'] = df.apply(
            lambda row: calculate_duration(row[begin_col], row[end_col]), axis=1
        )
        return compute_interval(df, interval, begin_col)

    return df

In [None]:
# Parse the '559-ws-training.xml' file found under `path` into an ElementTree.
tree = et.parse(os.path.join(path, '559-ws-training.xml'))

In [None]:
# Take the root element of the parsed tree; the bare `root.attrib` expression
# makes the notebook display the root's attribute dictionary.
root = tree.getroot()
root.attrib

In [None]:
# Tabulate the 'glucose_level' records from the XML root and preview the first 10 rows.
glucose_level = get_data(etree=root, tag='glucose_level', interval=5)
glucose_level.head(n=10)

In [None]:
# Tabulate the 'finger_stick' records from the XML root and preview the first 10 rows.
finger_stick = get_data(etree=root, tag='finger_stick', interval=5)
finger_stick.head(n=10)

In [None]:
# Tabulate the 'basal' records from the XML root and preview the first 10 rows.
basal = get_data(etree=root, tag='basal', interval=5)
basal.head(n=10)

In [None]:
# Tabulate the 'temp_basal' records from the XML root and preview the first 10 rows.
temp_basal = get_data(etree=root, tag='temp_basal', interval=5)
temp_basal.head(n=10)

In [None]:
# Tabulate the 'bolus' records from the XML root and preview the first 10 rows.
bolus = get_data(etree=root, tag='bolus', interval=5)
bolus.head(n=10)

In [None]:
# Tabulate the 'meal' records from the XML root and preview the first 10 rows.
meal = get_data(etree=root, tag='meal', interval=5)
meal.head(n=10)

In [None]:
# Tabulate the 'sleep' records from the XML root and preview the first 10 rows.
sleep = get_data(etree=root, tag='sleep', interval=5)
sleep.head(n=10)

In [None]:
# Tabulate the 'work' records from the XML root and preview the first 10 rows.
work = get_data(etree=root, tag='work', interval=5)
work.head(n=10)

In [None]:
# Tabulate the 'stressors' records from the XML root and preview the first 10 rows.
stressors = get_data(etree=root, tag='stressors', interval=5)
stressors.head(n=10)

In [None]:
# Tabulate the 'hypo_event' records from the XML root and preview the first 10 rows.
hypo_event = get_data(etree=root, tag='hypo_event', interval=5)
hypo_event.head(n=10)

In [None]:
# Tabulate the 'illness' records from the XML root and preview the first 10 rows.
illness = get_data(etree=root, tag='illness', interval=5)
illness.head(n=10)

In [None]:
# Tabulate the 'exercise' records from the XML root and preview the first 10 rows.
exercise = get_data(etree=root, tag='exercise', interval=5)
exercise.head(n=10)

In [None]:
# Tabulate the 'basis_heart_rate' records from the XML root and preview the first 10 rows.
hr = get_data(etree=root, tag='basis_heart_rate', interval=5)
hr.head(n=10)

In [None]:
# Tabulate the 'basis_gsr' records from the XML root and preview the first 10 rows.
gsr = get_data(etree=root, tag='basis_gsr', interval=5)
gsr.head(n=10)

In [None]:
# Tabulate the 'basis_skin_temperature' records from the XML root and preview the first 10 rows.
st = get_data(etree=root, tag='basis_skin_temperature', interval=5)
st.head(n=10)

In [None]:
# Tabulate the 'basis_air_temperature' records from the XML root and preview the first 10 rows.
at = get_data(etree=root, tag='basis_air_temperature', interval=5)
at.head(n=10)

In [None]:
# Tabulate the 'basis_steps' records from the XML root and preview the first 10 rows.
steps = get_data(etree=root, tag='basis_steps', interval=5)
steps.head(n=10)

In [None]:
# Tabulate the 'basis_sleep' records from the XML root and preview the first 10 rows.
basis_sleep = get_data(etree=root, tag='basis_sleep', interval=5)
basis_sleep.head(n=10)