In [None]:
import pandas as pd
import numpy as np
import altair as alt
import os
from datetime import datetime as dt
from datetime import timedelta as td

# Disable jedi-based tab completion (workaround for when jedi fails to find completions)
%config Completer.use_jedi = False


# Step 1: Data import

In [None]:
# Root folder of the exported data, relative to this notebook.
base_dir_path = os.path.join('..', 'MiFit_data')

# Each CSV sits in a sub-folder of base_dir_path; the (folder, file) pairs below
# are listed in the same order as the names they are unpacked into.
(
    heart_rate_path,
    sleep_path,
    sport_path,
    activity_path,
    activity_minute_path,
) = (
    os.path.join(base_dir_path, folder_name, file_name)
    for folder_name, file_name in (
        ('HEARTRATE_AUTO', 'HEARTRATE_AUTO_1609608373273.csv'),
        ('SLEEP', 'SLEEP_1609608372781.csv'),
        ('SPORT', 'SPORT_1609608373447.csv'),
        ('ACTIVITY', 'ACTIVITY_1609608371719.csv'),
        ('ACTIVITY_MINUTE', 'ACTIVITY_MINUTE_1609608372267.csv'),
    )
)

# TODO, FEATURE: Use user input for base_dir (and traverse directories) instead of hard-coding.


# Step 2: Data examination and processing

In [None]:
# Load each export CSV into its own DataFrame.
# On any failure the error is printed (followed by an extra newline) and then
# re-raised unchanged, so the cell still stops with the original traceback.
try:
    heart_rate_df = pd.read_csv(heart_rate_path)
    sleep_df = pd.read_csv(sleep_path)
    sport_df = pd.read_csv(sport_path)
    activity_df = pd.read_csv(activity_path)
    activity_minute_df = pd.read_csv(activity_minute_path)
except Exception as load_error:
    # TODO: Better exception handling
    print(load_error, '\n')
    raise


In [None]:
# Inclusive bounds used to drop rows that fall outside of the year 2020.
# ISO-formatted literals avoid any day/month ambiguity when parsing.
start_date = pd.to_datetime('2020-01-01 00:00:00')
# Last nanosecond of 2020. With 23:59 as the (inclusive) upper bound, timestamps
# in the final 59 seconds of 31 December were excluded from the range.
end_date = pd.to_datetime('2020-12-31 23:59:59.999999999')

### Heart rate

In [None]:
# Keep a handle on the still-textual 'date' column: it is needed twice below.
hr_date_text = heart_rate_df['date']

# 'time' becomes a full timestamp (date + time of day) rather than a bare time.
# This has to be done this way to ensure that vega-lite correctly serializes
# time in ISO format.
# See: https://github.com/altair-viz/altair/issues/2199
heart_rate_df['time'] = pd.to_datetime(hr_date_text + ' ' + heart_rate_df['time'])
heart_rate_df['date'] = pd.to_datetime(hr_date_text)
heart_rate_df


In [None]:
# Keep only rows whose 'date' lies between start_date and end_date.
heart_rate_df = heart_rate_df.loc[
    lambda frame: frame['date'].between(start_date, end_date)
]
heart_rate_df

In [None]:
# Preview the first three rows of heart_rate_df.
heart_rate_df.head(3)

### Sleep

In [None]:
# Parse the 'date' column into pandas datetimes, then display the frame.
sleep_df['date'] = pd.to_datetime (sleep_df['date'])
sleep_df

# wakeTime = number of awakenings during sleep
# start = start of sleep in Unix time

In [None]:
# Convert the Unix-timestamp columns into datetime columns, stored next to the
# originals under a '_new' suffix.
#
# dt.fromtimestamp is kept deliberately: it produces naive datetimes in the
# machine's local timezone, exactly like the previous row-by-row loop did.
#
# Series.map returns a Series carrying sleep_df's own index, so the column
# assignment always lines up. (A Series built from a plain list is indexed
# 0..n-1 and is aligned by label on assignment, which yields NaT wherever
# sleep_df's index differs, e.g. if this cell is re-run after rows were
# filtered out.)
for epoch_col in ('lastSyncTime', 'start', 'stop'):
    sleep_df[epoch_col + '_new'] = pd.to_datetime(
        sleep_df[epoch_col].map(dt.fromtimestamp)
    )


In [None]:
# Display sleep_df to inspect the added '*_new' datetime columns.
sleep_df

In [None]:
# Keep only rows whose 'date' lies between start_date and end_date.
sleep_df = sleep_df.loc[
    lambda frame: frame['date'].between(start_date, end_date)
]
sleep_df

In [None]:
# Preview the first three rows of sleep_df.
sleep_df.head(3)

### Sport

In [None]:
# Derive datetime / timedelta columns from the raw epoch-based sport columns.
#
# The conversions are column-wise (Series.map / pd.to_timedelta) so every
# result carries sport_df's own index. Assigning a Series built from a plain
# list (indexed 0..n-1) is aligned by label and leaves NaT wherever sport_df's
# index differs, e.g. if this cell is re-run after rows were filtered out.
#
# dt.fromtimestamp is kept deliberately: it produces naive datetimes in the
# machine's local timezone, exactly like the previous row-by-row loop did.

# Start of the activity: 'startTime' is passed to dt.fromtimestamp as-is.
sport_df['startTime_new'] = pd.to_datetime(
    sport_df['startTime'].map(dt.fromtimestamp)
)

# Duration: 'sportTime' is interpreted as a number of seconds.
sport_df['sportTime_new'] = pd.to_timedelta(sport_df['sportTime'], unit='s')

# End of the activity: computed from the epoch sum (start + duration) rather
# than by adding the two derived columns, matching the original calculation.
sport_df['endTime_new'] = pd.to_datetime(
    (sport_df['startTime'] + sport_df['sportTime']).map(dt.fromtimestamp)
)

# type 16 = freestyle, 6 = running/walking

In [None]:
# Display sport_df to inspect the added '*_new' columns.
sport_df

In [None]:
# Keep only activities whose 'startTime_new' lies between start_date and end_date.
sport_df = sport_df.loc[
    lambda frame: frame['startTime_new'].between(start_date, end_date)
]
sport_df

In [None]:
# Preview the first three rows of sport_df.
sport_df.head(3)

### Activity

In [None]:
# Convert 'lastSyncTime' (a Unix timestamp) into a datetime column.
# dt.fromtimestamp is kept deliberately: it produces naive datetimes in the
# machine's local timezone, exactly like the previous row-by-row loop did.
# Series.map keeps activity_df's own index on the result, so the assignment
# always lines up; a Series built from a plain list (indexed 0..n-1) is aligned
# by label and would leave NaT wherever activity_df's index differs.
activity_df['lastSyncTime_new'] = pd.to_datetime(
    activity_df['lastSyncTime'].map(dt.fromtimestamp)
)

# Parse the 'date' column into pandas datetimes.
activity_df['date'] = pd.to_datetime(activity_df['date'])

In [None]:
# Display activity_df to inspect the converted columns.
activity_df

In [None]:
# Keep only rows whose 'date' lies between start_date and end_date.
activity_df = activity_df.loc[
    lambda frame: frame['date'].between(start_date, end_date)
]
activity_df

In [None]:
# Preview the first three rows of activity_df.
activity_df.head(3)

### Activity minute

In [None]:
# Display activity_minute_df as loaded, before any conversion.
activity_minute_df

In [None]:
# Keep a handle on the still-textual 'date' column: it is needed twice below.
am_date_text = activity_minute_df['date']

# 'time' becomes a full timestamp (date + time of day); the heart-rate data
# is converted the same way.
activity_minute_df['time'] = pd.to_datetime(am_date_text + ' ' + activity_minute_df['time'])
activity_minute_df['date'] = pd.to_datetime(am_date_text)
activity_minute_df

In [None]:
# Keep only rows whose 'date' lies between start_date and end_date.
activity_minute_df = activity_minute_df.loc[
    lambda frame: frame['date'].between(start_date, end_date)
]
activity_minute_df

In [None]:
# Preview the first three rows of activity_minute_df.
activity_minute_df.head(3)

# Step 3: Data visualization

### Heart rate

In [None]:
# Select the heart-rate readings whose 'date' equals 1 January 2020.
heart_rate_first_day_df = heart_rate_df.loc[
    lambda frame: frame['date'] == '2020-01-01'
]
heart_rate_first_day_df

In [None]:
# Line chart of heart rate for the selected day.
# The x channel applies the 'hoursminutes' time unit to the 'time' field;
# the y channel plots 'heartRate' as a quantitative value.
heart_rate_day_plt = (
    alt.Chart(heart_rate_first_day_df)
    .mark_line(color='firebrick')
    .encode(
        x=alt.X('time:T', timeUnit='hoursminutes'),
        y=alt.Y('heartRate:Q'),
    )
    .properties(width=700)
)

heart_rate_day_plt

### Sleep

### Sport

### Activity

### Activity minute

## Step 3.1: Summary creation