In [1]:
import pandas as pd
import numpy as np
import altair as alt
import os
from datetime import datetime as dt
from datetime import timedelta as td

# If Jedi autocompletion fails to find completions, uncomment the line below to disable it.
#%config Completer.use_jedi = False


# Step 1: Data import

In [2]:
# Root folder of the Mi Fit export, relative to this notebook.
base_dir_path = os.path.join('..', 'MiFit_data')

# (sub-folder, file name) of each exported CSV, in the order the path
# variables are unpacked below.
_export_files = (
    ('HEARTRATE_AUTO', 'HEARTRATE_AUTO_1609608373273.csv'),
    ('SLEEP', 'SLEEP_1609608372781.csv'),
    ('SPORT', 'SPORT_1609608373447.csv'),
    ('ACTIVITY', 'ACTIVITY_1609608371719.csv'),
    ('ACTIVITY_MINUTE', 'ACTIVITY_MINUTE_1609608372267.csv'),
)

(
    heart_rate_path,
    sleep_path,
    sport_path,
    activity_path,
    activity_minute_path,
) = [os.path.join(base_dir_path, folder, file_name) for folder, file_name in _export_files]

# TODO, FEATURE: Use user input for base_dir (and traverse directories) instead of hard-coding.


# Step 2: Data examination and processing

In [3]:
# Read every export into its own DataFrame. Any failure is printed and then
# re-raised, so execution stops at this cell.
try:
    heart_rate_df = pd.read_csv(heart_rate_path)
    sleep_df = pd.read_csv(sleep_path)
    sport_df = pd.read_csv(sport_path)
    activity_df = pd.read_csv(activity_path)
    activity_minute_df = pd.read_csv(activity_minute_path)
except Exception as load_error:
    # TODO: Better exception handling
    print(load_error, '\n')
    raise


### Heart rate

In [None]:
# Convert the 'date' column with pd.to_datetime, then display the frame.
heart_rate_df = heart_rate_df.assign(date=pd.to_datetime(heart_rate_df['date']))
heart_rate_df


In [5]:
# Analysis window: rows outside calendar year 2020 are dropped.
# These bounds are passed to Series.between, which is inclusive on both ends
# by default, so the upper bound is the last representable instant of the
# year. A '23:59' cutoff would drop timestamps from the final minute of
# December 31st.
start_date = pd.Timestamp('2020-01-01 00:00:00')
end_date = pd.Timestamp('2020-12-31 23:59:59.999999999')

In [None]:
# Keep only the rows whose date lies between start_date and end_date (inclusive).
heart_rate_in_range = heart_rate_df['date'].between(start_date, end_date)
heart_rate_df = heart_rate_df.loc[heart_rate_in_range]
heart_rate_df

In [7]:
# Preview the first three rows of the filtered heart-rate data.
heart_rate_df.head(n=3)

Unnamed: 0,date,time,heartRate
0,2020-01-03,00:00,61
1,2020-01-03,00:02,71
2,2020-01-03,00:04,61


### Sleep

In [None]:
# Convert the 'date' column with pd.to_datetime, then display the frame.
sleep_df = sleep_df.assign(date=pd.to_datetime(sleep_df['date']))
sleep_df

# wakeTime = number of waking-ups during sleep
# start = start of sleep in Unix time

In [9]:
# Add a '<column>_new' datetime column for each Unix-epoch column (seconds).
#
# Series.map keeps sleep_df's own index on the result, so the new columns
# line up row-for-row even when the frame no longer has the default 0..n-1
# index (for example when this cell is re-run after the date filter).
# Building a fresh pd.Series from a list would instead be aligned by label
# and could leave rows misaligned or NaT.
#
# dt.fromtimestamp interprets each value in the machine's local timezone and
# returns naive datetimes, so the resulting times depend on where this runs.
for epoch_col in ('lastSyncTime', 'start', 'stop'):
    sleep_df[epoch_col + '_new'] = pd.to_datetime(sleep_df[epoch_col].map(dt.fromtimestamp))


In [None]:
# Display the frame as the cell output.
sleep_df

In [None]:
# Keep only the rows whose date lies between start_date and end_date (inclusive).
sleep_in_range = sleep_df['date'].between(start_date, end_date)
sleep_df = sleep_df.loc[sleep_in_range]
sleep_df

In [12]:
# Preview the first three rows of the filtered sleep data.
sleep_df.head(n=3)

Unnamed: 0,date,lastSyncTime,deepSleepTime,shallowSleepTime,wakeTime,start,stop,lastSyncTime_new,start_new,stop_new
61,2020-01-01,1577948986,169,177,0,1577836440,1577857200,2020-01-02 08:09:46,2020-01-01 00:54:00,2020-01-01 06:40:00
62,2020-01-02,1578037754,201,313,0,1577916540,1577947380,2020-01-03 08:49:14,2020-01-01 23:09:00,2020-01-02 07:43:00
63,2020-01-03,1578123391,146,479,63,1578004620,1578045900,2020-01-04 08:36:31,2020-01-02 23:37:00,2020-01-03 11:05:00


### Sport

In [13]:
# Derive readable time columns from the raw export:
#   startTime_new - 'startTime' (Unix epoch, seconds) as a datetime
#   sportTime_new - 'sportTime' (duration in seconds) as a timedelta
#   endTime_new   - startTime + sportTime as a datetime
#
# The conversions work on whole columns and keep sport_df's own index, so the
# new columns line up row-for-row even when the frame does not have the
# default 0..n-1 index (for example when this cell is re-run after the date
# filter). Assigning a fresh pd.Series built from a list would instead be
# aligned by label and could leave rows misaligned or NaT.
#
# dt.fromtimestamp interprets each value in the machine's local timezone and
# returns naive datetimes, so the resulting times depend on where this runs.
sport_df['startTime_new'] = pd.to_datetime(sport_df['startTime'].map(dt.fromtimestamp))
sport_df['sportTime_new'] = pd.to_timedelta(sport_df['sportTime'], unit='s')
sport_df['endTime_new'] = pd.to_datetime(
    (sport_df['startTime'] + sport_df['sportTime']).map(dt.fromtimestamp)
)

# type 16 = freestyle, 6 = running/walking

In [None]:
# Display the frame as the cell output.
sport_df

In [None]:
# Keep only the sessions whose start time lies between start_date and end_date (inclusive).
sport_in_range = sport_df['startTime_new'].between(start_date, end_date)
sport_df = sport_df.loc[sport_in_range]
sport_df

In [16]:
# Preview the first three rows of the filtered sport data.
sport_df.head(n=3)

Unnamed: 0,type,startTime,sportTime,distance,maxPace,minPace,avgPace,calories,startTime_new,sportTime_new,endTime_new
0,16,1609422578,2924,0.0,1.8,0.0,0.0,217.0,2020-12-31 14:49:38,0 days 00:48:44,2020-12-31 15:38:22
1,16,1609249931,3118,0.0,1.8,0.0,0.0,208.0,2020-12-29 14:52:11,0 days 00:51:58,2020-12-29 15:44:09
2,16,1609249859,64,0.0,1.8,0.0,0.0,2.0,2020-12-29 14:50:59,0 days 00:01:04,2020-12-29 14:52:03


### Activity

In [17]:
# Add 'lastSyncTime_new': the Unix-epoch 'lastSyncTime' (seconds) as a datetime.
#
# Series.map keeps activity_df's own index on the result, so the new column
# lines up row-for-row even when the frame does not have the default 0..n-1
# index (for example when this cell is re-run after the date filter).
# Assigning a fresh pd.Series built from a list would instead be aligned by
# label and could leave rows misaligned or NaT.
#
# dt.fromtimestamp interprets each value in the machine's local timezone and
# returns naive datetimes, so the resulting times depend on where this runs.
activity_df['lastSyncTime_new'] = pd.to_datetime(activity_df['lastSyncTime'].map(dt.fromtimestamp))

# Convert the 'date' column with pd.to_datetime.
activity_df['date'] = pd.to_datetime(activity_df['date'])

In [None]:
# Display the frame as the cell output.
activity_df

In [None]:
# Keep only the rows whose date lies between start_date and end_date (inclusive).
activity_in_range = activity_df['date'].between(start_date, end_date)
activity_df = activity_df.loc[activity_in_range]
activity_df

In [20]:
# Preview the first three rows of the filtered daily activity data.
activity_df.head(n=3)

Unnamed: 0,date,lastSyncTime,steps,distance,runDistance,calories,lastSyncTime_new
61,2020-01-01,1577948986,5153,3619,280,129,2020-01-02 08:09:46
62,2020-01-02,1578037754,601,409,154,22,2020-01-03 08:49:14
63,2020-01-03,1578123391,1084,738,116,29,2020-01-04 08:36:31


### Activity minute

In [None]:
# Convert the 'date' column with pd.to_datetime, then display the frame.
activity_minute_df = activity_minute_df.assign(date=pd.to_datetime(activity_minute_df['date']))
activity_minute_df

In [None]:
# Keep only the rows whose date lies between start_date and end_date (inclusive).
activity_minute_in_range = activity_minute_df['date'].between(start_date, end_date)
activity_minute_df = activity_minute_df.loc[activity_minute_in_range]
activity_minute_df

In [23]:
# Preview the first three rows of the filtered per-minute activity data.
activity_minute_df.head(n=3)

Unnamed: 0,date,time,steps
0,2020-01-03,03:49,10
1,2020-01-03,09:44,28
2,2020-01-03,10:05,22


# Step 3: Data visualization

### Heart rate

### Sleep

### Sport

### Activity

### Activity minute

## Step 3.1: Summary creation