In [257]:
%load_ext autoreload
%autoreload 2

The autoreload extension is already loaded. To reload it, use:
  %reload_ext autoreload


In [258]:
import sys
sys.path.append('../src/')
from plotutils import plotts
from wahooreader import WahooTickrX
from bokeh.io import output_notebook, show, save, output_file
from bokeh.layouts import gridplot, Column, Row
from bokeh.models.widgets import DatePicker, RangeSlider
from bokeh.models import CustomJS, Div, Markup, ColumnDataSource, HoverTool, Legend, Range1d, DataRange1d, TapTool, OpenURL, Button, Band
from bokeh.plotting import figure

from scipy import stats
from datetime import datetime
import glob
import os
import time
from pathlib import Path
import pandas as pd
import dask
import dask.dataframe as dd
from dask.diagnostics import ProgressBar
from datetime import datetime
import matplotlib.pyplot as plt
import numpy as np
import statsmodels.api as sm

from getpass import getpass
from wodupcrawler import WodUp
import json

output_notebook()

# ETL process for `.fit` files.

The script below reads in `.fit` files from a directory and outputs processed files in two directories. One for the time series `.csv`-s and one for the summary stat `.csv`-s. Both directory contains one file per Wahoo workout session. The directory filepaths are hardcoded in the script. 

In [None]:
!python ../src/fitetl.py

In [3]:
# Specify plot window start date
plot_window = pd.Timedelta('30 days')

# Sleep

In [321]:
list(set(['A', 'B']) - set(['A']))

['B']

In [315]:
df_sleep['start_time'] = df_sleep['start'].dt.strftime('%I:%M %p')
df_sleep['start_time'] = df_sleep['start'].dt.strftime('%I:%M %p')

0     09:12 PM
1     08:59 PM
2     08:55 PM
3     08:21 PM
4     10:00 PM
5     08:34 PM
6     10:05 PM
7     10:39 PM
8     08:30 PM
9     09:35 PM
10    09:53 PM
11    08:30 PM
12    08:07 PM
13    07:55 PM
14    09:11 PM
15    09:05 PM
16    08:35 PM
17    09:27 PM
18    07:42 PM
19    07:40 PM
20    07:35 PM
21    08:15 PM
22    07:49 PM
Name: start, dtype: object

In [303]:
df_sleep = pd.read_csv('../data/sleep.csv', parse_dates=['start', 'end', 'date'])

p5, p5_cds = plotts(
    df_sleep,
    plot_height=350, 
    plot_width=450,
    xvar='date',
    ys=['end_hour', 'start_hour'],
    units=['hour'],
    x_range=DataRange1d(end=datetime.today()+pd.Timedelta('1 days'), follow='end', follow_interval=plot_window),
    ymin=22,
    styles=['b'], 
    bounded_bar_label='sleep',
    title='120 sec HR recovery trend',
);


In [296]:
p5.vbar?

In [304]:
p5 = figure(
    x_range=DataRange1d(end=datetime.today()+pd.Timedelta('1 days'), follow='end', follow_interval=plot_window), 
    x_axis_type="datetime",
    plot_height=350, 
    plot_width=450,
    title="Sleep stages", 
)
data = ColumnDataSource(df_sleep)
p5.add_layout(Legend(), 'center')

p5.vbar(
        x='date',
        top='end_hour',
        fill_color='grey',
        width=[24*60*60*900],
        alpha=0.5,
        source=data
)

show(p5)

RuntimeError: 

Expected width to reference fields in the supplied data source.

When a 'source' argument is passed to a glyph method, values that are sequences
(like lists or arrays) must come from references to data columns in the source.

For instance, as an example:

    source = ColumnDataSource(data=dict(x=a_list, y=an_array))

    p.circle(x='x', y='y', source=source, ...) # pass column names and a source

Alternatively, *all* data sequences may be provided as literals as long as a
source is *not* provided:

    p.circle(x=a_list, y=an_array, ...)  # pass actual sequences and no source



In [265]:
df_sleep = pd.read_csv('../data/sleep.csv', parse_dates=['start', 'end', 'date'])
stages = ["deep", "rem", "light", "awake"]
colors = ['#154ba6', '#3f8dff', '#7ec4ff', '#e73360']
df_sleep['7.5hr'] = 450
df_sleep['time_asleep'] = df_sleep['deep'] + df_sleep['rem'] + df_sleep['light']
df_sleep['7day_avg'] = df_sleep.set_index('date')['time_asleep'].rolling('7d', closed='right').mean().reset_index()['time_asleep']
df_sleep['date_str'] = df_sleep['date'].dt.strftime('%a %b %d %Y')

data = ColumnDataSource(df_sleep)


p4 = figure(
    x_range=DataRange1d(end=datetime.today()+pd.Timedelta('1 days'), follow='end', follow_interval=plot_window), 
    x_axis_type="datetime",
    plot_height=350, 
    plot_width=450,
    title="Sleep stages", 
)

p4.add_layout(Legend(), 'center')

p4.vbar_stack(stages, x='date', width=24*60*60*900, color=colors, source=data, legend=[[s] for s in stages])
p4.line(x='date', y='7.5hr', source=data, color='grey', line_dash="4 4")
p4.line(x='date', y='7day_avg', source=data)

p4.y_range.start = 0
p4.x_range.range_padding = 0.1
p4.xgrid.grid_line_color = None
p4.axis.minor_tick_line_color = None
p4.outline_line_color = None
p4.add_tools(HoverTool(
        tooltips=[
            ("Awake", "@awake"),
            ("REM", "@rem"),
            ("Light", "@light"),
            ("Deep", "@deep"),
            ("Date", "@date_str")
        ]
    ))

p4.legend.click_policy = 'hide'
p4.legend.orientation = "horizontal"
p4.legend.location = 'top_left'


p5, p5_cds = plotts(
    df_sleep,
    plot_height=350, 
    plot_width=450,
    xvar='date',
    ys=['end_hour', 'start_hour'],
    units=['hour'],
    x_range=DataRange1d(end=datetime.today()+pd.Timedelta('1 days'), follow='end', follow_interval=plot_window),
    ymin=22,
    styles=['|'], 
    title='120 sec HR recovery trend',
    show_plot=False
);

show(p4)

# PR table


In [217]:
df_pr

Unnamed: 0,reps,date_front_squat,front_squat,date_back_squat,back_squat,date_deadlift,deadlift,date_barbell_bench_press,barbell_bench_press
0,0.0,2020-04-30,225.0,2020-10-05,295.0,2020-10-07,365.0,2020-07-31,230.0
1,1.0,2020-04-30,225.0,2020-06-28,295.0,2020-08-24,365.0,2020-07-31,230.0
2,2.0,2020-04-30,205.0,2020-09-24,275.0,2020-09-21,315.0,2020-07-31,210.0
3,3.0,2020-06-17,205.0,2020-10-26,275.0,2020-10-30,315.0,2020-10-28,210.0
4,4.0,2020-04-30,205.0,2020-09-24,255.0,2020-09-21,305.0,2020-04-30,185.0
5,5.0,2020-06-17,205.0,2020-10-01,250.0,2020-09-29,305.0,2020-07-31,185.0
6,6.0,2020-04-30,155.0,2020-10-19,245.0,2020-10-23,305.0,2020-10-21,185.0
7,7.0,2020-04-30,155.0,2020-04-05,225.0,2020-04-17,235.0,2020-04-30,155.0
8,8.0,2020-04-30,155.0,2020-10-12,225.0,2020-10-16,235.0,2020-10-14,155.0
9,9.0,2020-01-11,95.0,2020-08-13,95.0,,,2019-10-23,85.0


In [193]:
df_pr = pd.read_csv('../../WodUp-Scraper/data/hasannagib-pr-table.csv')

plotts(
    df_pr, 
    ys=['front_squat', 'back_squat', 'deadlift', 'barbell_bench_press'], 
    hover_vars=['date_front_squat', 'date_back_squat', 'date_deadlift', 'date_barbell_bench_press'], 
    xvar='reps', 
    styles=['-o'],
    x_axis_type='linear',
    ylabel='Weight (lbs)',
    xlabel='Reps',
    title='PR Table'
);

In [269]:
from bokeh.models.widgets import Panel, Tabs
tabs = []

for i in [1,2,3,4,5]:

    df_plot=[]
    for movement in ['front-squat', 'back-squat', 'deadlift', 'barbell-bench-press']:
        df_hist=pd.read_csv(f'../../WodUp-Scraper/data/hasannagib-{movement}.csv', parse_dates=['date'])
        df = df_hist.query(f'(reps>={i})').sort_values('date')
        df_plot.append(np.maximum.accumulate(df).set_index('date')[['weights']].rename(columns={'weights':movement.replace('-', '_')}).sort_index()
    )

    p, _ = plotts(
        pd.concat(df_plot),
        xvar='date',
        styles=['o-'],
        units=['lbs'],
        x_axis_type='datetime',
        title=f'{i} Rep Max',
        xlabel='Date',
        ylabel='Weigt (lbs)',
        plot_height=425,
        plot_width=450,
        show_plot=False,
        legend_position='center',
        legend_location='bottom_right',
        legend_orientation='vertical',
    );
    
    tabs.append(Panel(child=p, title=f"{i}-RM"))

tabs = Tabs(tabs=tabs, tabs_location='above', width_policy='fit')

show(tabs)

# HR Recovery Trends

For each workout session, find the 120 second heart rate recovery. Compare this against standard threshold. Note: Above `L1` indicates biological age is less than calendar age.

In [62]:
datadir_hrsum = '/Users/hasannagib/Documents/s3stage/wahoo/heartrate_sumstat/'
df = dd.read_csv(Path(f'{datadir_hrsum}*.csv')).compute()
df = df.rename(columns={'Unnamed: 0': 'timestamp'})
df['timestamp'] = pd.to_datetime(df['timestamp'])
df = df.set_index('timestamp')

df_bar = df.copy() 
df_bar['L0'] = 22
df_bar['L1'] = 53
df_bar['L2'] = 59
df_bar['L3'] = 66

df_bar = df_bar.reset_index()
df_bar['timestamp'] = pd.to_datetime(df_bar['timestamp'].dt.strftime('%Y-%m-%d 07:00:00'))
df_bar = df_bar.set_index('timestamp')

lowess = sm.nonparametric.lowess
lowess_curve = lowess(df_bar['120_sec_rec'], np.array((df_bar.index - df_bar.index[0]).days))
df_bar['lowess'] = lowess_curve[:,1]

p1, p1_cds = plotts(
    df_bar[['L0','L1', 'L2','L3', '120_sec_rec']], 
    units=['bpm'],
    x_range=DataRange1d(end=datetime.today()+pd.Timedelta('1 days'), follow='end', follow_interval=plot_window),
    ymin=22,
    styles=['-']*4 + ['|'], 
    title='120 sec HR recovery trend',
);

# COVID Analysis

Looks like HR recovery improved post COVID lockdown 2.0 that happened on Fri Oct 9, 2020.

In [64]:
df_bar['pre-lockdown-2.0'] = (df_bar.loc['2020-09-14':'2020-10-09'].mean()['120_sec_rec']*(df_bar.index < '2020-10-09'))
df_bar['pre-lockdown-2.0'] = df_bar['pre-lockdown-2.0'].apply(lambda x: None if x==0 else x)
df_bar['post-lockdown-2.0'] = (df_bar.loc['2020-10-10':].mean()['120_sec_rec']*(df_bar.index > '2020-10-09'))
df_bar['post-lockdown-2.0'] = df_bar['post-lockdown-2.0'].apply(lambda x: None if x==0 else x)

In [65]:
plotts(
    df_bar, 
    ys=['120_sec_rec', 'pre-lockdown-2.0', 'post-lockdown-2.0'],
    styles=['o-']+3*['-'],
    plot_width=700,
    palette=['grey', 'blue', 'red'],
    units=['min'],
    title='2 min HR recovery',
    x_range=DataRange1d(end=datetime.today()+pd.Timedelta('1 days'), follow='end', follow_interval=plot_window),
    ylabel='Minutes',
    trace=False
);

In [None]:
stats.ttest_ind(
    df_bar['120_sec_rec'].loc['2020-09-14':'2020-10-09'], 
    df_bar['120_sec_rec'].loc['2020-10-10':] 
)


WARNING: Note that HR recovery data is auto-correlated and thus doing a strandard t-test is not quite appropriate. It would be more appropriate to build a time series model and do t-test on the residuals. 

We can also apply a LOWESS filter to visualize the change in trend:

In [None]:
plt.figure(figsize=(8,4))
sns.regplot(
    list((df_bar.index - df_bar.index[0]).days), 
    df_bar['120_sec_rec'],
    lowess=True,
);
plt.grid()
plt.xlabel('days')
plt.ylabel('2 min HR recovery post workout')
plt.title('HR recovery trend - LOWESS');

# HR Zone Trends

How much time am I spending in different HR zones? e.g. Excessive time spent in peak zone for prolongned periods of time might not be a good idea and lead to overtrining. This should be varied and somewhat cyclical.

In [None]:
p2, p2_cds = plotts(
    (df.rolling(7).sum().dropna()/60), 
    ys=['174_', '152_173', '138_151'],
    styles=['o-'],
    units=['min'],
    title='Time spent in HR zones (7 day rolling sum)',
    x_range=p1.x_range,
    ylabel='Minutes',
    trace=False
);

# HR Time Series

Read in the raw time series HR date for all dates and concatenate them in a single dataframe for interactive plotting with Bokeh calendar date picker. 

In [None]:
ts_files = sorted(glob.glob('/Users/hasannagib/Documents/s3stage/wahoo/heartrate_ts/*.csv'))

@dask.delayed
def read_ts(file):
    df = pd.read_csv(file,parse_dates=['timestamp']
    ).set_index('timestamp').sort_index().reset_index()[
        ['heart_rate']
    ].rename(columns={
        'heart_rate':pd.to_datetime(os.path.basename(file)[:-11]).strftime('%a %b %d %Y'),
        
    })
    return df

dfs = [read_ts(file) for file in ts_files]

with ProgressBar():
    dfs = dask.compute(dfs)[0]

df_ts = pd.concat(dfs, axis=1).reset_index().rename(columns={'index':'s'})
df_ts['Time'] = df_ts['s'].apply(lambda x: time.strftime('%H:%M:%S', time.gmtime(x)))

# Pick latest date for HR data
df_ts['BPM'] = df_ts.iloc[:,-2]

In [None]:
def plot_cal_ts(df_ts):
    p = figure(
        width=450, 
        height=300, 
        title='Heart Rate',
        x_axis_label='Time (seconds)',
        y_axis_label='BPM',
        toolbar_location="above",
        tooltips=[
            ('Time','@Time'),
            ('BPM', '@BPM'),
        ]
    )

    cds = ColumnDataSource(df_ts)
    p.line('s', 'BPM', source=cds, color="black", alpha=0)
    
    band = Band(base='s', upper='BPM', source=cds, level='underlay',
            fill_alpha=0.95, fill_color='#ab383a')
    p.add_layout(band)
    return p, cds

In [None]:
show(plot_cal_ts(df_ts)[0])

# WodUp Session urls

In [None]:
with open('../data/session_urls.json') as json_file:
    urls = json.load(json_file)
    
with open('../data/session_wods.json') as json_file:
    wods = json.load(json_file)
    
# Get list of dates to look urls for
dts = []
for f in ts_files:
    dt = os.path.basename(f)[:10]
    if pd.to_datetime(dt) > pd.to_datetime('2020-09-01'):
        dts.append(dt)
        
if set(dts) - set(wods.keys()):    
    wu = WodUp(
        email='hasan.nagib@gmail.com', 
        password=getpass('Enter WodUp password:'),
        username='hasannagib'
    )

    wu.session_urls = urls
    wu.session_wods = wods

    # Add missing urls
    urls = wu.get_session_urls(dts)
    wods = wu.get_session_wods()

    # Save json
    with open('../data/session_urls.json', 'w') as outfile:
        json.dump(urls, outfile)    

    with open('../data/session_wods.json', 'w') as outfile:
        json.dump(wods, outfile)

    wu.browser.quit()

In [None]:
header="""
<div style="style=font-family:courier; color:grey; margin-left: 40px; width: 350px; float: left;"> 
<h1>Training Dashboard</h1> 
<p>Sleep data is sourced from Fitbit sleep logs.</p>
</div>
"""
div_header = Div(text=header)
show(div_header)

In [None]:
hr_rec="""
<div style="style=font-family:courier; color:grey; margin-left: 40px; width: 350px; float: left;"> 
<h2>Heart Rate</h2>
<p>Heart rate recovery greater than 53bpm in 2 minutes indicates that biological age is younger than calendar age. 
Click on any bar to see workout and HR profile for that day.
</p>
</div>
"""
hr_rec = Div(text=hr_rec)
show(hr_rec)

In [None]:
A=wods[dts[-1]][0]
B=wods[dts[-1]][1]

html ="""
"""

html ="""
<p> &nbsp;&nbsp; </p>
<div style="width: 100%; overflow: hidden;">
     <div style="margin-left: 100px; width: 350px; float: left;"> {A} &nbsp; {B} </div>
</div>
"""

div = Div(text=html.format(A=A, B=B))
show(div)

In [None]:
div.css_classes = ["""
.fa {
  padding: 20px;
  font-size: 30px;
  width: 50px;
  text-align: center;
  text-decoration: none;
}

/* Add a hover effect if you want */
.fa:hover {
  opacity: 0.7;
}

/* Set a specific color for each brand */

/* Facebook */
.fa-facebook {
  background: #3B5998;
  color: white;
}

/* Twitter */
.fa-twitter {
  background: #55ACEE;
  color: white;
}
"""]

# Dashboard

In [None]:
plot_window = pd.Timedelta('30 days')

p1, p1_cds = plotts(
    df_bar[['L0','L1', 'L2','L3','120_sec_rec']], 
    units=['bpm'],
    x_range=DataRange1d(end=datetime.today()+pd.Timedelta('1 days'), follow='end', follow_interval=plot_window),
    styles=['--']*4 + 2*['|'], 
    title='120 sec HR recovery trend',
    ylabel='Beats',
    plot_width=450,
    show_plot=False
);


p2, p2_cds = plotts(
    (df.rolling(7).sum().dropna()/60), 
    ys=['174_', '152_173', '138_151'],
    styles=['o-'],
    units=['min'],
    title='Time spent in HR zones (7 day rolling sum)',
    x_range=p1.x_range,
    ylabel='Minutes',
    plot_width=450,
    trace=True,
    show_plot=False
);


p3, p3_cds = plot_cal_ts(df_ts)

df_sleep = pd.read_csv('../data/sleep.csv', parse_dates=['start', 'end', 'date'])
stages = ["deep", "rem", "light", "awake"]
colors = ['#154ba6', '#3f8dff', '#7ec4ff', '#e73360']
data = ColumnDataSource(df_sleep)


p4 = figure(
    x_range=DataRange1d(end=datetime.today()+pd.Timedelta('1 days'), follow='end', follow_interval=plot_window), 
    x_axis_type="datetime",
    plot_height=350, 
    plot_width=450,
    title="Sleep stages",
    toolbar_location=None, 
    tools=""
)
p4.add_layout(Legend(), 'below')
p4.vbar_stack(stages, x='date', width=24*60*60*900, color=colors, source=data, legend=[[s] for s in stages])
p4.y_range.start = 0
p4.x_range.range_padding = 0.1
p4.xgrid.grid_line_color = None
p4.axis.minor_tick_line_color = None
p4.outline_line_color = None
p4.legend.click_policy = 'hide'
p4.legend.orientation = "horizontal"
p4.yaxis.axis_label = 'Minutes'

p5, p5_cds = plotts(
    df_sleep,
    plot_height=350, 
    plot_width=450,
    ts_col='date',
    ys=['end_hour', 'start_hour'],
    units=['hour'],
    x_range=DataRange1d(end=datetime.today()+pd.Timedelta('1 days'), follow='end', follow_interval=plot_window),
    ymin=22,
    styles=['|'], 
    ylabel='Hour',
    title='Sleep Schedule',
    show_plot=False
);

dp_callback = CustomJS(
    args={
        'source':p3_cds,
        'div':div,
        'wods':wods,
        'html':html
    },
    
    code=
        """
        console.log('div: ', cb_obj.value)
        console.log('test: ', html.replace("{A}", wods[cb_obj.value][0]))

        div.text = html.replace("{A}", wods[cb_obj.value][0]).replace("{B}", wods[cb_obj.value][1])
        source.data['BPM'] = source.data[cb_obj.value];
        source.change.emit()
        
        """
)

datePicker = DatePicker(width=150, value=df_ts.columns[-3])
datePicker.js_on_change('value', dp_callback)

tap_code = """
        console.log('DatePicker: ', dp.value)
        
        var dt_idx = p.selected.indices[0]
        var dt = p.data['ts_str'][dt_idx]
        
        console.log('Data selected: ', dt)
        dp.value = dt
        dp.change.emit()
        p.change.emit()
        r.change.emit()
        
        """ 

tap1_callback = CustomJS(args={'p': p1_cds, 'r': p2, 'dp': datePicker}, code=tap_code)
tap2_callback = CustomJS(args={'p': p2_cds, 'r': p1, 'dp': datePicker}, code=tap_code)

p1.add_tools(TapTool(callback=tap1_callback))
p2.add_tools(TapTool(callback=tap2_callback))

url = "https://www.wodup.com/timeline?date=@dt_str"

button = Button(width=100, label="WodUp", button_type="success")
button.js_on_click(CustomJS(
    args={
        'dp':datePicker,
        'urls':urls
    },
    code="""    
    var url = "https://www.wodup.com"
    
    function formatDate(date) {
    var d = new Date(date),
        month = '' + (d.getMonth() + 1),
        day = '' + d.getDate(),
        year = d.getFullYear();

    if (month.length < 2) 
        month = '0' + month;
    if (day.length < 2) 
        day = '0' + day;

    return [year, month, day].join('-');
    }
    
    var dt = dp.value
    console.log('Date:', formatDate(dt))
    
    if (typeof dt === 'string') {
      
      window.open(url.concat(urls[formatDate(Date.parse(dt))][0]))
    }
    else {
        var day = 60 * 60 * 24 * 1000;
        window.open(url.concat(urls[formatDate(dt+day)][0]))
    }

    """
)
)


In [None]:
dash = Column(div_header, Row(p4, p5), hr_rec, Row(p1, p2), Row(datePicker, button), Row(p3, div))
show(dash)