# Scraping WodUp

In [1]:
# Enable IPython's autoreload extension in mode 2 (reload all modules before
# running each cell), so edits to the imported modules take effect without
# restarting the kernel.
%load_ext autoreload
%autoreload 2

In [2]:
import sys
sys.path.append('../src/')
import wodupcrawler
import os
from bokeh.models import Div, ColumnDataSource, DataRange1d, Legend, HoverTool, Column
from bokeh.plotting import figure

from bokeh.io import output_notebook, show
import fitetl, wodupcrawler
import glob
import pandas as pd
from datetime import datetime
import sleepetl
import plotutils

# Configure Bokeh so that show() renders plots inline in this notebook.
output_notebook()

In [3]:
# Heart rate .fit files
# NOTE(review): fitetl.main() is left commented out — presumably it regenerates
# the CSVs read below from the raw .fit files; confirm before re-enabling.
#fitetl.main()
# Heart-rate recovery table, read via fitetl from fitetl.datadir_hrsum.
df_hr_rcvry = fitetl.read_rcvry_csv(fitetl.datadir_hrsum)
# Heart-rate profile table, read via fitetl from fitetl.datadir_hrts.
df_hr_profile = fitetl.read_hr_profile_csv(fitetl.datadir_hrts)
# Sorted paths of all CSV files whose path starts with fitetl.datadir_hrts.
# The glob pattern is built by plain string concatenation, so datadir_hrts
# must end with a path separator for this to match files inside the directory.
ts_files = sorted(glob.glob(f'{fitetl.datadir_hrts}*.csv'))

[########################################] | 100% Completed |  0.9s


In [4]:
# Preview the first rows of the recovery table to check its columns.
df_hr_rcvry.head()

Unnamed: 0_level_0,60_sec_rec,120_sec_rec,180_sec_rec,174_220,152_173,138_151,119_137,0_118,calories,max_hr,L0,L1,L2,L3,date
timestamp,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1
2020-05-25 07:00:00,48.0,57.0,59.0,0.0,3.8,6.05,2.033333,400,189.0,157.0,22,53,59,66,2020-05-25
2020-05-26 07:00:00,48.0,49.0,56.0,0.75,8.0,1.95,1.35,1504,331.0,175.0,22,53,59,66,2020-05-26
2020-06-16 07:00:00,33.0,37.0,42.0,0.0,0.0,4.4,16.133333,1654,406.0,147.0,22,53,59,66,2020-06-16
2020-06-19 07:00:00,34.0,40.0,49.0,0.0,7.15,4.35,12.15,2505,539.0,173.0,22,53,59,66,2020-06-19
2020-06-20 07:00:00,61.0,49.0,60.0,12.75,13.2,3.75,12.433333,309,665.0,182.0,22,53,59,66,2020-06-20


In [5]:
# Preview the first rows of the heart-rate profile table to check its columns.
df_hr_profile.head()

Unnamed: 0,s,2020-05-25,2020-05-26,2020-06-16,2020-06-19,2020-06-20,2020-06-23,2020-06-25,2020-06-30,2020-07-06,...,2020-11-04,2020-11-05,2020-11-06,2020-11-09,2020-11-10,2020-11-14,2020-11-16,2020-11-17,Time,BPM
0,0,87.0,102.0,45.0,91.0,82.0,74.0,,69.0,90.0,...,,99.0,,78.0,,,89.0,72.0,00:00:00,72.0
1,1,87.0,102.0,45.0,91.0,82.0,74.0,72.0,70.0,89.0,...,59.0,98.0,86.0,75.0,94.0,94.0,88.0,73.0,00:00:01,73.0
2,2,87.0,102.0,45.0,91.0,82.0,74.0,72.0,72.0,89.0,...,59.0,97.0,86.0,70.0,94.0,94.0,88.0,73.0,00:00:02,73.0
3,3,96.0,103.0,45.0,91.0,74.0,101.0,72.0,73.0,89.0,...,59.0,97.0,87.0,69.0,95.0,94.0,87.0,73.0,00:00:03,73.0
4,4,133.0,111.0,45.0,83.0,74.0,104.0,72.0,74.0,86.0,...,59.0,97.0,88.0,69.0,94.0,95.0,86.0,74.0,00:00:04,74.0


In [6]:
# Time span handed to the plots' x ranges as their follow interval.
plot_window = pd.Timedelta(days=70)

In [7]:
# Build the "2 min heart rate recovery" chart from df_hr_rcvry.
# plot_ts returns two values: the plot, and a second object that a later cell
# passes to plot_stacked_hr_zones as its data source.
# NOTE(review): show_plot=False presumably stops plot_ts from rendering on its
# own, which is why show() is called explicitly below — confirm in plotutils.
plot_hr_rcvry, plot_hr_rcvry_cds = plotutils.plot_ts(
    df_hr_rcvry,
    ys=['120_sec_rec', 'L2', 'L1', 'L0', 'L3'],
    # NOTE(review): a single unit for five series — confirm plot_ts applies it to all of them.
    units=['bpm'],
    x_axis_type='datetime',
    # The x range ends tomorrow and follows the trailing plot_window interval.
    x_range=DataRange1d(end=datetime.today()+pd.Timedelta('1 days'), follow='end', follow_interval=plot_window),
    # Five entries, one per column in ys (presumably '|' = bars and '-' = lines; confirm in plotutils).
    styles=['|'] + ['-'] * 4,
    # NOTE(review): only four alphas for five series — confirm plot_ts tolerates
    # the shorter list (or that alphas apply to the four '-' series only).
    alphas=[1, 1, 1, 1],
    title='2 min heart rate recovery',
    palette=['grey']+['#3f8dff', '#7ec4ff', '#e73360', '#154ba6'], #154ba6
    bar_line_color='white',
    line_width=2,
    ylabel='Beats',
    plot_height=325,
    plot_width=450,
    tools='xwheel_pan,pan,reset',
    active_scroll='xwheel_pan',
    show_plot=False
);

# Render the recovery chart inline.
show(plot_hr_rcvry)

In [8]:
def plot_stacked_hr_zones(
    cds,
    plot_window,
    plot_height=325,
    plot_width=450,
    stages=('119_137', '138_151', '152_173', '174_220'),
    colors=('#7ec4ff', '#3f8dff', '#154ba6', '#e73360'),
    ylabel='Minutes',
):
    """Build a stacked bar chart of the time spent in each heart-rate zone per date.

    Args:
        cds: Data source handed to ``vbar_stack``. It must provide a ``date``
            column (bar positions), one column per entry in ``stages`` (bar
            heights) and a ``ts_str`` column (shown by the hover tool).
        plot_window: Interval used as ``follow_interval`` of the x range; the
            range follows its end, which is set to tomorrow.
        plot_height: Figure height, forwarded to ``figure``.
        plot_width: Figure width, forwarded to ``figure``.
        stages: Names of the columns to stack, in stacking order. Underscores
            are replaced by dashes to form the legend labels.
        colors: One fill colour per stage, in the same order as ``stages``.
        ylabel: Label of the y axis. It defaults to ``'Minutes'`` rather than
            the previous hard-coded ``'Hours'``: the zone columns previewed in
            this notebook add up to more than 24 for a single date, so they
            cannot be hours (presumably minutes — confirm against fitetl).

    Returns:
        tuple: ``(p, cds)`` — the new figure and the data source, unchanged.

    Raises:
        ValueError: If ``stages`` and ``colors`` differ in length.
    """
    stages = list(stages)
    colors = list(colors)
    if len(stages) != len(colors):
        raise ValueError(
            f'stages and colors must have the same length, '
            f'got {len(stages)} and {len(colors)}'
        )

    # Datetime axes are measured in milliseconds; each bar spans 90% of a day
    # so that neighbouring days stay visually separated.
    ms_per_day = 24 * 60 * 60 * 1000
    bar_width = ms_per_day * 9 // 10

    # NOTE(review): plot_height/plot_width are the pre-3.0 Bokeh names, kept
    # here to match the rest of this notebook.
    p = figure(
        x_range=DataRange1d(
            end=datetime.today() + pd.Timedelta('1 days'),
            follow='end',
            follow_interval=plot_window,
        ),
        x_axis_type="datetime",
        plot_height=plot_height,
        plot_width=plot_width,
        tools='box_select,lasso_select,xwheel_pan,pan,reset,box_zoom',
        active_drag='box_select',
        toolbar_location='above',
        title="Heart rate zones",
    )
    # Add the legend below the plot before any glyph is drawn, so the stacked
    # bars register their entries there instead of inside the plot area.
    p.add_layout(Legend(), 'below')
    p.vbar_stack(
        stages,
        x='date',
        width=bar_width,
        color=colors,
        source=cds,
        legend_label=[s.replace('_', '-') for s in stages],
    )

    p.y_range.start = 0
    p.x_range.range_padding = 0.1
    p.xgrid.grid_line_color = None
    p.axis.minor_tick_line_color = None
    p.add_tools(HoverTool(
        tooltips=[
            ("Date", "@ts_str"),
        ]
    ))
    p.outline_line_color = None
    p.legend.click_policy = 'hide'
    p.legend.orientation = "horizontal"
    p.legend.border_line_alpha = 0
    p.yaxis.axis_label = ylabel

    return p, cds

In [12]:
# Build the stacked heart-rate-zone chart from the data source returned by
# plotutils.plot_ts, using the same follow window as the recovery chart.
p, cds = plot_stacked_hr_zones(plot_hr_rcvry_cds, plot_window)

In [13]:
# Display both charts in one column layout: zones first, recovery second.
show(Column(p, plot_hr_rcvry))

In [11]:
# Echo the data source returned by plotutils.plot_ts for inspection.
plot_hr_rcvry_cds