# Exercise Progress

#### Katriona Goldmann

This script looks at the exercise over time. Specifically, I am training for a half marathon so I want to compare logged runs to my plan to determine if I am on track. 

In [34]:
import pandas as pd
import numpy as np
import plotly
import plotly.plotly as py
import plotly.graph_objs as go
import plotly.io as pio
import datetime
import os

----

## Read in the data

In [35]:
ex = pd.read_csv("./Outputs/exercise.csv")

So lets look at the exercise completed:

In [36]:
ex.head()

Unnamed: 0.1,Unnamed: 0,activityId,activityParentId,activityParentName,calories,description,distance,duration,hasStartTime,isFavorite,lastModified,logId,name,startDate,startTime,steps
0,0,52000,52000,Yoga,104,,,1800000,True,False,2018-12-27T23:10:34.000Z,18875096545,Yoga,27/12/2018,22:40,239
1,0,90009,90009,Run,346,Running - 5 mph (12 min/mile),4.352986,2893037,True,False,2018-12-31T13:33:42.000Z,18936842588,Run,31/12/2018,12:17,7313
2,0,90009,90009,Run,111,Running - 5 mph (12 min/mile),1.304879,858000,True,False,2019-01-16T13:28:31.000Z,19144979937,Run,15/01/2019,17:50,2105
3,0,90019,90019,Treadmill,181,2mph,1.615564,900000,True,False,2019-01-18T08:14:33.000Z,19297289610,Treadmill,18/01/2019,07:43,2745
4,0,90009,90009,Run,259,Running - 5 mph (12 min/mile),3.106856,2000000,True,False,2019-01-24T17:09:35.000Z,19419790003,Run,21/01/2019,18:12,5169


You can see in January there is mainly running and yoga. This is because I unfortunately broke two fingers and was in a cast so unfortunately my exercise was pretty limited. 

-----

## Running Plan

So lets look specifically at running. 

In [37]:
runs = ex.loc[(ex.activityParentName == 'Run') | (ex.activityParentName == 'Treadmill') , ['distance', 'duration', 'calories', 'steps', 'startDate', 'startTime'] ]
runs['total_run'] = runs['distance'].cumsum()
runs.head()

Unnamed: 0,distance,duration,calories,steps,startDate,startTime,total_run
1,4.352986,2893037,346,7313,31/12/2018,12:17,4.352986
2,1.304879,858000,111,2105,15/01/2019,17:50,5.657865
3,1.615564,900000,181,2745,18/01/2019,07:43,7.273429
4,3.106856,2000000,259,5169,21/01/2019,18:12,10.380285
5,1.242742,923000,107,2020,23/01/2019,17:36,11.623027


Now lets load in the running plan. I created this using the [Nike Run Club app](https://www.nike.com/gb/en_gb/c/nike-plus/running-app-gps) then exported to a csv, which I would recommend, its super easy to create a plan using this. Although rather annoyingly, fitbit doesn't sync with this app currently. 

But however you decide to plan all you need is a csv with a distance, date and time column. 

In [38]:
plan = pd.read_csv("./Inputs/Running_Plan.csv")
plan['total_dist'] = plan['distance (km)'].cumsum()
plan.head()

Unnamed: 0,date,dow,day,Tminus,goal,type,notes,distance (km),week,old,total_dist
0,15/01/2019,3,Tuesday,18,,first run,,2.0,1,2.0,2.0
1,16/01/2019,4,Wednesday,18,,rest,,0.0,1,0.0,2.0
2,17/01/2019,5,Thursday,18,,rest,,0.0,1,0.0,2.0
3,18/01/2019,6,Friday,18,,benchmark,15 mins,2.0,1,2.0,4.0
4,19/01/2019,7,Saturday,18,,rest,,0.0,1,0.0,4.0


In [39]:
plan['text'] = 'run so far = ' + round(plan['total_dist']).map(str) + 'km, To run today=' + plan['distance (km)'].map(str) + 'km'

Reformat some of the columns as dates and convert to the correct units. 

In [40]:
# Format the dates as time stamps
plan['date_stamp'] = [datetime.datetime.strptime(x, '%d/%m/%Y') for x in plan['date']] 
runs['date_stamp']  = [datetime.datetime.strptime(x, '%d/%m/%Y') for x in runs['startDate']] 
plan['datec'] = [x.strftime('%d/%m/%Y') for x in plan['date_stamp']]
runs['datec'] = [x.strftime('%d/%m/%Y') for x in runs['date_stamp']]

# Convert to metric units
runs['total_run'] = runs['total_run']/0.62137119
runs['secs'] = runs['duration']/(1000)

plan

Unnamed: 0,date,dow,day,Tminus,goal,type,notes,distance (km),week,old,total_dist,text,date_stamp,datec
0,15/01/2019,3,Tuesday,18,,first run,,2.0,1,2.0,2.0,"run so far = 2.0km, To run today=2.0km",2019-01-15,15/01/2019
1,16/01/2019,4,Wednesday,18,,rest,,0.0,1,0.0,2.0,"run so far = 2.0km, To run today=0.0km",2019-01-16,16/01/2019
2,17/01/2019,5,Thursday,18,,rest,,0.0,1,0.0,2.0,"run so far = 2.0km, To run today=0.0km",2019-01-17,17/01/2019
3,18/01/2019,6,Friday,18,,benchmark,15 mins,2.0,1,2.0,4.0,"run so far = 4.0km, To run today=2.0km",2019-01-18,18/01/2019
4,19/01/2019,7,Saturday,18,,rest,,0.0,1,0.0,4.0,"run so far = 4.0km, To run today=0.0km",2019-01-19,19/01/2019
5,20/01/2019,1,Sunday,18,,recovery,,2.0,1,2.0,6.0,"run so far = 6.0km, To run today=2.0km",2019-01-20,20/01/2019
6,21/01/2019,2,Monday,17,,long run,,5.0,2,5.0,11.0,"run so far = 11.0km, To run today=5.0km",2019-01-21,21/01/2019
7,22/01/2019,3,Tuesday,17,,rest,,0.0,2,0.0,11.0,"run so far = 11.0km, To run today=0.0km",2019-01-22,22/01/2019
8,23/01/2019,4,Wednesday,17,,benchmark,15 mins,2.0,2,2.0,13.0,"run so far = 13.0km, To run today=2.0km",2019-01-23,23/01/2019
9,24/01/2019,5,Thursday,17,,rest,,0.0,2,0.0,13.0,"run so far = 13.0km, To run today=0.0km",2019-01-24,24/01/2019


-----

# Plot Runs

Create a combined df to determine how things are looking comparing the plan to logged runs

In [41]:
combine = pd.merge(plan, runs, how='inner', left_on='datec', right_on='datec')
combine['cumsum'] = combine['total_run'].cumsum()
combine['on_track'] = 0
combine.loc[combine['total_run'].cumsum() >= combine['total_dist'] , 'on_track'] = 1
combine['date_stamp'] = [datetime.datetime.strptime(str(x), "%d/%m/%Y") for x in combine['date']]

Now let's plot!

In [42]:
temp = combine.iloc[[-1]]
temp['date_stamp'].values[-1] =  pd.Timestamp(datetime.datetime.today().date())

combine = combine.append(temp, ignore_index=False, verify_integrity=False, sort=None)

combine

Unnamed: 0,date,dow,day,Tminus,goal,type,notes,distance (km),week,old,...,calories,steps,startDate,startTime,total_run,date_stamp_y,secs,cumsum,on_track,date_stamp
0,15/01/2019,3,Tuesday,18,,first run,,2.0,1,2.0,...,111,2105,15/01/2019,17:50,9.105451,2019-01-15,858.0,9.105451,1,2019-01-15
1,18/01/2019,6,Friday,18,,benchmark,15 mins,2.0,1,2.0,...,181,2745,18/01/2019,07:43,11.705449,2019-01-18,900.0,20.8109,1,2019-01-18
2,21/01/2019,2,Monday,17,,long run,,5.0,2,5.0,...,259,5169,21/01/2019,18:12,16.705449,2019-01-21,2000.0,37.51635,1,2019-01-21
3,23/01/2019,4,Wednesday,17,,benchmark,15 mins,2.0,2,2.0,...,107,2020,23/01/2019,17:36,18.705449,2019-01-23,923.0,56.221799,1,2019-01-23
4,23/01/2019,4,Wednesday,17,,benchmark,15 mins,2.0,2,2.0,...,73,1312,23/01/2019,18:39,20.005448,2019-01-23,633.0,76.227247,1,2019-01-23
5,02/02/2019,7,Saturday,16,,rest,,2.5,3,0.0,...,134,2552,02/02/2019,10:11,22.505448,2019-02-02,931.0,98.732695,1,2019-02-02
6,02/02/2019,7,Saturday,16,,rest,,2.5,3,0.0,...,107,2099,02/02/2019,10:40,24.605447,2019-02-02,740.0,123.338142,1,2019-02-02
7,03/02/2019,1,Sunday,16,,long run,,0.0,3,7.0,...,71,1350,03/02/2019,10:54,26.005446,2019-02-03,494.0,149.343588,1,2019-02-03
8,07/02/2019,5,Thursday,15,,rest,,0.0,4,0.0,...,193,3738,07/02/2019,18:32,29.546003,2019-02-07,1680.0,178.889591,1,2019-02-07
9,09/02/2019,7,Saturday,15,,rest,,0.0,4,0.0,...,94,2219,09/02/2019,10:41,31.814468,2019-02-09,775.0,210.704059,1,2019-02-09


In [43]:
# Plot the plan
plan_trace = go.Scatter(x = plan['date_stamp'], y = plan['total_dist'], 
                             text=plan['text'], hoverinfo='text',
                             mode = 'lines', name='Running Plan', line = dict(color = 'grey'))

# Plot log if better than plan
log_good = go.Scatter(x = combine.loc[combine['on_track'] == 1, 'date_stamp'], 
                      y = combine.loc[combine['on_track'] == 1, 'total_run'], 
                             mode = 'lines', name='Runs logged on target', line = dict(color = 'green'))

# Plot log if worse than plan
log_bad = go.Scatter(x = combine.loc[combine['on_track'] == 0, 'date_stamp'], 
                     y = combine.loc[combine['on_track'] == 0, 'total_run'], 
                             mode = 'lines', name='Runs logged under target', line = dict(color = 'red'))

layout = go.Layout(title='Distance Covered by Running Plan', yaxis=dict(title='Distance Covered (km)'), 
                 shapes= [{'type': 'line','x0': pd.Timestamp(datetime.datetime.today().date()),'y0': 0,
                           'x1': pd.Timestamp(datetime.datetime.today().date()),'y1': max(plan['total_dist']),
                           'line': {'color': 'grey', 'dash': 'dot'}}], 
                annotations=[dict(x=datetime.date(2019, 5, 19), y=400, text='* Race Day *', font=dict(color = "gold"), showarrow=False)])

fig = go.Figure([log_good, log_bad, plan_trace], layout)
py.iplot(fig, filename='Running-Goal')


Consider using IPython.display.IFrame instead



Save to the figures folder. 

In [44]:
pio.write_image(fig, './figs/running_plan.png')

-----

# Improvement in Pace

Covering distance is one thing, but I am also interested in seeing if my running actually improves over time. To do this I will look into my pace. Another option would be to analyse HR, but I have not synced that yet so its on the to do. 

In [45]:
runs['pace (m/s)'] = (runs['distance'] *1000)/runs['secs'] 
runs.head()

Unnamed: 0,distance,duration,calories,steps,startDate,startTime,total_run,date_stamp,datec,secs,pace (m/s)
1,4.352986,2893037,346,7313,31/12/2018,12:17,7.005452,2018-12-31,31/12/2018,2893.037,1.504642
2,1.304879,858000,111,2105,15/01/2019,17:50,9.105451,2019-01-15,15/01/2019,858.0,1.520838
3,1.615564,900000,181,2745,18/01/2019,07:43,11.705449,2019-01-18,18/01/2019,900.0,1.795071
4,3.106856,2000000,259,5169,21/01/2019,18:12,16.705449,2019-01-21,21/01/2019,2000.0,1.553428
5,1.242742,923000,107,2020,23/01/2019,17:36,18.705449,2019-01-23,23/01/2019,923.0,1.346416


In [46]:
tr = go.Scatter(x = runs['date_stamp'], y = runs['pace (m/s)'], 
                             text=runs['pace (m/s)'], hoverinfo='text',
                             mode = 'lines', name='Pace over time', line = dict(color = 'magenta'))

layout = go.Layout(title='Pace over time', yaxis=dict(title='Pace (m/s)'))

fig = go.Figure([tr], layout)

py.iplot(fig, filename='Running-Pace')

In [47]:
pio.write_image(fig, './figs/running_pace.png')

But this obviously doesn't take into account distance covered. I would hope I can run the 100m at a faster pace than 10km. So lets look at pace for distnace covered. 

In [48]:
data = [go.Bar(x=runs['distance'], y=runs['pace (m/s)'])]
layout = go.Layout(title='Pace for each distance covered', yaxis=dict(title='Pace (m/s)'), 
                   xaxis=dict(title='Distance (km)'))

fig = go.Figure(data, layout)

In [49]:
py.iplot(fig, filename='running-bar')

In [32]:
pio.write_image(fig, './figs/running_bar.png')