# Running Progress

**Katriona Goldmann**

This script looks at the exercise over time. Specifically, I am training for a half marathon so I want to compare logged runs to my plan to determine if I am on track. 

In [146]:
import pandas as pd
import numpy as np
import plotly
import plotly.plotly as py
import plotly.graph_objs as go
import plotly.io as pio
import datetime
import os

## Read in the data

In [147]:
ex = pd.read_csv("./Outputs/exercise.csv")

So lets look at the exercise completed:

In [148]:
ex.head()

Unnamed: 0.1,Unnamed: 0,activityId,activityParentId,activityParentName,calories,description,distance,duration,hasStartTime,isFavorite,lastModified,logId,name,startDate,startTime,steps,distance (miles)
0,0,52000,52000,Yoga,104,,,1800000,True,False,2018-12-27T23:10:34.000Z,18875096545,Yoga,27/12/2018,22:40,239,0.0
1,0,90009,90009,Run,346,Running - 5 mph (12 min/mile),4.352986,2893037,True,False,2018-12-31T13:33:42.000Z,18936842588,Run,31/12/2018,12:17,7313,2.704827
2,0,90009,90009,Run,111,Running - 5 mph (12 min/mile),1.304879,858000,True,False,2019-01-16T13:28:31.000Z,19144979937,Run,15/01/2019,17:50,2105,0.810816
3,0,90019,90019,Treadmill,181,2mph,1.615564,900000,True,False,2019-01-18T08:14:33.000Z,19297289610,Treadmill,18/01/2019,07:43,2745,1.003867
4,0,90009,90009,Run,259,Running - 5 mph (12 min/mile),3.106856,2000000,True,False,2019-01-24T17:09:35.000Z,19419790003,Run,21/01/2019,18:12,5169,1.930516


## Running Plan

So lets look specifically at running. 

In [149]:
runs = ex.loc[(ex.activityParentName == 'Run') | (ex.activityParentName == 'Treadmill') , ['distance', 'duration', 'calories', 'steps', 'startDate', 'startTime'] ]
runs['total_run'] = runs['distance'].cumsum()
runs.head()

Unnamed: 0,distance,duration,calories,steps,startDate,startTime,total_run
1,4.352986,2893037,346,7313,31/12/2018,12:17,4.352986
2,1.304879,858000,111,2105,15/01/2019,17:50,5.657865
3,1.615564,900000,181,2745,18/01/2019,07:43,7.273429
4,3.106856,2000000,259,5169,21/01/2019,18:12,10.380285
5,1.242742,923000,107,2020,23/01/2019,17:36,11.623027


Now lets load in the running plan. I created this using the [Nike Run Club app](https://www.nike.com/gb/en_gb/c/nike-plus/running-app-gps) then exported to a csv, which I would recommend, its super easy to create a plan using this. Although rather annoyingly, fitbit doesn't sync with this app currently. 

But however you decide to plan all you need is a csv with a distance, date and time column. 

In [150]:
plan = pd.read_csv("./Inputs/Running_Plan.csv")
plan['total_dist'] = plan['distance (km)'].cumsum()
plan.head()

Unnamed: 0,date,dow,day,Tminus,goal,type,notes,distance (km),week,old,total_dist
0,15/01/2019,3,Tuesday,18,,first run,,2.0,1,2.0,2.0
1,16/01/2019,4,Wednesday,18,,rest,,0.0,1,0.0,2.0
2,17/01/2019,5,Thursday,18,,rest,,0.0,1,0.0,2.0
3,18/01/2019,6,Friday,18,,benchmark,15 mins,2.0,1,2.0,4.0
4,19/01/2019,7,Saturday,18,,rest,,0.0,1,0.0,4.0


In [151]:
plan['text'] = 'run so far = ' + round(plan['total_dist']).map(str) + 'km, To run today=' + plan['distance (km)'].map(str) + 'km'

Reformat some of the columns as dates and convert to the correct units. 

In [152]:
# Format the dates as time stamps
plan['date_stamp'] = [datetime.datetime.strptime(x, '%d/%m/%Y') for x in plan['date']] 
runs['date_stamp']  = [datetime.datetime.strptime(x, '%d/%m/%Y') for x in runs['startDate']] 
plan['datec'] = [x.strftime('%d/%m/%Y') for x in plan['date_stamp']]
runs['datec'] = [x.strftime('%d/%m/%Y') for x in runs['date_stamp']]

# Convert to metric units
runs['total_run'] = runs['total_run']/0.62137119
runs['secs'] = runs['duration']/(1000)

plan.head()

Unnamed: 0,date,dow,day,Tminus,goal,type,notes,distance (km),week,old,total_dist,text,date_stamp,datec
0,15/01/2019,3,Tuesday,18,,first run,,2.0,1,2.0,2.0,"run so far = 2.0km, To run today=2.0km",2019-01-15,15/01/2019
1,16/01/2019,4,Wednesday,18,,rest,,0.0,1,0.0,2.0,"run so far = 2.0km, To run today=0.0km",2019-01-16,16/01/2019
2,17/01/2019,5,Thursday,18,,rest,,0.0,1,0.0,2.0,"run so far = 2.0km, To run today=0.0km",2019-01-17,17/01/2019
3,18/01/2019,6,Friday,18,,benchmark,15 mins,2.0,1,2.0,4.0,"run so far = 4.0km, To run today=2.0km",2019-01-18,18/01/2019
4,19/01/2019,7,Saturday,18,,rest,,0.0,1,0.0,4.0,"run so far = 4.0km, To run today=0.0km",2019-01-19,19/01/2019


# Plot Runs

Create a combined df to determine how things are looking comparing the plan to logged runs

In [153]:
combine = pd.merge(plan, runs, how='inner', left_on='datec', right_on='datec')
combine['cumsum'] = combine['total_run'].cumsum()
combine['on_track'] = 0
combine.loc[combine['total_run'] >= combine['total_dist'] , 'on_track'] = 1
combine['date_stamp'] = [datetime.datetime.strptime(str(x), "%d/%m/%Y") for x in combine['date']]

Now let's plot!

In [154]:
temp = combine.iloc[[-1]]
temp['date_stamp'].values[-1] =  pd.Timestamp(datetime.datetime.today().date())
combine = combine.append(temp, ignore_index=False, verify_integrity=False, sort=None)

combine.head()

Unnamed: 0,date,dow,day,Tminus,goal,type,notes,distance (km),week,old,...,calories,steps,startDate,startTime,total_run,date_stamp_y,secs,cumsum,on_track,date_stamp
0,15/01/2019,3,Tuesday,18,,first run,,2.0,1,2.0,...,111,2105,15/01/2019,17:50,9.105451,2019-01-15,858.0,9.105451,1,2019-01-15
1,18/01/2019,6,Friday,18,,benchmark,15 mins,2.0,1,2.0,...,181,2745,18/01/2019,07:43,11.705449,2019-01-18,900.0,20.8109,1,2019-01-18
2,21/01/2019,2,Monday,17,,long run,,5.0,2,5.0,...,259,5169,21/01/2019,18:12,16.705449,2019-01-21,2000.0,37.51635,1,2019-01-21
3,23/01/2019,4,Wednesday,17,,benchmark,15 mins,2.0,2,2.0,...,107,2020,23/01/2019,17:36,18.705449,2019-01-23,923.0,56.221799,1,2019-01-23
4,23/01/2019,4,Wednesday,17,,benchmark,15 mins,2.0,2,2.0,...,73,1312,23/01/2019,18:39,20.005448,2019-01-23,633.0,76.227247,1,2019-01-23


Plots red if distance is below running plan, green if above. 

In [155]:
cols = list(combine['on_track'])
cols = ['green' if x==1 else x for x in cols]
cols = ['red' if x==0 else x for x in cols]

In [156]:
# Plot the plan
plan_trace = go.Scatter(
    x=plan['date_stamp'],
    y=plan['total_dist'],
    text=plan['text'],
    hoverinfo='text',
    mode='lines',
    name='Running Plan',
    line=dict(shape= 'hv', color='grey'))

# Plot log 
log = go.Scatter(
    x=combine.loc[combine['on_track'].isin([0, 1]), 'date_stamp'],
    y=combine.loc[combine['on_track'].isin([0, 1]), 'total_run'],
    mode='lines+markers',
    marker= dict(color= cols),
    name='Runs logged on target',
    line=dict(shape= 'hv', color= 'blue'))

layout = go.Layout(
    title='Distance Covered by Running Plan',
    yaxis=dict(title='Distance Covered (km)'),
    shapes=[{
        'type': 'line',
        'x0': pd.Timestamp(datetime.datetime.today().date()),
        'y0': 0,
        'x1': pd.Timestamp(datetime.datetime.today().date()),
        'y1': max(plan['total_dist']),
        'line': {
            'color': 'grey',
            'dash': 'dot'
        }
    }],
    annotations=[
        dict(
            x=datetime.date(2019, 5, 19),
            y=400,
            text='* Race Day *',
            font=dict(color="gold"),
            showarrow=False)
    ])

fig = go.Figure([log, plan_trace], layout)
py.iplot(fig, filename='Running-Goal')


Consider using IPython.display.IFrame instead



Save to the figures folder. 

In [157]:
pio.write_image(fig, './figs/running_plan.png')

# Improvement in Pace

Covering distance is one thing, but I am also interested in seeing if my running actually improves over time. To do this I will look into my pace. Another option would be to analyse HR, but I have not synced that yet so its on the to do. 

In [158]:
runs['speed (m/s)'] = (runs['distance'] *1000)/runs['secs'] 
runs.head()

Unnamed: 0,distance,duration,calories,steps,startDate,startTime,total_run,date_stamp,datec,secs,speed (m/s)
1,4.352986,2893037,346,7313,31/12/2018,12:17,7.005452,2018-12-31,31/12/2018,2893.037,1.504642
2,1.304879,858000,111,2105,15/01/2019,17:50,9.105451,2019-01-15,15/01/2019,858.0,1.520838
3,1.615564,900000,181,2745,18/01/2019,07:43,11.705449,2019-01-18,18/01/2019,900.0,1.795071
4,3.106856,2000000,259,5169,21/01/2019,18:12,16.705449,2019-01-21,21/01/2019,2000.0,1.553428
5,1.242742,923000,107,2020,23/01/2019,17:36,18.705449,2019-01-23,23/01/2019,923.0,1.346416


In [161]:
tr = go.Scatter(x = runs['date_stamp'], y = runs['speed (m/s)'], 
                             text=runs['speed (m/s)'], hoverinfo='text',
                             mode = 'markers', name='Pace over time', line = dict(color = 'magenta'))

layout = go.Layout(title='Pace over time', yaxis=dict(title='speed (m/s)'))

fig = go.Figure([tr], layout)

py.iplot(fig, filename='Running-Pace')


Consider using IPython.display.IFrame instead



In [162]:
pio.write_image(fig, './figs/running_pace.png')

But this obviously doesn't take into account distance covered. I would hope I can run the 100m at a faster pace than 10km. So lets look at pace for distnace covered. 

In [163]:
from numpy import arange,array,ones
from scipy import stats

slope, intercept, r_value, p_value, std_err = stats.linregress(runs['distance'], runs['speed (m/s)'])
line = slope*runs['distance']+intercept 

In [164]:
tr = go.Scatter(x = runs['distance'], y = runs['speed (m/s)'], 
                             text=runs['speed (m/s)'], hoverinfo='text',
                             mode = 'markers', name='Pace over time', line = dict(color = 'blue'))
trace2 = go.Scatter(
                  x=runs['distance'].append(pd.Series([21], index=[len(runs['distance'])+1])),
                  y=line.append(pd.Series([slope*21+intercept], index=[len(line)+1])),
                  mode='lines',
                  marker=go.Marker(color='rgb(31, 119, 180)'),
                  name='Fit'
                  )

layout = go.Layout(title='Pace over time', yaxis=dict(title='Speed (m/s)'), xaxis=dict(title='Distance (km)'))

fig = go.Figure([tr, trace2], layout)
py.iplot(fig, filename='running-bar')


plotly.graph_objs.Marker is deprecated.
Please replace it with one of the following more specific types
  - plotly.graph_objs.scatter.Marker
  - plotly.graph_objs.histogram.selected.Marker
  - etc.




In [167]:
speed_ms = slope*21+intercept
speed_kmpermin = pace*60/1000
pace_minperkm = 1/speed_kmpermin
time = 21*(pace_kmpermin)
print(speed_kmpermin)
print(pace_kmpermin)
print(time)

0.3625180671330803
2.758483205839504
57.928147322629584


In [None]:
pio.write_image(fig, './figs/running_bar.png')