In [1]:
## to get access code...
## past into browser: https://www.strava.com/oauth/authorize?client_id=62286&response_type=code&redirect_uri=http://localhost/exchange_token&approval_prompt=force&scope=profile:read_all,activity:read_all
## and click authorise, will not find page, but note code in url redirect

In [2]:
import requests, json, os, pandas as pd, datetime as dt, time

In [3]:
access_code = 'b6293da66f50f2324c3ae74c6ec36a7203b69831'
client_id = 62286
secret = os.environ['STRAVA_API_SECRET']

In [4]:
def get_tokens(client_id: str, access_code: str, secret: str):
    response = requests.post(
                    url = 'https://www.strava.com/oauth/token',
                    data = {
                            'client_id': client_id,
                            'client_secret': f'{secret}',
                            'code': f'{access_code}',
                            'grant_type': 'authorization_code'
                            }
                )
    strava_tokens = response.json()
    return strava_tokens

In [5]:

# get token from stored file if poss to skip having to generate new access code as desc above
if os.path.exists('strava_tokens.json'):
    with open('strava_tokens.json', 'r') as f:
        token_data = json.load(f)
        
    # refresh if expired
    if token_data['expires_at'] < time.time():
        print('Token expired, calling Strava auth API with refresh token')
        resp = requests.post(url = 'https://www.strava.com/oauth/token',
                             data = {
                                     'client_id': client_id,
                                     'client_secret': secret,
                                     'grant_type': 'refresh_token',
                                     'refresh_token': token_data['refresh_token']
                                    }
                        )
        token_data = resp.json()    
        with open('strava_tokens.json', 'w') as f:
            json.dump(token_data, f) # update the file
else:
    token_data = get_tokens(client_id, access_code, secret)
    with open('strava_tokens.json', 'w') as f:
        json.dump(token_data, f)
        
# confirm we got data...
access_token=token_data['access_token']
print(token_data)

Token expired, calling Strava auth API with refresh token
{'token_type': 'Bearer', 'access_token': 'b3eb9f68a30dd16a7d71468026aa38934f10fb39', 'expires_at': 1614958252, 'expires_in': 21600, 'refresh_token': '071225a0d5c3f016839984092f80b11d22f5fbcb'}


In [6]:
def get_activities(access_token:str, n:int=50):
    url = f'https://www.strava.com/api/v3/athlete/activities?per_page={n}&access_token={access_token}'
    resp = requests.get(url)
    return resp.json()

In [7]:
data = get_activities(access_token)  # from strava / settings / My API App

In [8]:
orig_df = pd.json_normalize(data)
orig_df

Unnamed: 0,resource_state,name,distance,moving_time,elapsed_time,total_elevation_gain,type,workout_type,id,external_id,...,has_kudoed,suffer_score,athlete.id,athlete.resource_state,map.id,map.summary_polyline,map.resource_state,average_watts,kilojoules,device_watts
0,2,Morning Run,6534.9,2349,2352,84.8,Run,0.0,4892854322,garmin_push_6376331785,...,False,51.0,33557595,1,a4892854322,uze_IrtaLSFu@Xy@`@a@\Jl@RxB?|@Mv@?|BB|@Fz@Wl@Q...,2,,,
1,2,Speed endurance session,14012.1,5171,5193,212.8,Run,3.0,4884966433,garmin_push_6368512731,...,False,143.0,33557595,1,a4884966433,c{e_IntaLQ[QAa@JoAv@U?cAyFSk@Yi@c@Cw@l@[e@Qy@a...,2,,,
2,2,Morning Run,8680.8,3305,3310,83.1,Run,0.0,4875395191,garmin_push_6359146093,...,False,57.0,33557595,1,a4875395191,}ze_IxsaLKBwAr@]La@VADVxEH|@B~@A|BH|@ELWf@SjDJ...,2,,,
3,2,Long Slow Run,19788.9,8019,8024,262.2,Run,2.0,4864689964,garmin_push_6348658579,...,False,133.0,33557595,1,a4864689964,s{e_IpsaLo@P_B|@Kx@\rBD~@E`E@t@Lp@c@bDEz@B~BHt...,2,,,
4,2,Afternoon Run,8576.2,3251,3262,87.1,Run,0.0,4854294293,garmin_push_6338479836,...,False,72.0,33557595,1,a4854294293,g{e_InuaLGe@?CE?u@V]VaAd@LbACx@Bp@@~@?|DJv@D`A...,2,,,
5,2,Afternoon walk,5007.3,3376,3609,92.4,Walk,,4848466785,garmin_push_6332771649,...,False,7.0,33557595,1,a4848466785,}ze_IvsaLrAaAdBYPBVJf@dAn@d@dCmCd@OFBbCyAXMNMH...,2,,,
6,2,5 x 1k intervals,10013.9,3348,3351,61.5,Run,3.0,4842584642,garmin_push_6326996374,...,False,97.0,33557595,1,a4842584642,q|e_I~taLA]HgA?QKgACo@QmAOoBO_@oBl@i@JK{@Q_ESs...,2,,,
7,2,Afternoon Run,6489.8,2501,2516,84.3,Run,0.0,4836675408,garmin_push_6321175102,...,False,60.0,33557595,1,a4836675408,q{e_I~saLML_@Ls@^o@RIJ^xCBz@Kx@BzBDr@J|@Sp@QhB...,2,,,
8,2,Afternoon walk,2801.4,1470,1487,35.0,Walk,,4831260661,garmin_push_6315720771,...,False,5.0,33557595,1,a4831260661,u{e_IzsaLUuEMsASeBYYiA^m@ZWFOAMg@W}BKmBYg@c@iA...,2,,,
9,2,Ride with Andy,35209.3,6192,6815,463.3,Ride,10.0,4825157065,garmin_push_6309545840,...,False,58.0,33557595,1,a4825157065,mnd_Ip~eLEjEQjDbKxEHX_BlSl@zCrA`@z@Qh@|@~B`Ac@...,2,107.9,667.9,False


In [24]:
runs = orig_df[orig_df['type'] == 'Run']  # filter by activity
df = pd.DataFrame()
df['Date'] = runs['start_date'].apply(lambda v: v[:10])
df['Distance (km)'] = runs['distance'].apply(lambda v: v / 1000)
df['Elev (m)'] = runs['total_elevation_gain']
df['Pace (Excel)'] = runs.apply(lambda row: (row['elapsed_time'] / 60 / 60 /24) / (row['distance'] / 1000), axis=1)  # val actually in days, but works when formatted as a duration in Excel
df['HR (bpm)'] = runs['average_heartrate']

df['gap_exp'] = runs.apply(lambda row: 1 + (row['total_elevation_gain'] / row['distance']), axis=1)  # calc the exponent to use in grade adjusted pace calc
df['Speed (km/h)'] = runs.apply(lambda row: (row['distance'] / 1000) / (row['elapsed_time'] / 60 / 60), axis=1)
df['My GAS (km/h)'] = df.apply(lambda row: row['Speed (km/h)'] ** row['gap_exp'], axis=1)
df.drop(columns=['gap_exp'], inplace=True)  # only used as a calc step

def format_duration(seconds):
    minutes, seconds = divmod(int(seconds), 60)
    hours, minutes = divmod(minutes, 60)
    #print(f'hrs: {hours}, mins: {minutes}, secs: {seconds}')
    return '{:2d}:{:02d}:{:02d}'.format(hours, minutes, seconds)
    
def format_pace(row):
    secs_per_km = row['elapsed_time']/(row['distance'] / 1000)
    return format_duration(secs_per_km)  # will format into h:mm:ss
    
def format_my_GAP(row):
    gap_hrs_per_km = 1 / row['My GAS (km/h)'] 
    gap_secs_per_km = int(gap_hrs_per_km * 60 * 60)
    return format_duration(gap_secs_per_km)  # will format into h:mm:ss

df['Time'] = runs['elapsed_time'].apply(format_duration)
df['Pace (min/km)'] = runs.apply(format_pace, axis=1)
df['My GAP (min/km)'] = df.apply(format_my_GAP, axis=1)
#df

Unnamed: 0,Date,Distance (km),Elev (m),Pace (Excel),HR (bpm),Speed (km/h),My GAS (km/h),Time,Pace (min/km),My GAP (min/km)
0,2021-03-05,6.5349,84.8,0.004166,165.3,10.002398,10.305806,0:39:12,0:05:59,0:05:49
1,2021-03-03,14.0121,212.8,0.004289,169.2,9.713761,10.055015,1:26:33,0:06:10,0:05:58
2,2021-03-02,8.6808,83.1,0.004413,159.3,9.441353,9.646463,0:55:10,0:06:21,0:06:13
3,2021-02-28,19.7889,262.2,0.004693,158.8,8.87837,9.138996,2:13:44,0:06:45,0:06:33
4,2021-02-26,8.5762,87.1,0.004402,163.0,9.464844,9.683378,0:54:22,0:06:20,0:06:11
6,2021-02-24,10.0139,61.5,0.003873,171.8,10.757995,10.916104,0:55:51,0:05:34,0:05:29
7,2021-02-23,6.4898,84.3,0.004487,165.0,9.285882,9.558612,0:41:56,0:06:27,0:06:16
10,2021-02-20,15.2405,127.8,0.004579,158.5,9.100315,9.270404,1:40:29,0:06:35,0:06:28
11,2021-02-18,10.4841,120.6,0.004132,173.6,10.083559,10.355201,1:02:23,0:05:57,0:05:47
12,2021-02-17,9.0113,87.2,0.00442,161.8,9.42769,9.634614,0:57:21,0:06:21,0:06:13


In [26]:
# try adding shoe to this (we need to hit another API)
cache = {}

def get_gear(access_token, gear_id:int):
    url = f'https://www.strava.com/api/v3/gear/{gear_id}?access_token={access_token}'
    if not gear_id in cache:
        resp = requests.get(url)
        cache[gear_id] = resp.json()
    return cache[gear_id]

def row_to_gear_name(gear_id):
    gear = get_gear(access_token, gear_id)
    try:
        return gear['name'] if 'name' in gear else gear['brand_name'] + ' ' + gear['model_name']
    except KeyError:
        return f'Failed to get name from resp: {gear}'

try:
    df['Shoe'] = runs['gear_id'].apply(row_to_gear_name)
except Exception as e:
    print(f"Tried and failed to lookup shoes from gear ids, got error: {repr(e)}")

df

Unnamed: 0,Date,Distance (km),Elev (m),Pace (Excel),HR (bpm),Speed (km/h),My GAS (km/h),Time,Pace (min/km),My GAP (min/km),Shoe
0,2021-03-05,6.5349,84.8,0.004166,165.3,10.002398,10.305806,0:39:12,0:05:59,0:05:49,Skechers GoRun Pulse
1,2021-03-03,14.0121,212.8,0.004289,169.2,9.713761,10.055015,1:26:33,0:06:10,0:05:58,HOKA ONE ONE Rincon2
2,2021-03-02,8.6808,83.1,0.004413,159.3,9.441353,9.646463,0:55:10,0:06:21,0:06:13,Saucony Iso 2
3,2021-02-28,19.7889,262.2,0.004693,158.8,8.87837,9.138996,2:13:44,0:06:45,0:06:33,HOKA ONE ONE Rincon2
4,2021-02-26,8.5762,87.1,0.004402,163.0,9.464844,9.683378,0:54:22,0:06:20,0:06:11,ASICS Gel-Cumulus 22 (Winterized)
6,2021-02-24,10.0139,61.5,0.003873,171.8,10.757995,10.916104,0:55:51,0:05:34,0:05:29,HOKA ONE ONE Rincon2
7,2021-02-23,6.4898,84.3,0.004487,165.0,9.285882,9.558612,0:41:56,0:06:27,0:06:16,Saucony Iso 2
10,2021-02-20,15.2405,127.8,0.004579,158.5,9.100315,9.270404,1:40:29,0:06:35,0:06:28,Saucony Iso 2
11,2021-02-18,10.4841,120.6,0.004132,173.6,10.083559,10.355201,1:02:23,0:05:57,0:05:47,HOKA ONE ONE Rincon2
12,2021-02-17,9.0113,87.2,0.00442,161.8,9.42769,9.634614,0:57:21,0:06:21,0:06:13,Saucony Iso 2


In [27]:
downloads_dir = os.path.join(os.getenv('userprofile'), 'Downloads')
runs.to_excel(os.path.join(downloads_dir, f"raw_recent_runs_{dt.date.today().strftime('%Y_%m_%d')}.xlsx"))
df.to_excel(os.path.join(downloads_dir, f"recent_runs_{dt.date.today().strftime('%Y_%m_%d')}.xlsx"))

In [28]:
#https://www.strava.com/api/v3/activities/4875395191/streams?keys=time,distance,altitude,velocity_smooth,heartrate,cadence,grade_smooth&key_by_type=false

keys = 'time,distance,altitude,velocity_smooth,heartrate,cadence,grade_smooth'

# get the raw measurement data for an activity (as an Excel sheet)
def get_activity_stream(id):    
    url = f'https://www.strava.com/api/v3/activities/{id}/streams?keys={keys}&key_by_type=true&access_token={access_token}'
    resp = requests.get(url)
    return resp.json()

latest_id = runs['id'][0]
stream_data = get_activity_stream(latest_id)

headers = keys.split(',')
cols = []
for key in headers:
    cols.append(stream_data[key]['data'])
    
rows = map(list, zip(*cols))  # transpose the columns to get rows of data

stream_df = pd.DataFrame(rows, columns=headers)
stream_df
    

Unnamed: 0,time,distance,altitude,velocity_smooth,heartrate,cadence,grade_smooth
0,0,0.7,95.3,0.0,120,0,-0.9
1,4,6.3,95.3,1.4,124,85,-0.6
2,6,11.8,95.2,1.9,127,85,-0.4
3,8,17.5,95.2,2.1,131,85,-0.7
4,10,23.9,95.2,2.9,135,85,-0.3
...,...,...,...,...,...,...,...
328,2323,6456.4,94.3,2.6,171,86,2.7
329,2331,6477.5,94.7,2.6,171,86,1.8
330,2339,6498.4,95.0,2.6,172,85,1.3
331,2347,6521.1,95.2,2.7,170,87,1.0


In [29]:
stream_df.to_excel(os.path.join(downloads_dir, f'activity_stream_{latest_id}.xlsx'))