In [59]:
import config
import requests
import pandas as pd
from pandas import json_normalize
import json
import time
import csv


## Authenticate with Strava

In [60]:
with open('strava_tokens.json') as json_file:
    strava_tokens = json.load(json_file)

if strava_tokens['expires_at'] < time.time():
    response = requests.post(
                        url = 'https://www.strava.com/oauth/token',
                        data = {
                                'client_id': config.client_id,
                                'client_secret': config.client_secret,
                                'grant_type': 'refresh_token',
                                'refresh_token': strava_tokens['refresh_token']
                                }
                    )
    new_strava_tokens = response.json()

    with open('strava_tokens.json', 'w') as outfile:
        json.dump(new_strava_tokens, outfile)

    strava_tokens = new_strava_tokens

In [61]:
with open('strava_tokens.json') as check:
  data = json.load(check)

In [62]:
# Get the tokens from file to connect to Strava
with open('strava_tokens.json') as json_file:
    strava_tokens = json.load(json_file)
# Loop through all activities
url = "https://www.strava.com/api/v3/activities"
access_token = strava_tokens['access_token']
# Get first page of activities from Strava with all fields
r = requests.get(url + '?access_token=' + access_token)
r = r.json()
    
df = json_normalize(r)
df.to_csv('strava_activities_all_fields.csv')

## Data Retrieval

In [63]:
# Loop through all activities
page = 1
url = "https://www.strava.com/api/v3/activities"
access_token = strava_tokens['access_token']
# Create the dataframe ready for the API call to store your activity data
activities = pd.DataFrame(
    columns = [
            "id",
            "name",
            "start_date_local",
            "type",
            "distance",
            "moving_time",
            "elapsed_time",
            "total_elevation_gain",
            "end_latlng",
            "location_country",
            "flagged",
            "has_heartrate",
            "average_heartrate",
            "max_heartrate"
    ]
)
while True:
    
    # get page of activities from Strava
    r = requests.get(url + '?access_token=' + access_token + '&per_page=200' + '&page=' + str(page))
    r = r.json()
    
    # if no results then exit loop
    if (not r):
        break
    
    # otherwise add new data to dataframe
    for x in range(len(r)):
        activities.loc[x + (page-1)*200,'id'] = r[x]['id']
        activities.loc[x + (page-1)*200,'name'] = r[x]['name']
        activities.loc[x + (page-1)*200,'start_date_local'] = r[x]['start_date_local']
        activities.loc[x + (page-1)*200,'type'] = r[x]['type']
        activities.loc[x + (page-1)*200,'distance'] = r[x]['distance']
        activities.loc[x + (page-1)*200,'moving_time'] = r[x]['moving_time']
        activities.loc[x + (page-1)*200,'elapsed_time'] = r[x]['elapsed_time']
        activities.loc[x + (page-1)*200,'total_elevation_gain'] = r[x]['total_elevation_gain']
        activities.loc[x + (page-1)*200,'end_latlng'] = r[x]['end_latlng']
        activities.loc[x + (page-1)*200,'location_country'] = r[x]['location_country']
        activities.loc[x + (page-1)*200,'flagged'] = r[x]['flagged']
        activities.loc[x + (page-1)*200,'has_heartrate'] = r[x]['has_heartrate']
    # increment page
    page += 1
# Export your activities file as a csv 
# to the folder you're running this script in
activities.to_csv('strava_activities.csv')

## Data Exploration

In [64]:
activities.head()

Unnamed: 0,id,name,start_date_local,type,distance,moving_time,elapsed_time,total_elevation_gain,end_latlng,location_country,flagged,has_heartrate,average_heartrate,max_heartrate
0,6345939956,Mitja Marató de Figueres 2021,2021-12-05T10:30:05Z,Run,21219.2,5976,5992,60.4,"[42.26630539633334, 2.9646223224699497]",Spain,False,True,,
1,6333698342,Taper Run,2021-12-02T12:44:09Z,Run,6446.0,1853,1856,10.6,"[41.96143009699881, 3.0379427690058947]",Spain,False,True,,
2,6325439450,Taper run,2021-11-30T13:05:44Z,Run,6438.6,1850,1850,10.2,"[41.96177165955305, 3.037542700767517]",Spain,False,True,,
3,6316772468,Still recovering,2021-11-28T11:02:59Z,Run,9655.9,3003,3123,55.4,"[41.960421334952116, 3.0389138124883175]",Spain,False,True,,
4,6308769031,Breaking the Ice after Gastroenteritis,2021-11-26T10:52:14Z,Run,7563.2,2487,2493,60.0,"[41.96040934883058, 3.039237018674612]",Spain,False,True,,


### Get Only Half Marathon Training Activities

In [65]:
activities['start_date_local'] = pd.to_datetime(activities['start_date_local'])

start_date = '2021-10-23T12:44:09Z'
end_date = '2021-12-05T12:44:09Z'

mask = (activities['start_date_local'] > start_date) & (activities['start_date_local'] <= end_date)

df = activities.loc[mask]
df

Unnamed: 0,id,name,start_date_local,type,distance,moving_time,elapsed_time,total_elevation_gain,end_latlng,location_country,flagged,has_heartrate,average_heartrate,max_heartrate
0,6345939956,Mitja Marató de Figueres 2021,2021-12-05 10:30:05+00:00,Run,21219.2,5976,5992,60.4,"[42.26630539633334, 2.9646223224699497]",Spain,False,True,,
1,6333698342,Taper Run,2021-12-02 12:44:09+00:00,Run,6446.0,1853,1856,10.6,"[41.96143009699881, 3.0379427690058947]",Spain,False,True,,
2,6325439450,Taper run,2021-11-30 13:05:44+00:00,Run,6438.6,1850,1850,10.2,"[41.96177165955305, 3.037542700767517]",Spain,False,True,,
3,6316772468,Still recovering,2021-11-28 11:02:59+00:00,Run,9655.9,3003,3123,55.4,"[41.960421334952116, 3.0389138124883175]",Spain,False,True,,
4,6308769031,Breaking the Ice after Gastroenteritis,2021-11-26 10:52:14+00:00,Run,7563.2,2487,2493,60.0,"[41.96040934883058, 3.039237018674612]",Spain,False,True,,
5,6286437698,Brief,2021-11-21 08:51:45+00:00,Run,4830.3,1380,1380,11.9,"[41.96025302633643, 3.0402249097824097]",Spain,False,True,,
6,6282163810,Tis cold,2021-11-20 10:06:44+00:00,Run,10943.0,3448,3618,57.6,"[41.96085887029767, 3.039064686745405]",Spain,False,True,,
7,6277848620,3 Degrees baby!,2021-11-19 08:05:53+00:00,Run,11795.9,3501,3501,24.3,"[41.96045930497348, 3.0383911170065403]",Spain,False,True,,
8,6269137990,Wild goose chase!,2021-11-17 09:13:48+00:00,Run,6763.7,2127,2167,37.1,"[43.59446053393185, 1.444098399952054]",Spain,False,True,,
9,6264202404,"First time in France, would you believe",2021-11-16 07:22:53+00:00,Run,6440.0,1940,1983,12.3,"[43.59452926553786, 1.443996476009488]",Spain,False,True,,
