# Strava API

https://towardsdatascience.com/using-the-strava-api-and-pandas-to-explore-your-activity-data-d94901d9bfde

https://medium.com/@lejczak.learn/get-your-strava-activity-data-using-python-2023-%EF%B8%8F-b03b176965d0

In [1]:
import requests 
from dotenv import load_dotenv
import os 
import pandas as pd 
from datetime import datetime

## Env Variables

In [2]:
# Env Variables
load_dotenv()
client_id = os.environ["CLIENT_ID"] 
client_secret = os.environ["CLIENT_SECRET"] 

auth_endpoint = "https://www.strava.com/oauth/token"
activites_endpoint = "https://www.strava.com/api/v3/athlete/activities"

In [3]:
# Paste this in Web browser 
auth_path = f"https://www.strava.com/oauth/authorize?client_id={client_id}&response_type=code&redirect_uri=http://localhost/exchange_token&approval_prompt=force&scope=activity:read_all"
auth_path

'https://www.strava.com/oauth/authorize?client_id=131915&response_type=code&redirect_uri=http://localhost/exchange_token&approval_prompt=force&scope=activity:read_all'

In [4]:
code = "f349f37117072f2691598c00182dab9916723cd5"

payload = {
        "client_id": client_id,
        "client_secret": client_secret,
        "code": code,
        "grant_type": "authorization_code",
        "f": "json"
}
res = requests.post(auth_endpoint, data=payload, verify=False)
refresh_token = res.json()['refresh_token']
refresh_token



'c8c5c851df9ec3e2c9ee45c61d2a7f982d8d1f05'

## Login

In [5]:
import requests 

def get_access_token():
    # these params needs to be passed to get access
    # token used for retrieveing actual data
    payload = {
        "client_id": client_id,
        "client_secret": client_secret,
        'refresh_token': refresh_token,
        "grant_type": "refresh_token",
        'f': 'json'
    }
    res = requests.post(auth_endpoint, data=payload, verify=False)
    access_token = res.json()['access_token']

    return access_token

access_token = get_access_token()
print(access_token)

96047fec09c64ac4fbb7654c1b0954893f845811




In [6]:
def access_activity_data(access_token, params=None):
    
    headers = {'Authorization': f'Authorization: Bearer {access_token}'}    

    if not params:
        response = requests.get(activites_endpoint, headers=headers)
    response = requests.get(activites_endpoint, headers=headers, params=params)
    
    response.raise_for_status()
    activity_data = response.json()

    return activity_data

ACTIVITIES_PER_PAGE = 100
NUMBER_OF_PAGES = 5

activities_list = []
for p_number in range(NUMBER_OF_PAGES):

    GET_ALL_ACTIVITIES_PARAMS = {
        'per_page': ACTIVITIES_PER_PAGE,
        'page': p_number + 1
    }

    activities = access_activity_data(access_token, params=GET_ALL_ACTIVITIES_PARAMS)
    activities_list.append(activities)

activities_list

[[{'resource_state': 2,
   'athlete': {'id': 87979082, 'resource_state': 1},
   'name': 'Corrida Colo Colo 2k',
   'distance': 2203.8,
   'moving_time': 868,
   'elapsed_time': 906,
   'total_elevation_gain': 17.9,
   'type': 'Run',
   'sport_type': 'Run',
   'workout_type': 0,
   'id': 12299848598,
   'start_date': '2024-09-01T15:33:17Z',
   'start_date_local': '2024-09-01T11:33:17Z',
   'timezone': '(GMT-04:00) America/Santiago',
   'utc_offset': -14400.0,
   'location_city': None,
   'location_state': None,
   'location_country': None,
   'achievement_count': 0,
   'kudos_count': 0,
   'comment_count': 0,
   'athlete_count': 6,
   'photo_count': 0,
   'map': {'id': 'a12299848598',
    'summary_polyline': 'zkokEl{}mLJRCFEBMRWROAMQ]YIOa@e@_@g@g@g@IOCIa@[q@y@WQIKKGg@e@MCMQo@a@OQq@a@EQ@KHYDGHe@LSFIFQNuCFc@Be@DSFELIB@NADEFOJMx@]VULCJGVIj@?F@FCN@HDN?RF^FHAp@Fp@RXRF@V^N^C^MLS?IEI]IQCI?@OEIMQCUOSAQGcAGc@IOBY?S@[XSXEJMt@KhBAJ@h@Ph@HF?BLDNLHL?DGXIF_@GMMOGOSUi@e@{AE_A@i@Jk@AEEAE?G@GL?J?VEN@TKl

In [7]:
data_path = "..//data"
data_files = [f for f in os.listdir(data_path) if os.path.isfile(os.path.join(data_path, f)) and f.endswith('csv')]
data_files

['A la casa de la Karin__20210628.csv',
 'A mi casa __20220521.csv',
 'Bicicleta por la tarde__20220515.csv',
 'Caminata Hanga Roa__20230423.csv',
 'Caminata Volcán Rapa Nui__20230422.csv',
 'Camino a Buin__20220605.csv',
 'Cascada Cajón Maipo__20240106.csv',
 'Cerro San Cristobal__20230521.csv',
 'Chillán Run__20240809.csv',
 'Cueva Rapa Nui__20230426.csv',
 'Cueva Vírgenes Rapa Nui__20230424.csv',
 'Estero Huaquen Run__20240817.csv',
 'Gobierno Santiago Running - 5k__20240407.csv',
 'Half Marathon Cerrillos - 10k__20240526.csv',
 'Lagunilla Hiking Bajada__20240803.csv',
 'Lagunilla Hiking Subida__20240803.csv',
 'Media Vizcachas__20230430.csv',
 'Media Vizcacha__20220629.csv',
 'Media Vizcacha__20220703.csv',
 'Mitad de Media Vizcachas__20230513.csv',
 'Pasarela San Cristóbal__20231124.csv',
 'Peñalolén Run__20240707.csv',
 'Pirque - Vizcachas__20230506.csv',
 'Puente Alto Run__20240416.csv',
 'Puente Alto Run__20240422.csv',
 'Puente Alto Run__20240430.csv',
 'Puente Alto Run__20240

In [8]:
for activities in activities_list: 
    
    for d in activities: 

        id = d['id']
        activity_name = d['name'] 
        activity_type = d['type']
        start_date = datetime.strptime(d['start_date'], "%Y-%m-%dT%H:%M:%SZ")
        start_date_str = start_date.strftime("%Y%m%d")
        distance = f"{float(d['distance'])/1000:.2f}K"

        csv_name = f"{activity_name}__{start_date_str}.csv"
        
        if (d['type'] not in ["WeightTraining", "Workout", "VirtualRide"]) and (csv_name not in data_files):
            print(activity_name)
            print(activity_type)
            
            url = f"https://www.strava.com/api/v3/activities/{id}/streams"
            header = {'Authorization': 'Bearer ' + access_token}

            try:
                latlong = requests.get(url, headers=header, params={'keys':['latlng']}).json()[0]['data']
                altitude = requests.get(url, headers=header, params={'keys':['altitude']}).json()[1]['data']
                data_ = pd.DataFrame([*latlong], columns=['lat','long'])
                data_['altitude'] = altitude
                data_["distance"] = distance
                data_['activity_id'] = id 
                data_['activity_name'] = activity_name        
                data_['activity_type'] = activity_type    

                data_.to_csv(f'{data_path}//{csv_name}', index = False)
            except ValueError:
                print(f'Activity with Problems: {activity_name} - {activity_type}')


Corrida Colo Colo 2k
Run
Corrida Colo Colo 7k
Run
Puente Alto Run
Run
Treadmill
Run
Activity with Problems: Treadmill - Run
Treadmill
Run
Activity with Problems: Treadmill - Run
