In [32]:
from IPython.display import HTML

# Render a "Show / Hide code" button as this cell's output.
# The embedded script keeps a `code_show` flag (initially false) and, on each click,
# sets `display` to 'block' or 'none' on every `.jp-CodeCell .jp-InputArea` element
# before flipping the flag. It also registers a DOMContentLoaded listener that hides
# those same elements.
# NOTE(review): DOMContentLoaded presumably only fires when the notebook is opened as
# an exported HTML page, not when this cell is run in an already-loaded session — confirm.
HTML(
'''
<script>
    code_show = false; // Code cells will be hidden by default

    function code_toggle() {
        if (code_show) {
            document.querySelectorAll('.jp-CodeCell .jp-InputArea').forEach(function(el) {
                el.style.display = 'none';
            });
        } else {
            document.querySelectorAll('.jp-CodeCell .jp-InputArea').forEach(function(el) {
                el.style.display = 'block';
            });
        }
        code_show = !code_show;
    }

    // Hide code cells when the page is loaded
    document.addEventListener("DOMContentLoaded", function() {
        document.querySelectorAll('.jp-CodeCell .jp-InputArea').forEach(function(el) {
            el.style.display = 'none';
        });
    });
</script>
<form action="javascript:code_toggle()">
    <input type="submit" value="Show / Hide code">
</form>
'''
)

As explained in the readme file, this automation project relies on Windows tools to launch a script that uploads and names a ride on Strava. That being said, before said automation could be carried out, existing data on Strava had to be standardised and cleaned by giving the same name to similar routes. This allows easy comparison between rides and, although Strava does have a feature detecting "similar" rides, the threshold is such that two routes that are identical except for a few hundred metres would be deemed different. Let's dive in!

In [33]:
import re
import subprocess
import os,string
import win32api
import win32file
import glob
from datetime import datetime
import shutil
from stravalib.client import Client
import urllib3
from urllib.parse import urlparse,urlsplit
from datetime import datetime
from shapely import LineString
import polyline 


import time
import pickle as pkl
import pandas as pd


from shapely import LineString
from shapely import frechet_distance
from shapely import geometry
import folium

# Strava OAuth settings are read from the environment so that no secret is stored in
# the notebook; a variable that is not set resolves to None.
CODE,CLIENT_ID,CLIENT_SECRET,REFRESH_TOKEN=(
    os.getenv(variable) for variable in ('CODE','CLIENT_ID','CLIENT_SECRET','REFRESH_TOKEN')
)

# Redirect target and permission scopes used for the OAuth authorisation.
redirect_url="https://localhost"
scope=[
    'read_all',
    'profile:read_all',
    'activity:read_all',
    'activity:write',
]

To start off, we connect to Strava's API.

In [34]:
# Load the cached Strava token, refresh it when it has expired, and hand the
# resulting credentials to the stravalib client.
# NOTE(review): pickle.load can execute arbitrary code from a tampered file — keep
# access_token.pkl in a trusted location only.
with open('access_token.pkl', 'rb') as token_file:
    access_token = pkl.load(token_file)

client=Client()

token_expired = time.time() > access_token['expires_at']
if token_expired:
    print('Token has expired, will refresh')
    refresh_response = client.refresh_access_token(client_id=CLIENT_ID, client_secret=CLIENT_SECRET, refresh_token=access_token['refresh_token'])
    access_token = refresh_response
    # Persist the new token so the next run starts from it.
    with open('access_token.pkl', 'wb') as token_file:
        pkl.dump(refresh_response, token_file)
    print('Refreshed token saved to file')
else:
    expiry_text = time.strftime("%a, %d %b %Y %H:%M:%S %Z", time.localtime(access_token['expires_at']))
    print('Token still valid, expires at {}'.format(expiry_text))

# Either branch leaves the credentials to use in `access_token`.
client.access_token = access_token['access_token']
client.refresh_token = access_token['refresh_token']
client.token_expires_at = access_token['expires_at']

Token still valid, expires at Tue, 04 Feb 2025 18:46:03 Romance Standard Time


We start by fetching all activities since the day I rode my bike for the first time, and add them to a list. Each activity is a JSON object containing a great number of variables.

In [35]:
# Request the activities from the API (filtered with after='2020-12-24') and keep
# each one as a plain dict so it can be flattened into a dataframe later.
all_activities=client.get_activities(after='2020-12-24')
act_list=[activity.dict() for activity in all_activities]


We arrange those into a pandas dataframe, convert distances from metres to kilometres, and decode the "polyline" (the route's trace in Google's encoded format) into shapely's LineString format.

In [36]:
# Flatten the nested activity dicts into one row per activity and keep only the
# columns used in the rest of the notebook.
act_df=pd.json_normalize(act_list)
act_df=act_df[['name', 'start_date','distance', 'average_speed', 'moving_time','gear_id','total_elevation_gain','start_latlng','end_latlng','location_city','map.id','map.summary_polyline','id','sport_type','has_heartrate','device_watts','elapsed_time','average_cadence','max_watts','average_watts','average_heartrate','max_heartrate']]

# Map every gear id found in the activities to its name (one API call per gear).
# Activities without gear have a missing id and are left out of the mapping.
gears={
    gear: dict(client.get_gear(gear))['name']
    for gear in act_df.gear_id.unique()
    if pd.notna(gear)
}

# Reverse lookup: gear name -> gear id.
gears_inverse={value:key for key,value in gears.items()}

# Keep bike rides only. The explicit copy makes the frame independent of the
# unfiltered one, so the column updates below do not trigger SettingWithCopyWarning.
act_df=act_df[act_df['sport_type']=='Ride'].copy()
act_df['distance']=act_df['distance']/1000 # convert from m to km
act_df['average_speed']=act_df['average_speed']*3.6 # convert from m/s to km/h

# Decode the encoded polyline into a list of coordinate tuples, then build the
# shapely geometry used for the route comparison.
act_df['ROUTE']=act_df['map.summary_polyline'].apply(polyline.decode)
act_df['ROUTE_shp']=act_df['ROUTE'].apply(LineString)

# Series.map yields NaN for ids absent from `gears` (rides without gear) instead of
# raising KeyError as a direct dictionary lookup would.
act_df['GEAR']=act_df['gear_id'].map(gears)

In [37]:
# Preview five randomly chosen rides; no random_state is passed, so the rows shown
# differ from one run to the next.
act_df.sample(n=5)

Unnamed: 0,name,start_date,distance,average_speed,moving_time,gear_id,total_elevation_gain,start_latlng,end_latlng,location_city,...,device_watts,elapsed_time,average_cadence,max_watts,average_watts,average_heartrate,max_heartrate,ROUTE,ROUTE_shp,GEAR
252,LEUV-TERVUREN,2024-10-08 08:53:55+00:00,45.927,28.1088,0 days 01:38:02,b9512064,275.2,"[50.883341236039996, 4.689431497827172]","[50.88323017582297, 4.68938447535038]",,...,False,0 days 01:43:49,,,150.1,153.5,189.0,"[(50.88263, 4.68909), (50.88189, 4.68879), (50...","LINESTRING (50.88263 4.68909, 50.88189 4.68879...",Roubaix
138,LEUV-MECHELEN,2024-01-09 09:04:53+00:00,56.1854,28.62,0 days 01:57:47,b9512064,41.1,"[50.87408191524446, 4.693046947941184]","[50.87423907592893, 4.6928035374730825]",,...,False,0 days 01:58:20,,,125.5,,,"[(50.87488, 4.69124), (50.87595, 4.69069), (50...","LINESTRING (50.87488 4.69124, 50.87595 4.69069...",Roubaix
60,LEUV-PELLENBERG,2023-05-14 09:44:05+00:00,32.9031,26.8308,0 days 01:13:35,b9512064,171.4,"[50.87352686561644, 4.691632920876145]","[50.87419490329921, 4.692838490009308]",,...,False,0 days 01:29:48,,,122.0,,,"[(50.87251, 4.69144), (50.87219, 4.69132), (50...","LINESTRING (50.87251 4.69144, 50.87219 4.69132...",Roubaix
88,LEU-TERV,2023-09-12 13:18:50+00:00,38.2904,26.3736,0 days 01:27:07,b9512064,273.7,"[50.874234130606055, 4.692835137248039]","[50.88855997659266, 4.701632279902697]",,...,False,0 days 01:30:33,,,127.7,,,"[(50.874, 4.69148), (50.87288, 4.69118), (50.8...","LINESTRING (50.874 4.69148, 50.87288 4.69118, ...",Roubaix
208,BLA-WAVRE,2024-06-30 13:08:15+00:00,62.5661,28.2384,0 days 02:12:56,b9512064,544.2,"[50.673470199108124, 4.383419696241617]","[50.673615876585245, 4.383254069834948]",,...,False,0 days 02:15:16,,,147.0,,,"[(50.67225, 4.38492), (50.6701, 4.38621), (50....","LINESTRING (50.67225 4.38492, 50.6701 4.38621,...",Roubaix


In [38]:
# Titles that mark a ride as "not yet named".
unnamed_rides=['Afternoon Ride','Morning Ride',"Sortie vélo dans l'après-midi","Lunch Ride"]

# Row masks shared by the three selections below.
has_default_title=act_df['name'].isin(unnamed_rides)
ridden_on_roubaix=act_df['GEAR']=='Roubaix'

# Rides still carrying one of the default titles (any gear).
no_name_df=act_df[has_default_title]

# Reference routes: properly named Roubaix rides, one row per distinct name
# (the last occurrence in the frame is kept).
named_df=act_df[~has_default_title & ridden_on_roubaix].drop_duplicates(subset='name',keep='last')

# Every distinct title ridden on the Roubaix, default titles included.
Existing_rides=act_df[ridden_on_roubaix].copy().drop_duplicates(subset='name',keep='last')

route_count=len(Existing_rides)
print('There are ' + str(route_count)+ ' different routes in the dataset')

There are 120 different routes in the dataset


We will then only keep one instance of each ride, which we will compare to new ones. The following illustrates the process a new activity undergoes when it is automatically named. In practice the operation is the following: 
- A USB device is plugged into my personal laptop, triggering an event.
- The event triggers a Python script.
- If the USB device is recognised as my bike GPS the script runs; otherwise it stops.
- If a ride is new, its name (Garmin's default name, i.e. the date when it was started) is stored in a txt file called "Last_ride". New rides overwrite it; if no ride on the GPS is more recent, the script stops.
- The new ride is uploaded to Strava and saved locally.

Then the naming starts: 
- All previous activities are queried through Strava's API, including the new one, which will be named.
- The activities with a similar distance (± 5 km), a similar elevation (± 20 m) and a similar starting position are stored in a dataframe.
- If the dataframe is empty, no name is attributed to the new ride, as no comparable rides exist. The script stops.
- Otherwise, the Fréchet distance between the new activity and each similar one is computed. The Fréchet distance is a measure of similarity between curves that takes into account the location and ordering of the points along the curve.
- The route with the smallest Fréchet distance is identified.
- Our new ride gets a similar title if said Fréchet distance falls below a given threshold.

One could ask: why use the Fréchet distance? Well, relying solely on distance, elevation and start coordinates isn't a good indicator that rides are similar:

In [39]:
# Tolerances of the metric pre-filter used to shortlist comparable rides.
dist_lim=5        # accepted distance difference, in km
elev_lim=20       # accepted elevation-gain difference, in m
dist_buffer=0.02  # radius of the zone around the start point, in coordinate degrees

# Only valid CSS colour names: folium hands `color` to the browser untouched, so a
# name the browser does not know would leave that polyline without a usable stroke.
colours=['red','blue','green','purple','orange','darkred','salmon','beige','darkblue','darkgreen','cadetblue','indigo','white','pink','lightblue','lightgreen','gray','black','lightgray']
match=0
New_ride=no_name_df.sample(n=1)
# NOTE(review): `map` shadows the builtin; the name is kept because other cells may use it.
map=folium.Map(location=[New_ride.start_latlng.values[0][0],New_ride.start_latlng.values[0][1]],zoom_start=10)

# Never ask for more rows than exist, otherwise DataFrame.sample raises ValueError.
sampled_rides=no_name_df.sample(n=min(10,len(no_name_df)))
for match,(index, row) in enumerate(sampled_rides.iterrows()):
    NEW_RIDE=row
    ROUTE=NEW_RIDE['ROUTE']
    DISTANCE=NEW_RIDE['distance']
    ELEVATION=NEW_RIDE['total_elevation_gain']

    # A ride without start coordinates cannot be located, so it is skipped.
    new_start=NEW_RIDE['start_latlng']
    if not isinstance(new_start,list) or len(new_start)<2:
        continue

    START=geometry.Point(new_start[0],new_start[1])
    RADIUS=START.buffer(dist_buffer)

    similar_ride=[]
    for index_named, activity in named_df.iterrows():
        candidate_start=activity['start_latlng']
        if not isinstance(candidate_start,list) or len(candidate_start)<2:
            continue

        close_distance=DISTANCE-dist_lim < activity['distance'] < DISTANCE+dist_lim
        # The elevation must be read from the candidate: reading it from the new ride
        # would compare the value with itself and accept every candidate.
        close_elevation=ELEVATION-elev_lim < activity['total_elevation_gain'] < ELEVATION+elev_lim

        if close_distance and close_elevation and geometry.Point(
                candidate_start[0],candidate_start[1]).within(RADIUS):
            similar_ride.append(activity['ROUTE'])

    if not similar_ride:
        continue

    # Solid line for the ride to name, dashed lines (same colour) for its candidates.
    ride_colour=colours[match % len(colours)]
    folium.PolyLine(ROUTE,color=ride_colour).add_to(map)
    for rides in similar_ride:
        folium.PolyLine(rides,color=ride_colour,dash_array='10').add_to(map)


display(map)

Dashed rides are rides with metrics similar to those of the one we want to name, but one can easily see that the course itself is very different. In the script's implementation, this filtering is done solely to limit the number of Fréchet distances that have to be calculated.

In [40]:
# Base colours for the rides to name. Each matched route is drawn in f'dark{base}', so
# the list only holds names for which both `base` and `dark<base>` are valid CSS
# colours (e.g. 'darkdarkred' or 'darkbeige' would not be).
colours=['red','blue','green','orange','violet','cyan','magenta','salmon','turquoise','orchid','khaki','goldenrod','seagreen','slateblue','gray']
# Largest Fréchet distance (in coordinate degrees) for two routes to count as the same.
frechet_lim=0.02
match=0
test=[]  # every matched reference route, collected for inspection
# NOTE(review): `map` shadows the builtin; the name is kept because other cells may use it.
map=folium.Map(location=[New_ride.start_latlng.values[0][0],New_ride.start_latlng.values[0][1]],zoom_start=11)
for index, row in no_name_df.iterrows():
    NEW_RIDE=row
    ROUTE=NEW_RIDE['ROUTE']
    DISTANCE=NEW_RIDE['distance']
    ELEVATION=NEW_RIDE['total_elevation_gain']

    # A ride without start coordinates cannot be located, so it is skipped.
    new_start=NEW_RIDE['start_latlng']
    if not isinstance(new_start,list) or len(new_start)<2:
        continue

    START=geometry.Point(new_start[0],new_start[1])
    RADIUS=START.buffer(dist_buffer)

    similar_ride=[]
    for index_named, activity in named_df.iterrows():
        candidate_start=activity['start_latlng']
        if not isinstance(candidate_start,list) or len(candidate_start)<2:
            continue

        close_distance=DISTANCE-dist_lim < activity['distance'] < DISTANCE+dist_lim
        # The elevation must be read from the candidate: reading it from the new ride
        # would compare the value with itself and accept every candidate.
        close_elevation=ELEVATION-elev_lim < activity['total_elevation_gain'] < ELEVATION+elev_lim

        # Cheap metric pre-filter first; the Fréchet distance is only computed for
        # candidates that pass it.
        if not (close_distance and close_elevation and geometry.Point(
                candidate_start[0],candidate_start[1]).within(RADIUS)):
            continue

        frechet=frechet_distance(NEW_RIDE['ROUTE_shp'],activity['ROUTE_shp'])
        if frechet<=frechet_lim:
            similar_ride.append(activity['ROUTE'])

    if not similar_ride:
        continue

    # Cycle through the palette so any number of matched rides can be drawn.
    base_colour=colours[match % len(colours)]
    folium.PolyLine(ROUTE,color=base_colour).add_to(map)
    for rides in similar_ride:
        test.append(rides)
        folium.PolyLine(rides,color=f'dark{base_colour}',dash_array='20').add_to(map)
    match+=1


display(map)


Instead, using the Fréchet distance, we can find perfectly matching routes, rename the activities and keep a clean Strava account.