<a href="https://colab.research.google.com/github/Dom3442/garminconnect-similaractivities/blob/master/Garminconnect_matchedruns.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

Optional first step: link Google Drive so the downloaded Garmin files are saved there and later sessions start faster. This step can be *skipped*.



In [0]:
# Optional: mount Google Drive and make it the working directory so that the
# files downloaded below are written there.
import os  # imported here because the main import cell runs after this one

from google.colab import drive

drive.mount('/content/gdrive')
os.chdir('/content/gdrive/My Drive/')

The first stage is to get your Garmin data. There is a handy tool on GitHub that does this for you: https://github.com/pe-st/garmin-connect-export

In [0]:
# First clone the garmin-connect-export repository into the working directory
# so its gcexport.py script can be run from this notebook

!git clone https://github.com/pe-st/garmin-connect-export

In [0]:
# Then use the cloned script to download your data.

# --count is how many activities to download: 30 means the last 30, use `all`
# to download everything.
# --directory is the output folder; the `folder` variable set further down
# must point at this same directory.

!python garmin-connect-export/gcexport.py --count 30 --directory garmin_connect_export

We now have our data 

In [0]:
# import needed python packages

import matplotlib.pyplot as plt
import matplotlib.dates as mdates
import datetime
import pandas as pd
import numpy as np
import os

In [0]:
# gpxpy is also needed: this installs it if you don't have it yet (or just
# imports it if you already do)
!pip install gpxpy
import gpxpy



In [0]:
# Location of the downloaded data. This must be the directory passed to
# gcexport.py via --directory in the download cell (the previous value,
# 'garmin_connect_export2', did not match it, so a fresh run could not find
# activities.csv). Keep the trailing slash: file names are appended directly.

folder = './garmin_connect_export/'

In [0]:
# Summary table of all downloaded activities, one row per activity.
activities_csv = folder + 'activities.csv'
df = pd.read_csv(activities_csv)

In [0]:
# Preview the first 30 activities. The row label shown on the left is the
# number used to choose which activity to look for matches on.
preview_cols = ['Start Time', 'Activity Name', 'Duration (h:m:s)']
df[preview_cols].head(30)

In [0]:
# Pick the activity to find matches for, by its row label in the activity
# table (change the 9 to choose a different one).

activity_to_match = df.loc[9, :]

We are going to use three criteria to find candidate matches:

1) Activity type

2) Activity length

3) Activity start and end points

Once this is done, we will load the GPX files of the candidates and check that the routes really are the same.

In [0]:
# first keep only the activities of the same type as the chosen one

same_type = df['Activity Type'] == activity_to_match['Activity Type']
df_possmatches = df.loc[same_type]

In [0]:
# Next keep only activities of similar length. The threshold is compared with
# differences in the 'Distance (km)' column; 0.5 is used here but can be changed.

thresh_length = 0.5

length_dif = (df_possmatches['Distance (km)'] - activity_to_match['Distance (km)']).abs()

poss_matches = length_dif.lt(thresh_length)

df_possmatches = df_possmatches.loc[poss_matches]

In [0]:
# Now check that the start and end positions agree.

# [latitude, longitude] of the first and last point of the run being matched

start_pt = [activity_to_match[col] for col in ('Begin Latitude (°DD)', 'Begin Longitude (°DD)')]
end_pt = [activity_to_match[col] for col in ('End Latitude (°DD)', 'End Longitude (°DD)')]

In [0]:
# Euclidean separation (computed directly on the latitude/longitude columns)
# between each candidate's start/end point and the reference start/end point

begin_dlat = df_possmatches['Begin Latitude (°DD)']-start_pt[0]
begin_dlon = df_possmatches['Begin Longitude (°DD)']-start_pt[1]
start_pt_dif = (begin_dlat**2 + begin_dlon**2)**0.5

end_dlat = df_possmatches['End Latitude (°DD)']-end_pt[0]
end_dlon = df_possmatches['End Longitude (°DD)']-end_pt[1]
end_pt_dif = (end_dlat**2 + end_dlon**2)**0.5

In [0]:
# Drop candidates whose combined start + end separation is not below the
# threshold (same units as the coordinate columns).

thresh_startend = 0.001

poss_matches = (start_pt_dif + end_pt_dif).lt(thresh_startend)

df_possmatches = df_possmatches.loc[poss_matches]

In [0]:
# Report how many candidates survived the summary-level filters.
print(f'We have {len(df_possmatches)} possible matches')

Now load these files and check whether the GPX traces are the same or not.

In [0]:
# function to load files

def gpxtodf(fname):
    """Load the track points of a GPX file into a DataFrame.

    Only the first segment of the first track in the file is read.

    Parameters
    ----------
    fname : str
        Path of the GPX file to read.

    Returns
    -------
    pandas.DataFrame
        One row per track point, with columns 'lon', 'lat', 'alt' and 'time'
        taken from each point's longitude, latitude, elevation and time.

    Raises
    ------
    OSError
        If the file cannot be opened.
    IndexError
        If the file contains no track or the first track has no segment.
    """
    # The context manager closes the file once parsing is done; previously the
    # handle was left open, leaking one descriptor per loaded activity.
    with open(fname, 'r') as gpx_file:
        gpx = gpxpy.parse(gpx_file)
    track = gpx.tracks[0].segments[0].points

    data = [[point.longitude, point.latitude, point.elevation, point.time]
            for point in track]

    return pd.DataFrame(data, columns=['lon', 'lat', 'alt', 'time'])


In [0]:
# GPX file of the activity being matched: <folder>activity_<Activity ID>.gpx

fname_tomatch = ''.join([folder, 'activity_', str(activity_to_match['Activity ID']), '.gpx'])

# load the trace of the run you are checking

dfgpx_tomatch = gpxtodf(fname_tomatch)


In [0]:
# Score every candidate against the reference trace. Every 10th point of one
# trace is paired with its nearest point anywhere on the other trace, in both
# directions; the mean of those nearest distances is the candidate's score.

def _nearest_dists(sampled, other, step=10):
    """Distance from every `step`-th point of `sampled` to its nearest point of `other`."""
    nearest = []
    for row in range(0, sampled.shape[0], step):
        dist = ((other['lon']-sampled['lon'].iloc[row])**2+\
                (other['lat']-sampled['lat'].iloc[row])**2)**0.5
        nearest.append(dist.min())
    return nearest


cumdif = []
n_candidates = df_possmatches.shape[0]

for attempt, activity_id in enumerate(df_possmatches['Activity ID'], start=1):
    print(f'{attempt} of {n_candidates}')

    fname_test = folder + 'activity_' + str(activity_id) + '.gpx'
    dfgpx_test = gpxtodf(fname_test)

    # candidate -> reference first, then reference -> candidate
    mindist = _nearest_dists(dfgpx_test, dfgpx_tomatch)
    mindist.extend(_nearest_dists(dfgpx_tomatch, dfgpx_test))
    cumdif.append(np.mean(mindist))

In [0]:
# Keep only the candidates whose mean trace distance is below the threshold.
# 1E-4 was found to be a good value, but it may need altering.

thresh = 1E-4

idx_match = np.array(cumdif) < thresh

activities_matched = df_possmatches.loc[idx_match]

In [0]:
# Columns shown in the summary table of matched activities.
useful_cols = [
    'Start Time',
    'Elapsed Duration (h:m:s)',
    'Average Speed (km/h or min/km)',
    'Average Heart Rate (bpm)',
    'Elevation Gain (m)',
]

In [0]:
# Summary of every activity that matched the chosen route.
activities_matched.loc[:, useful_cols]

Now that we have a summary, make a plot.

In [0]:
# function to get rolling mean

def rollingmean(times, win=3):
    """Centred rolling mean of a pandas Series, with the window cut off at the edges.

    Each output value is the mean of the input values at positions
    ``idx - win`` to ``idx + win`` inclusive, restricted to positions that
    exist. Near either end the window simply contains fewer points.

    This replaces an earlier version that raised IndexError for series shorter
    than ``win``, left the last point unsmoothed (the right edge averaged only
    the points from ``idx`` onwards), and used an off-centre middle window.

    Parameters
    ----------
    times : pandas.Series
        Values to smooth. Any dtype whose slices support ``.mean()`` works,
        including datetimes.
    win : int, optional
        Number of neighbours taken on each side of a point (default 3).

    Returns
    -------
    pandas.Series
        Smoothed copy of `times` with the same index; the input is not modified.
    """
    n = len(times)
    smoothtimes = times.copy()

    for idx in range(n):
        # clip the symmetric window to the valid positional range
        lo = max(0, idx - win)
        hi = min(n, idx + win + 1)
        smoothtimes.iloc[idx] = times.iloc[lo:hi].mean()

    return smoothtimes

In [0]:
# Add the smoothed pace as a new column. `assign` returns a new frame, so the
# cell can be re-run (the previous `insert` call raised once the column
# existed) and it no longer needs the table to have at least 50 columns, as
# the hard-coded insert position 50 did. The column is only accessed by name.
activities_matched = activities_matched.assign(
    Ave_speed_rolling=rollingmean(
        pd.to_datetime(activities_matched['Average Speed (km/h or min/km)'])
    )
)

In [0]:
# Pace per attempt (markers) with the rolling mean drawn over it. The moving
# average will not work well if you have not done the route a lot.

# the first row of the table gets the highest attempt number
attempt_no = range(len(activities_matched), 0, -1)
pace = pd.to_datetime(activities_matched['Average Speed (km/h or min/km)'])

ax = plt.subplot()
ax.plot(attempt_no, pace, 'o')
ax.plot(attempt_no, activities_matched['Ave_speed_rolling'])
ax.invert_yaxis()
ax.yaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
ax.yaxis.set_label_text('Pace')
ax.xaxis.set_label_text('Attempt Number')
plt.show()

In [0]:
# bonus: overlay the GPX traces of up to the first 5 matched activities to
# show that they follow the same route
ax = plt.subplot()

toplot = min(5, len(activities_matched))

for activity_id in activities_matched['Activity ID'].iloc[:toplot]:
    fname_plot = folder + 'activity_' + str(activity_id) + '.gpx'
    dfgpx_plot = gpxtodf(fname_plot)
    ax.plot(dfgpx_plot['lon'], dfgpx_plot['lat'])

In [0]:
# additional option: average heart rate against pace for the matched activities
pace = pd.to_datetime(activities_matched['Average Speed (km/h or min/km)'])
heart_rate = activities_matched['Average Heart Rate (bpm)']

ax = plt.subplot()
ax.plot(pace, heart_rate, 'o')
ax.invert_xaxis()
ax.xaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
ax.xaxis.set_label_text('Pace')
ax.yaxis.set_label_text('Heart Rate')
plt.show()

In [0]:
# Or filter the results by heart rate: keep only activities whose average
# heart rate is below 150 bpm and plot their pace per attempt

low_hr = activities_matched['Average Heart Rate (bpm)'].lt(150)
activities_matched_lowhr = activities_matched.loc[low_hr]

attempt_no = range(len(activities_matched_lowhr), 0, -1)
pace = pd.to_datetime(activities_matched_lowhr['Average Speed (km/h or min/km)'])

ax = plt.subplot()
ax.plot(attempt_no, pace, 'o')
ax.invert_yaxis()
ax.yaxis.set_major_formatter(mdates.DateFormatter('%H:%M'))
ax.yaxis.set_label_text('Pace')
ax.xaxis.set_label_text('Attempt Number')
plt.show()