In [2]:
# set up
import geopandas as gpd
from gpxcsv import gpxtolist
import pandas as pd
import pytz
from datetime import datetime
import math
import numpy as np
import os
from timezonefinder import TimezoneFinder
import pyproj
import matplotlib.pyplot as plt

# Question 1

Write a python function that creates a geopandas geodataframe from a gpx file provided by the user. The function should require as input the gpx file name. 

The function should convert all times to date time objects in the correct time zone. The correct time zone should be inferred from the lon and lat fields in the gpx file (hint: use the timezonefinder package found here https://pypi.org/project/timezonefinder/). 

Use the lon and lat fields to create a Geometry column in UTM coordinates (just as we did in class) with meters as the unit.

In [3]:
def file_to_geodataframe(gpx_file):

    # convert a geofile into geodataframe. must convert to list and then to dataframe and then to geo dataframe
    gpx_list = gpxtolist(gpx_file)

    gpx_dataframe = pd.DataFrame(gpx_list)

    # create geometry column
    geodataframe = gpd.GeoDataFrame(gpx_dataframe, geometry = gpd.points_from_xy(gpx_dataframe['lon'], gpx_dataframe['lat'], 
                                                                                 gpx_dataframe['ele']))

    # make geodataframe timezone aware. the timezone should not change in the middle of the run,
    # so you can just take the first latitude and longitude. timezone is originally in GMT, convert to 'gdf_timezone'
    geodataframe.time = pd.to_datetime(geodataframe['time'], format='%Y-%m-%dT%H:%M:%SZ')
    
    tf = TimezoneFinder()

    gdf_timezone = tf.timezone_at(lng = geodataframe['lon'].iloc[0], lat = geodataframe['lat'].iloc[0])

    gmt_zone = pytz.timezone('GMT')

    # if the datatime column is already zone aware it will throw and error. that's unnecessary. if exception is given,
    # it is already timezone aware.
    try:
        geodataframe['time'] = geodataframe['time'].dt.tz_localize(gmt_zone)
    except:
        print('already timezone aware ')

    # define timezone you want to convert to
    local_zone = pytz.timezone(gdf_timezone)

    # convert date time data to right time zone
    geodataframe['time'] = geodataframe['time'].dt.tz_convert(local_zone)

    # convert from geodetic to map distances
    # consulted https://app.datacamp.com/workspace/w/c08587a8-8c3b-44d9-8fd5-265a41777acf/edit/lecture_notes_track_analysis_1.ipynb
    # for converting to metered (easting and westing?)
    geodataframe = geodataframe.set_crs('epsg:4326')

    geodataframe = geodataframe.to_crs(epsg = 32615)

    return geodataframe

track_one = file_to_geodataframe('./Track 1.gpx')

# Question 2

Write a python function that computes the pace at every epoch in the geodataframe. The function should require as input the GeoDatafame and a smoothing parameter. The smoothing parameter should be an odd integer that defines the size of the smoothing kernel. The function output should be a numpy array that has the same number of records as the geodataframe. 

Demonstrate how the function works by writing code (in a script or notebook) to create a labeled graph showing pace as a function of time at 3 different smoothing levels. The graph should include a legend.

In [4]:
# computing and smoothing pace

def compute_pace(gdf, smoothing_int):

    # computing distance and cumulative distance. 
    # consulted previous group project on gpx files where distance was computed
    geodataframe = gdf.copy()
    geodataframe_shift = geodataframe.shift()

    geodataframe['dist_meters'] = geodataframe.geometry.distance(geodataframe_shift)

    geodataframe['cum_dist_meters'] = 0

    for i in range(1, len(geodataframe['dist_meters'])):
        geodataframe.cum_dist_meters.iloc[i] = geodataframe.cum_dist_meters.iloc[i-1] + geodataframe.dist_meters.iloc[i]

    # converting cumulative distance and distance to miles
    geodataframe['dist_miles'] = geodataframe['dist_meters'] / 1609.344
    geodataframe['cum_dist_miles'] = geodataframe['cum_dist_meters'] / 1609.344

    # computing elapsed time
    geodataframe['elapsed_time'] = geodataframe['time'] - geodataframe_shift['time']

    # smoothing the distance column using rolling function. the distance column is the problem value when it comes to 
    # computing pace. the gps has a wide variability when it is measuring the distance at each epoch due to gps technology
    # failures. we can check this using .query to on the time and distance columns. 
    if smoothing_int > 0:
        geodataframe['smooth_dist_miles'] = geodataframe['dist_miles'].rolling(window = smoothing_int, min_periods = 1).mean()
    else:
        geodataframe['smooth_dist_miles'] = geodataframe['dist_miles']

    # computing pace at each epoch
    geodataframe['pace_seconds-per-mile'] = geodataframe['elapsed_time'] / geodataframe['smooth_dist_miles']

    return geodataframe['pace_seconds-per-mile']

# doing some exploration using query to determine how to smooth pace. it is clear that there is wide variability in distance.
# there are only a few epochs where the elapsed time is greater than 1 second, most likely due to pausing. distance varies wildly
# between epochs though. This code only works if the above function returns a dataframe instead of a series. I just used it for
# investigative purposes.
'''
track_one.query('elapsed_time > "0 days 00:00:01"')
track_one.query('dist_miles > 0.002')
'''

'\ntrack_one.query(\'elapsed_time > "0 days 00:00:01"\')\ntrack_one.query(\'dist_miles > 0.002\')\n'

In [5]:
# graphing pace at different smoothing levels

track_one_pace = compute_pace(track_one, 0)
track_one_smoothed5 = compute_pace(track_one, 5)
track_one_smoothed25 = compute_pace(track_one, 25)
track_one_smoothed55 = compute_pace(track_one, 55)

# consulted https://app.datacamp.com/workspace/w/dc771853-0073-4f06-b080-8a0b347a5577/edit/lecture_notes_track_analysis_2.ipynb for
# plotly info. consulted chatgpt to make the scale logarithmic on the y axis. asked "using plotly, how do I change the y axis scale to logarithmic?"
import plotly.express as px

# creating line and making transluscent. consulted chatgpt; asked "how to make line plot transluscent in plotly"
fig = px.scatter(title = 'Smoothed pace vs time')

fig.add_scatter(x = track_one['time'], y = track_one_pace, mode = 'lines', name = 'Original Pace', line = dict(color = 'green'))
fig.add_scatter(x = track_one['time'], y = track_one_smoothed5, mode = 'lines', name = 'Pace at kernel size 5', line = dict(color = 'blue'))
fig.add_scatter(x = track_one['time'], y = track_one_smoothed25, mode = 'lines', name = 'Pace at kernel size 25', line = dict(color = 'red'))
fig.add_scatter(x = track_one['time'], y = track_one_smoothed55, mode = 'lines', name = 'Pace at kernel size 55', line = dict(color = 'orange'))

# make y scale logarithmic for readability. almost impossible to interpret before
fig.update_yaxes(type = 'log', showticklabels = False, title_text = 'Pace')
fig.update_xaxes(title_text = 'Time')

fig.show()


A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  geodataframe.cum_dist_meters.iloc[i] = geodataframe.cum_dist_meters.iloc[i-1] + geodataframe.dist_meters.iloc[i]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  geodataframe.cum_dist_meters.iloc[i] = geodataframe.cum_dist_meters.iloc[i-1] + geodataframe.dist_meters.iloc[i]
A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  geodataframe.cum_dist_meters.iloc[i] = geodataframe.cum_dist_meters.iloc[i-1] + geodataframe.dist_meters.iloc[i]
A value is trying to be set on a copy

Important to note, the peeks seen in the graph are likely due to pauses the user makes. The peaks at the epochs where distance was miscalculated have been resolved.

# Question 3

Write a python function that classifies each epoch of the run as either running, walking, not moving, or paused recording. Any pace faster than 15 minutes per mile is considered running. Any pace between 15 and 60 minutes per mile is walking. Anything slower than 60 minutes per mile is not moving. Paused means that the recording device is turned off between epochs.

Demonstrate how the function works by reporting the amount of time spent running, walking, not moving, and not recording for any given geodataframe. Compute these values at 1, 5, and 9 second smoothing levels. 


In [6]:
# i pasted the earlier function but with a tweak to return the whole dataframe for testing the classify_movement function
def compute_pace_returndf(gdf, smoothing_int):

    # computing distance and cumulative distance. 
    # consulted previous group project on gpx files where distance was computed
    geodataframe = gdf.copy()
    geodataframe_shift = geodataframe.shift()

    geodataframe['dist_meters'] = geodataframe.geometry.distance(geodataframe_shift)

    geodataframe['cum_dist_meters'] = 0

    for i in range(1, len(geodataframe['dist_meters'])):
        geodataframe.cum_dist_meters.iloc[i] = geodataframe.cum_dist_meters.iloc[i-1] + geodataframe.dist_meters.iloc[i]

    # converting cumulative distance and distance to miles
    geodataframe['dist_miles'] = geodataframe['dist_meters'] / 1609.344
    geodataframe['cum_dist_miles'] = geodataframe['cum_dist_meters'] / 1609.344

    # computing elapsed time
    geodataframe['elapsed_time'] = geodataframe['time'] - geodataframe_shift['time']

    # smoothing the distance column using rolling function. the distance column is the problem value when it comes to 
    # computing pace. the gps has a wide variability when it is measuring the distance at each epoch due to gps technology
    # failures. we can check this using .query to on the time and distance columns. consulted https://pandas.pydata.org/docs/reference/api/pandas.DataFrame.rolling.html
    if smoothing_int > 0:
        geodataframe['smooth_dist_miles'] = geodataframe['dist_miles'].rolling(window = smoothing_int, min_periods = 1).mean()
    else:
        geodataframe['smooth_dist_miles'] = geodataframe['dist_miles']

    # computing pace at each epoch
    geodataframe['pace_seconds-per-mile'] = geodataframe['elapsed_time'] / geodataframe['smooth_dist_miles']

    return geodataframe

# defining function to classify the movement of each epoch in the geodataframe
def classify_movement(geodataframe):

    # movement classification initialization
    geodataframe['classification'] = ''

    # iterating through each row of the dataframe. we store the value of elapsed time and pace in each row so we can 
    # classify the movement in the epoch based off of these. consulted chatGPT: asked 'how to iterate through the rows of a data frame
    # and store certain column values in a variable to use in the loop".
    for index, row in geodataframe.iterrows():
        elapsed_time = row['elapsed_time']
        pace = row['pace_seconds-per-mile']

        # if elapsed time indicates that we are not paused (chose an arbitrary 7 seconds), go on to classify the epoch as running, walking, or stopped.
        if elapsed_time < pd.Timedelta("0 days 00:00:10"):
            if pace < pd.Timedelta("0 days 00:15:00"):
                geodataframe.at[index, 'classification'] = 'Running'
            elif pace < pd.Timedelta("0 days 00:60:00"):
                geodataframe.at[index, 'classification'] = 'Walking'
            else:
                geodataframe.at[index, 'classification'] = 'Stopped'
        # if elapsed time indicated that we are paused, classify the epoch as paused.
        else:
            geodataframe.at[index, 'classification'] = 'Paused'

    return geodataframe

In [12]:
# this function iterates through a geodataframe and sums the elapsed time of each epoch based on classification
def classification_summations(geodataframe):

    # initiating variables to store our values in
    time_spent_running = pd.Timedelta("0 days 00:00:00")
    time_spent_walking = pd.Timedelta("0 days 00:00:00")
    time_spent_stopped = pd.Timedelta("0 days 00:00:00")
    time_spent_paused = pd.Timedelta("0 days 00:00:00")

    # iterating through the geodataframe and summing each elapsed time of classification. we store the value of classification and 
    # elapsed time in each row to be used in our conditional statements
    for index, row in geodataframe.iterrows():
        classification = row['classification']
        elapsed_time = row['elapsed_time']

        # adding the elapsed time to our empty variables if conditions are met
        if classification == 'Running':
            time_spent_running = time_spent_running + elapsed_time
        elif classification == 'Walking':
            time_spent_walking = time_spent_walking + elapsed_time
        elif classification == 'Stopped':
            time_spent_stopped = time_spent_stopped + elapsed_time
        else:
            # i had trouble with this wanting to be equal to 'NaT'. this was because the first epoch was NaT, so anything I added to it
            # became NaT.
            if elapsed_time > pd.Timedelta('0 days 00:00:00'):
                time_spent_paused = time_spent_paused + elapsed_time


    # create a dictionary so we can return all of our classification times
    classification_dictionary = {'Running time' : time_spent_running,
                                 'Walking time' : time_spent_walking,
                                 'Stopped time' : time_spent_stopped,
                                 'Paused time' : time_spent_paused}
    
    return classification_dictionary

# create different levels of smoothed runs with classifications
track_one_unsmoothed = compute_pace_returndf(track_one, 0)
track_one_unsmoothed = classify_movement(track_one_unsmoothed)

track_one_5_smoothed = compute_pace_returndf(track_one, 5)
track_one_5_smoothed = classify_movement(track_one_5_smoothed)

track_one_9_smoothed = compute_pace_returndf(track_one, 9)
track_one_9_smoothed = classify_movement(track_one_9_smoothed)

# store the times of each smoothed run
unsmoothed_times = classification_summations(track_one_unsmoothed)
five_times = classification_summations(track_one_5_smoothed)
nine_times = classification_summations(track_one_9_smoothed)

# print results
print('Classification times for the unsmoothed run: ')
print(unsmoothed_times)
print('')
print('Classification times for the 5 smoothed run: ')
print(five_times)
print('')
print('Classification times for the 9 smoothed run: ')
print(nine_times)




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



Classification times for the unsmoothed run: 
{'Running time': Timedelta('0 days 00:45:39'), 'Walking time': Timedelta('0 days 00:02:07'), 'Stopped time': Timedelta('0 days 00:00:07'), 'Paused time': Timedelta('0 days 00:01:39')}

Classification times for the 5 smoothed run: 
{'Running time': Timedelta('0 days 00:46:58'), 'Walking time': Timedelta('0 days 00:00:54'), 'Stopped time': Timedelta('0 days 00:00:01'), 'Paused time': Timedelta('0 days 00:01:39')}

Classification times for the 9 smoothed run: 
{'Running time': Timedelta('0 days 00:47:26'), 'Walking time': Timedelta('0 days 00:00:27'), 'Stopped time': Timedelta('0 days 00:00:00'), 'Paused time': Timedelta('0 days 00:01:39')}


# Question 4

Write a python function that computes the fastest specificed interval during a run. The function should require as input a Geodataframe created using the function from Question 1 and the specified interval. For example, the function call to compute the fastest mile in a run might look like `fastest_mile = fastest_interval(geodataframe,1)` while call to compute the fastest quarter mile might look like 'fastest_quarter = fastest_interval(geodataframe,0.25)'.

Demonstrate this function by using it to compute the fastest mile, half mile, quarter mile, and 100m during the run. 

In [8]:
# defining a function to find the fastest paced specified interval over the geodataframe. I was able to create a basic function that 
# created a window that ran through the geodataframe and found the fastest pace and interval. I struggled with getting NaN responses due
# to infinite values, 0 values, or missing values. I consulted chatGPT: I asked "I am getting errors when running calculations in this
# function. How do I avoid including na values in my calculations of pace?""
def find_fastest_interval(geodataframe, interval_size):
    
    # initializing empty fastest pace and interval containers
    fastest_pace = None
    fastest_interval = None

    # using iterrows to iterate through every row of the geo data frame
    for index, row in geodataframe.iterrows():

        # designate the start of window
        current_distance = geodataframe['cum_dist_miles'].iloc[index]

        # provide valid bounds for window. we are not allowed to go outside the bounds of the data frame, so 
        # the valid rows cannot extend beyond 'current_distance + interval_size'
        if current_distance + interval_size < geodataframe['cum_dist_miles'].iloc[-1]: 
            valid_rows = geodataframe[
                (geodataframe['cum_dist_miles'] >= current_distance) & 
                (geodataframe['cum_dist_miles'] <= current_distance + interval_size)
                ]

        # excluding na values, calculate the window's distance, elapsed time, and pace
        if pd.notna(valid_rows['elapsed_time']).all() and pd.notna(valid_rows['cum_dist_miles']).all():
            valid_rows_time = valid_rows['time'].iloc[-1] - valid_rows['time'].iloc[0]
            valid_rows_distance = valid_rows['cum_dist_miles'].iloc[-1] - valid_rows['cum_dist_miles'].iloc[0]

            # avoid dividing by zero
            if valid_rows_distance == 0:
                continue

            valid_rows_pace = valid_rows_time / valid_rows_distance

            # storing the fastest pace and interval. will be updated every iteration if the pace is faster
            if fastest_pace is None or valid_rows_pace < fastest_pace:
                fastest_pace = valid_rows_pace
                fastest_interval = (current_distance, current_distance + interval_size)

    # initialize a dictionary to store results
    fastest_interval_dict = {}

    # return dicitonary of results
    if fastest_interval is not None:
        fastest_interval_dict = {'Fastest interval' : fastest_interval, 'Pace: ' : fastest_pace}
        return fastest_interval_dict
    else:
        return None
    
# print the fastest mile
fastest_mile = find_fastest_interval(track_one_unsmoothed, 1)
print('Fastest mile: ')
print(fastest_mile)
print('')

# print the fastest half mile
fastest_halfmile = find_fastest_interval(track_one_unsmoothed, 0.5)
print('Fastest half mile')
print(fastest_halfmile)
print('')

# print the fastest quarter mile
fastest_quartermile = find_fastest_interval(track_one_unsmoothed, 0.25)
print('Fastest quarter mile')
print(fastest_quartermile)
print('')

# print the fastest 100 meters
fastest_100_meters = find_fastest_interval(track_one_unsmoothed, 0.0621371)
print('Fastest 100 meters')
print(fastest_100_meters)
print('')

Fastest mile: 
{'Fastest interval': (3.6693746900695383, 4.669374690069539), 'Pace: ': Timedelta('0 days 00:08:54.120155659')}

Fastest half mile
{'Fastest interval': (0.0509177880227268, 0.5509177880227268), 'Pace: ': Timedelta('0 days 00:07:50.603411636')}

Fastest quarter mile
{'Fastest interval': (0.2523073160822762, 0.5023073160822762), 'Pace: ': Timedelta('0 days 00:07:35.868700644')}

Fastest 100 meters
{'Fastest interval': (0.25931081820350027, 0.32144791820350027), 'Pace: ': Timedelta('0 days 00:06:59.283030079')}



# Question 5

One way to think about how fit a person is to compare their effort to their pace throughout the run. 

Write a function that computes an efficiency score for the activity using some combination of data in the geodatabase. How you define the function is up to you, but consider how heart rate, elevation, and pace contribute. Describe your reasoning behind your scoring system in your function comments and indicate the units of your score. There is no correct answer here so your score will reflect how you approach the problem and implement it.

Demonstrate how this works by writing code to produce a graph of effort as a function of time across two different runs and describe how they differ or do not differ.

I was a long distance runner in high school, and, as a result, have done a fair bit of research regarding the most effective way to measure effort. From my understanding, heart rate is the most effective measure of effort we have--having known this it was something I utilized in cross country training all throughout highschool. Heart rate is an honest, inclusive measure of effort: naturally it takes into account pace, elevation, and other factors that may affect effort (ie, your heart rate increases as run steeper trails because you have to work harder, etc.).

In [9]:
# defines a function that measures what percentage of a run you spend in each heart rate zone.
# the heart rate zones are defined as follows:
# Zone 1 (recovery/easy) : 55%-65% of HR max
# Zone 2 (aerobic/base) : 65%-75% of HR max
# Zone 3 (tempo) : 80%-85% of HR max
# Zone 4 (lactate threshold) : 	85%-88% of HR max
# Zone 5 (anaerobic) : 90% of HR max and above
# it is important to note, max heartrates are not uniform across the runner population. because max HR varies person to person, the function
# that utilizes max HR must have max HR as a parameter. a common rule of thumb is to subtract your age from 220 to find
# your max heartrate. the function will also output a Rate of Percieved Exertion number (1-10). there are general guidlines for which
# heart rate zone corresponds to what number on the RPE scale.

def heartrate_zones(geodataframe, max_HR):
    
    # creating a heartrate_zone column initialized as an empty string so we can later store the heartrate zone for each epoch here.
    max_HR = float(max_HR)
    geodataframe['heartrate_zone'] = ''

    # creating out heartrate zone bins
    zones = {
        'Zone 1 (recovery/easy)' : (max_HR * 0.30, max_HR * 0.6599),
        'Zone 2 (aerobic/base)' : (max_HR * 0.66, max_HR * 0.7599),
        'Zone 3 (tempo)' : (max_HR * 0.76, max_HR * 0.8599),
        'Zone 4 (lactate threshold)' : (max_HR * 0.86, max_HR * 0.8899),
        'Zone 5 (anaerobic)' : (max_HR * 0.89, max_HR * 1.00)
    }

    # iterating through each row of the dataframe. we are storing the heartrate in each row because that is what we are using to
    # assign heartrate zones. i used the general loop framework utilized to assign classification to each epoch in the earlier function
    for index, row in geodataframe.iterrows():
        heartrate = row['hr']

        # for each row, we will check which bin the heartrate it belongs into and assign 
        # the corresponding heartrate zone in the heartrate zone column
        for zone, (minimum, maximum) in zones.items():
            if heartrate >= minimum and heartrate <= maximum:
                # i was having trouble assigning the zone to the whole data frame. i prompted chatGPT with a copy of my code,
                # and it discovered my problem and gave me a solution. 
                geodataframe.at[index, 'heartrate_zone'] = zone
                break 

    return geodataframe

def effort_calculation(geodataframe):
    
    # find which heart rate zone occurs most in the data frame
    zone_1_sum = 0
    zone_2_sum = 0
    zone_3_sum = 0
    zone_4_sum = 0
    zone_5_sum = 0

    # summing each occurence
    for zone in geodataframe['heartrate_zone']:
        if zone == 'Zone 1 (recovery/easy)':
            zone_1_sum += 1
        elif zone == 'Zone 2 (aerobic/base)':
            zone_2_sum += 1
        elif zone == 'Zone 3 (tempo)':
            zone_3_sum += 1
        elif zone == 'Zone 4 (lactate threshold)':
            zone_4_sum += 1
        elif zone == 'Zone 5 (anaerobic)':
            zone_5_sum += 1
    
    # calculating the total observations in the dataframe
    total = (zone_1_sum + zone_2_sum + zone_3_sum + zone_4_sum + zone_5_sum)

    # calculating the proportion of each heartrate zone in the run
    zone_1_proportion = zone_1_sum / total
    zone_2_proportion = zone_2_sum / total
    zone_3_proportion = zone_3_sum / total
    zone_4_proportion = zone_4_sum / total
    zone_5_proportion = zone_5_sum / total

    # creating dictionary so we can return the proportions from the function with labels
    zone_dictionary = {
       'Zone 1 (recovery/easy)': zone_1_proportion,
       'Zone 2 (aerobic/base)': zone_2_proportion,
       'Zone 3 (tempo)': zone_3_proportion,
       'Zone 4 (lactate threshold)': zone_4_proportion,
       'Zone 5 (anaerobic)': zone_5_proportion
    }

    # Find the zone with the highest proportion
    most_common_zone = max(zone_dictionary, key = zone_dictionary.get)

    # find a weighted measure of effort using zone prevalency
    weighted_RPE = 1 * zone_1_proportion + 2 * zone_2_proportion + 3 * zone_3_proportion + 4 * zone_4_proportion + 5 * zone_5_proportion

    # return statement
    return 'This run was largely a ' + most_common_zone + ' run.', zone_dictionary, 'This run had a rate of perceived effort of ' + str(weighted_RPE) + ' out of 5.'

# call both on unsmoothed run
track_one_unsmoothed = heartrate_zones(track_one_unsmoothed, 200)
effort_calculation(track_one_unsmoothed)

('This run was largely a Zone 1 (recovery/easy) run.',
 {'Zone 1 (recovery/easy)': 0.6124651810584958,
  'Zone 2 (aerobic/base)': 0.36629526462395545,
  'Zone 3 (tempo)': 0.021239554317548745,
  'Zone 4 (lactate threshold)': 0.0,
  'Zone 5 (anaerobic)': 0.0},
 'This run had an rate of perceived effort of 1.408774373259053 out of 5.')

Because we are using heartrate as the measure of effort, the best way to compare the effort of two runs would be to visualize a rolling average across time.

In [10]:
# compute a rolling average for heartrate across two runs. first we need to convert them to geodataframes
track_one_rolling = file_to_geodataframe('./Track 1.gpx')
track_two_rolling = file_to_geodataframe('./Track 2.gpx')

# compute rolling average of heartrate
track_one_rolling_average = track_one_rolling['hr'].rolling(window = 10, min_periods = 1).mean()
track_two_rolling_average = track_two_rolling['hr'].rolling(window = 10, min_periods = 1).mean()

# create scatter fig to be used for each track
fig1 = px.scatter(title = 'Heartrate rolling average vs time for track one')
fig2 = px.scatter(title = 'Heartrate rolling average vs time for track two')

# add scatter plot to each figure
fig1.add_scatter(x = track_one_rolling['time'], y = track_one_rolling_average, mode = 'lines', line = dict(color = 'orange'))
fig2.add_scatter(x = track_two_rolling['time'], y = track_two_rolling_average, mode = 'lines', line = dict(color = 'blue'))

# label the axes of each plot
fig1.update_yaxes(title_text = 'Heartrate')
fig1.update_xaxes(title_text = 'Time')

fig2.update_yaxes(title_text = 'Heartrate')
fig2.update_xaxes(title_text = 'Time')

# show each figure
fig1.show()
fig2.show()

The rolling average across track two seems to be higher in most places than the rolling average in track one. There is a dip in track two around the ~ 18:00, most likely due to a pause. The heart rate in track one does get higher later in the run. This might lead us to believe that, overall, track two is a higher effort run than track one.

In [11]:
# track one test
test_1 = file_to_geodataframe('./Track 1.gpx')
test_1_pace = compute_pace_returndf(test_1, 10)
test_1_pace = classify_movement(test_1_pace)
test_1_times = classification_summations(test_1_pace)
print(test_1_times)
fastest_mile_one = find_fastest_interval(test_1_pace, 1)
print('Fastest mile: ')
print(fastest_mile_one)
print('')
test_1_pace = heartrate_zones(test_1_pace, 200)
print(effort_calculation(test_1_pace))

print('')

# track two test
test_2 = file_to_geodataframe('./Track 2.gpx')
test_2_pace = compute_pace_returndf(test_2, 10)
test_2_pace = classify_movement(test_2_pace)
test_2_times = classification_summations(test_2_pace)
print(test_2_times)
fastest_mile_two = find_fastest_interval(test_2_pace, 1)
print('Fastest mile: ')
print(fastest_mile_two)
print('')
test_2_pace = heartrate_zones(test_2_pace, 200)
print(effort_calculation(test_2_pace))

# track 3 test
test_3 = file_to_geodataframe('./Track 3.gpx')
test_3_pace = compute_pace_returndf(test_3, 10)
test_3_pace = classify_movement(test_3_pace)
test_3_times = classification_summations(test_3_pace)
print(test_3_times)
fastest_mile_three = find_fastest_interval(test_3_pace, 1)
print('Fastest mile: ')
print(fastest_mile_three)
print('')
test_3_pace = heartrate_zones(test_3_pace, 200)
print(effort_calculation(test_3_pace))

# track 4 test
test_4 = file_to_geodataframe('./Track 4.gpx')
test_4_pace = compute_pace_returndf(test_4, 10)
test_4_pace = classify_movement(test_4_pace)
test_4_times = classification_summations(test_4_pace)
print(test_4_times)
fastest_mile_four = find_fastest_interval(test_4_pace, 1)
print('Fastest mile: ')
print(fastest_mile_four)
print('')
test_4_pace = heartrate_zones(test_4_pace, 200)
print(effort_calculation(test_4_pace))




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



{'Running time': Timedelta('0 days 00:47:28'), 'Walking time': Timedelta('0 days 00:00:25'), 'Stopped time': Timedelta('0 days 00:00:00'), 'Paused time': Timedelta('0 days 00:01:39')}
Fastest mile: 
{'Fastest interval': (3.6693746900695383, 4.669374690069539), 'Pace: ': Timedelta('0 days 00:08:54.120155659')}

('This run was largely a Zone 1 (recovery/easy) run.', {'Zone 1 (recovery/easy)': 0.6124651810584958, 'Zone 2 (aerobic/base)': 0.36629526462395545, 'Zone 3 (tempo)': 0.021239554317548745, 'Zone 4 (lactate threshold)': 0.0, 'Zone 5 (anaerobic)': 0.0}, 'This run had an rate of perceived effort of 1.408774373259053 out of 5.')





A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



{'Running time': Timedelta('0 days 00:56:15'), 'Walking time': Timedelta('0 days 00:00:05'), 'Stopped time': Timedelta('0 days 00:00:00'), 'Paused time': Timedelta('0 days 00:03:22')}
Fastest mile: 
{'Fastest interval': (6.072274682069636, 7.072274682069636), 'Pace: ': Timedelta('0 days 00:06:37.790243379')}

('This run was largely a Zone 2 (aerobic/base) run.', {'Zone 1 (recovery/easy)': 0.2770549970431697, 'Zone 2 (aerobic/base)': 0.6877587226493199, 'Zone 3 (tempo)': 0.03518628030751035, 'Zone 4 (lactate threshold)': 0.0, 'Zone 5 (anaerobic)': 0.0}, 'This run had an rate of perceived effort of 1.7581312832643405 out of 5.')




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



{'Running time': Timedelta('0 days 00:54:31'), 'Walking time': Timedelta('0 days 00:00:28'), 'Stopped time': Timedelta('0 days 00:00:01'), 'Paused time': Timedelta('0 days 00:00:00')}
Fastest mile: 
{'Fastest interval': (5.3326804785382285, 6.3326804785382285), 'Pace: ': Timedelta('0 days 00:07:16.198953006')}

('This run was largely a Zone 2 (aerobic/base) run.', {'Zone 1 (recovery/easy)': 0.061488673139158574, 'Zone 2 (aerobic/base)': 0.6618122977346278, 'Zone 3 (tempo)': 0.2766990291262136, 'Zone 4 (lactate threshold)': 0.0, 'Zone 5 (anaerobic)': 0.0}, 'This run had an rate of perceived effort of 2.215210355987055 out of 5.')




A value is trying to be set on a copy of a slice from a DataFrame

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy



{'Running time': Timedelta('0 days 00:14:46'), 'Walking time': Timedelta('0 days 00:34:17'), 'Stopped time': Timedelta('0 days 00:00:21'), 'Paused time': Timedelta('0 days 00:00:00')}
Fastest mile: 
{'Fastest interval': (2.1568360262785173, 3.1568360262785173), 'Pace: ': Timedelta('0 days 00:14:22.693375232')}

('This run was largely a Zone 1 (recovery/easy) run.', {'Zone 1 (recovery/easy)': 0.9927536231884058, 'Zone 2 (aerobic/base)': 0.007246376811594203, 'Zone 3 (tempo)': 0.0, 'Zone 4 (lactate threshold)': 0.0, 'Zone 5 (anaerobic)': 0.0}, 'This run had an rate of perceived effort of 1.0072463768115942 out of 5.')
