In [9]:
import pandas as pd
import geopandas as gpd
from shapely.geometry import Point
from gpxcsv import gpxtolist
from math import radians, sin, cos, sqrt, atan2

def haversine_distance(point1, point2):
    """Return the great-circle distance between two points, in meters.

    Uses the haversine formula on a sphere of radius 6371.0 km.

    Args:
        point1: Object exposing ``x`` (longitude) and ``y`` (latitude),
            both in degrees.
        point2: Second point, same layout as ``point1``.

    Returns:
        The distance between the two points in meters, as a float.
    """
    R = 6371.0  # Earth radius in kilometers

    # Convert degrees to radians, then take the latitude/longitude deltas.
    lat1, lon1 = radians(point1.y), radians(point1.x)
    lat2, lon2 = radians(point2.y), radians(point2.x)
    dlat = lat2 - lat1
    dlon = lon2 - lon1

    a = sin(dlat / 2)**2 + cos(lat1) * cos(lat2) * sin(dlon / 2)**2

    # Rounding can leave `a` a hair outside [0, 1] (e.g. antipodal points),
    # which would make sqrt(1 - a) raise a math domain error. Plain
    # comparisons are used so that a NaN input still propagates as NaN.
    if a > 1.0:
        a = 1.0
    elif a < 0.0:
        a = 0.0

    # Angular distance in radians.
    c = 2 * atan2(sqrt(a), sqrt(1 - a))

    # Angular distance times radius gives kilometers; scale to meters.
    distance = R * c * 1000
    return distance

def create_geodataframe(gpx_list):
    """Turn a list of GPX track records into a GeoDataFrame with distances.

    Args:
        gpx_list: Records convertible by ``pd.DataFrame`` that provide
            ``time`` (strings shaped like ``2015-01-18T17:31:50Z``),
            ``lat`` and ``lon`` fields.

    Returns:
        A GeoDataFrame holding the original fields, ``time`` parsed to
        datetimes, a ``geometry`` column of points and a
        ``cumulative_distance`` column: the running haversine distance in
        meters from the first point (0.0 on the first row).
    """
    track_df = pd.DataFrame(gpx_list)
    track_df['time'] = pd.to_datetime(track_df['time'], format='%Y-%m-%dT%H:%M:%SZ')

    def _row_to_point(record):
        # Points are (x, y), i.e. (longitude, latitude).
        return Point(record['lon'], record['lat'])

    track_df['geometry'] = track_df.apply(_row_to_point, axis=1)
    gdf = gpd.GeoDataFrame(track_df, geometry='geometry')

    # Walk consecutive point pairs, carrying the running total forward.
    points = list(gdf['geometry'])
    running_totals = [0.0]
    for previous_point, current_point in zip(points, points[1:]):
        leg = haversine_distance(previous_point, current_point)
        running_totals.append(running_totals[-1] + leg)

    gdf['cumulative_distance'] = running_totals
    return gdf

def fastest_interval(gdf, interval_miles):
    """Find the fastest stretch of the track covering at least ``interval_miles``.

    Every point is tried as a start. Its stretch ends at the first point whose
    cumulative distance is at least the start's cumulative distance plus the
    interval. The stretch with the smallest elapsed time wins; on ties the
    earliest start is kept.

    Rows are addressed by position throughout, so the result does not depend
    on the frame's index labels.

    Args:
        gdf: Frame with a ``cumulative_distance`` column (meters; expected to
            be non-decreasing, as a running total is) and a ``time`` column
            of timestamps.
        interval_miles: Length of the interval in miles.

    Returns:
        dict with ``start_time``, ``end_time`` and ``time_seconds`` (elapsed
        time in seconds). All three are None when the track never covers the
        requested distance.
    """
    # Work in meters, the unit of cumulative_distance.
    interval_m = interval_miles * 1.60934 * 1000

    distances = gdf['cumulative_distance'].to_numpy(dtype=float)
    times = gdf['time']
    n_points = len(distances)

    fastest_time = None
    start_time, end_time = None, None

    for i in range(n_points):
        target_distance = distances[i] + interval_m

        # Binary search on the running total instead of filtering the whole
        # frame once per start point.
        j = int(distances.searchsorted(target_distance, side='left'))

        # j == n_points means the track ends before the interval is covered.
        # The explicit comparison also rejects NaN distances, which never
        # satisfy >=.
        if j >= n_points or not distances[j] >= target_distance:
            continue

        time_taken = (times.iloc[j] - times.iloc[i]).total_seconds()

        # Keep the first valid stretch, then only strictly faster ones.
        if fastest_time is None or time_taken < fastest_time:
            fastest_time = time_taken
            start_time = times.iloc[i]
            end_time = times.iloc[j]

    return {
        'start_time': start_time,
        'end_time': end_time,
        'time_seconds': fastest_time
    }

# Read the recorded track from disk, then build the GeoDataFrame
# (with cumulative distances) that the interval search runs on.
GPX_PATH = '/Users/laurenmoffett/Desktop/Strava/backup/Track-2.gpx'
gpx_list = gpxtolist(GPX_PATH)
gdf = create_geodataframe(gpx_list)


In [10]:
from datetime import timedelta

def format_time(seconds):
    """Render a duration given in seconds as readable text, e.g. ``0:07:13``.

    Args:
        seconds: Duration in seconds.

    Returns:
        The ``str()`` form of the matching ``timedelta``.
    """
    duration = timedelta(seconds=seconds)
    return str(duration)

def display_fastest_interval(interval_name, result):
    """Print a short report for one fastest-interval result.

    Args:
        interval_name: Label used in the printed text (e.g. "mile").
        result: dict with ``start_time``, ``end_time`` and ``time_seconds``.
            When ``time_seconds`` is None, only a "no valid interval"
            message is printed.
    """
    elapsed = result['time_seconds']

    # Nothing was found for this interval: report it and stop.
    if elapsed is None:
        print(f"No valid {interval_name} interval found.")
        return

    stamp_format = '%Y-%m-%d %H:%M:%S'
    report_lines = [
        f"Fastest {interval_name}:",
        f"  Start Time: {result['start_time'].strftime(stamp_format)}",
        f"  End Time: {result['end_time'].strftime(stamp_format)}",
        f"  Time Taken: {format_time(elapsed)}",
        "-" * 40,  # visual separator between reports
    ]
    for line in report_lines:
        print(line)

# Fastest stretch for each distance of interest (lengths given in miles).
fastest_mile = fastest_interval(gdf, 1)
fastest_half_mile = fastest_interval(gdf, 0.5)
fastest_quarter_mile = fastest_interval(gdf, 0.25)
fastest_100m = fastest_interval(gdf, 0.0621371)  # 100 meters expressed in miles

# Print one report per distance, longest first.
for label, interval_result in (
    ("mile", fastest_mile),
    ("half-mile", fastest_half_mile),
    ("quarter-mile", fastest_quarter_mile),
    ("100 meters", fastest_100m),
):
    display_fastest_interval(label, interval_result)


Fastest mile:
  Start Time: 2015-01-18 17:31:50
  End Time: 2015-01-18 17:39:03
  Time Taken: 0:07:13
----------------------------------------
Fastest half-mile:
  Start Time: 2015-01-18 17:32:14
  End Time: 2015-01-18 17:35:47
  Time Taken: 0:03:33
----------------------------------------
Fastest quarter-mile:
  Start Time: 2015-01-18 17:31:56
  End Time: 2015-01-18 17:33:42
  Time Taken: 0:01:46
----------------------------------------
Fastest 100 meters:
  Start Time: 2015-01-18 17:33:06
  End Time: 2015-01-18 17:33:31
  Time Taken: 0:00:25
----------------------------------------
