In [1]:
import base64
import datetime

import folium
import folium.map
import great_tables
import polars as pl
import pyproj
import pytz
import xmltodict
from great_tables import GT, html
from timezonefinder import TimezoneFinder

#### Inputs

In [2]:
gpx_file = r"Route_66_10k.gpx"
person_name = "Aaron T. Leese"

#### Read GPX file and get properties and make a Dataframe of the individual GPS points

In [3]:
## Read the gpx file into a dictionary
with open(gpx_file) as f:
    doc = xmltodict.parse(f.read())

## get the activity properties
activity_name = doc["gpx"]["trk"]["name"]
activity_type = doc["gpx"]["trk"]["type"]
activity_date_str = doc["gpx"]["metadata"]["time"]
activity_creator = doc["gpx"]["@creator"]
first_point = doc["gpx"]["trk"]["trkseg"]["trkpt"][0]

## create a proper local datetime from the zulu timezone
tf = TimezoneFinder()
tz_name = tf.timezone_at(lng=float(first_point["@lon"]), lat=float(first_point["@lat"]))
activity_date_zulu = datetime.datetime.strptime(activity_date_str, "%Y-%m-%dT%H:%M:%SZ")
activity_date_no_tz = datetime.datetime(
    activity_date_zulu.year, activity_date_zulu.month, activity_date_zulu.day
)
tz = pytz.timezone(tz_name)
tz_offset = tz.utcoffset(activity_date_no_tz).total_seconds() / 3600
et_tz = datetime.timezone(datetime.timedelta(hours=tz_offset))
activity_date = datetime.datetime.strptime(
    activity_date_str, "%Y-%m-%dT%H:%M:%SZ"
).astimezone(et_tz)

## read the individual points from the trkpt XML tags into a lazyframe
activity_lf = pl.DataFrame(doc["gpx"]["trk"]["trkseg"]["trkpt"]).lazy()

## add new columns for the previous lat/long and how much elevation was gained or lost
activity_df = (
    activity_lf.with_row_index()
    .with_columns(
        pl.col("@lat").cast(pl.Float32),
        pl.col("@lon").cast(pl.Float32),
        pl.col("ele").cast(pl.Float32) * 3.2808399,
    )
    .with_columns(
        pl.col("ele").shift(1).alias("ele_previous").fill_null(strategy="backward"),
        pl.col("@lat").shift(1).alias("@lat_previous").fill_null(strategy="backward"),
        pl.col("@lon").shift(1).alias("@lon_previous").fill_null(strategy="backward"),
    )
    .with_columns(
        pl.col("index").cast(pl.Int64),
        (pl.col("ele") - pl.col("ele_previous")).alias("ele_change"),
        ## TODO make a way to figure out which part of the struct has the HR data
        pl.col("extensions").struct[-1].struct[0].alias("hr"),
        pl.col("time").str.to_datetime(),
    )
    .with_columns(
        pl.when(pl.col("ele_change") > 0)
        .then("ele_change")
        .otherwise(0)
        .alias("ele_gain"),
        pl.when(pl.col("ele_change") < 0)
        .then("ele_change")
        .otherwise(0)
        .alias("ele_loss"),
    )
    .drop("extensions", "ele_previous")
).collect()

## compute the number of meters between each GPS point and the previous one
geod = pyproj.Geod(ellps="WGS84")
forward_az, back_az, distance = geod.inv(
    activity_df["@lon"].to_numpy(),
    activity_df["@lat"].to_numpy(),
    activity_df["@lon_previous"].to_numpy(),
    activity_df["@lat_previous"].to_numpy(),
)

## add the meters traveled to the dataframe, converted to miles
## then make the cumsum of miles traveled
## then floor the cumsum so we can group the splits
activity_df = (
    activity_df.with_columns(
        pl.col("hr").cast(pl.Int16),
        pl.Series(values=distance * 3.2808399 / 5_280, name="distance"),
    )
    .with_columns(pl.cum_sum("distance").alias("elapsed_distance"))
    .with_columns(pl.col("elapsed_distance").floor().alias("elapsed_distance_floor"))
    .drop("@lat_previous", "@lon_previous", "ele_change")
)

#### Group the activity dataframe by the mile splits and compute stats

In [4]:
## group by the distance floor and roll up some stats for each split
activity_split_df = (
    activity_df.group_by(pl.col("elapsed_distance_floor"))
    .agg(
        pl.col("distance").sum(),
        pl.col("ele_gain").sum(),
        pl.col("ele_loss").sum().abs(),
        pl.col("ele").alias("elevations"),
        pl.col("hr").max().alias("hr_max"),
        pl.col("time").min().alias("start_time"),
        pl.col("time").max().alias("end_time"),
        pl.col("@lat").first(),
        pl.col("@lon").first(),
    )
    .sort("elapsed_distance_floor")
    .with_columns(
        pl.col("elapsed_distance_floor").cast(pl.Int8),
        pl.col("elevations").list.gather_every(10).alias("elevations_filtered"),
        (
            (pl.col("end_time") - pl.col("start_time")).dt.total_seconds()
            / pl.col("distance")
            * 1000
        )
        .cast(pl.Duration(time_unit="ms"))
        .alias("pace"),
    )
    .with_columns(
        (pl.col("pace").dt.total_seconds() / 60)
        .floor()
        .cast(pl.Int8)
        .alias("pace_total_min"),
        (pl.col("pace").dt.total_seconds().mod(60)).alias("pace_remain_seconds"),
    )
    .with_columns(
        (
            pl.format(
                "{}:{}",
                pl.col("pace_total_min"),
                pl.col("pace_remain_seconds").cast(pl.String).str.pad_start(2, "0"),
            ).alias("pace")
        )
    )
    .drop("start_time", "end_time", "pace_total_min", "pace_remain_seconds")
)

#### Create an overview map of the activity

In [5]:
## Create the map of the activity
folium_map = folium.Map(
    location=(activity_df["@lat"][0], activity_df["@lon"][0]),
    width="50%",
    height="50%",
    zoom_start=13,
)

## Add GPS track to map
coords = list(zip(activity_df["@lat"], activity_df["@lon"]))
folium.PolyLine(
    locations=coords,
    color="purple",
    weight=5,
).add_to(folium_map)


## Add mile marker markers
def add_mile_marker(row):
    folium.Marker(
        location=[row["@lat"], row["@lon"]],
        icon=folium.map.Icon(icon=str(row["elapsed_distance_floor"]), prefix="fa"),
    ).add_to(folium_map)


for row in activity_split_df.iter_rows(named=True):
    add_mile_marker(row)


## Add start marker
def add_point_marker(row, color):
    folium.Marker(
        location=[row["@lat"], row["@lon"]], icon=folium.map.Icon(color=color)
    ).add_to(folium_map)


for row in activity_df.head(1).iter_rows(named=True):
    add_point_marker(row, "green")
## Add end marker
for row in activity_df.tail(1).iter_rows(named=True):
    add_point_marker(row, "red")

## Save map to B64 encoded png so we can add it to the table as HTML
img_data = folium_map._to_png(1)
img_base64 = base64.b64encode(img_data).decode("ascii")

#### Put it all together and make a great GPX table

In [6]:
great_gpx_table = (
    GT(activity_split_df)
    .tab_header(
        title=html(f"<b>{activity_name}</b>"),
        subtitle=f"A {activity_type} adventure by {person_name}",
    )
    .cols_move_to_start(
        columns=[
            "elapsed_distance_floor",
            "ele_gain",
            "ele_loss",
            "elevations_filtered",
            "hr_max",
        ]
    )
    .fmt_number(columns=["distance"], decimals=2, use_seps=True)
    .fmt_number(columns=["ele_gain", "ele_loss"], decimals=1, use_seps=True)
    .fmt_nanoplot(
        columns="elevations_filtered",
        # autoscale=True,
        options=great_tables.nanoplot_options(
            show_data_points=False,
            data_line_stroke_color="green",
            data_line_stroke_width=8,
            data_area_fill_color="black",
        ),
    )
    .data_color(
        columns=["hr_max"],
        palette="Reds",
    )
    .cols_hide(["elevations", "distance", "@lat", "@lon"])
    .tab_spanner(
        label="Elevation (Feet)",
        columns=["ele_gain", "elevations_filtered", "ele_loss"],
    )
    .cols_label(
        elapsed_distance_floor=html("Split #<br>(Miles)"),
        ele_gain=html("Gain"),
        ele_loss=html("Loss"),
        hr_max=html("Maximum Heart <br>Rate (Beats/Min)"),
        elevations_filtered="Plot",
        pace=html("Pace<br>(Min/Mile)"),
    )
    .tab_source_note(f"Date: {activity_date:%B %d, %Y}")
    .tab_source_note(f"GPX Source: {activity_creator}")
    .tab_source_note(html(f'<img src="data:image/png;base64, {img_base64}">'))
)
display(great_gpx_table)
great_gpx_table.save(r"great_gpx.png")

Route 66 10k,Route 66 10k,Route 66 10k,Route 66 10k,Route 66 10k,Route 66 10k
A running adventure by Aaron T. Leese,A running adventure by Aaron T. Leese.1,A running adventure by Aaron T. Leese.2,A running adventure by Aaron T. Leese.3,A running adventure by Aaron T. Leese.4,A running adventure by Aaron T. Leese.5
0,37.4,552522552550549548547545544545545544543541539539538537537536536536535531529528527530535537541539541539537536537538538537537537535534533526522522524530535,54.5,171,8:09
1,27.6,537490535535531528524524524524521520520520518518517518520522525529532535537537537537535536535533531530529531530530530529526526524522518514507502495490,75.5,175,7:56
2,45.3,485446485479475465457451449447446448446446446448449453459469470469469469467467467467468467467467467469470470470470470470470470471471471472471472472472472470471,61.0,178,8:22
3,23.0,482470471470470471471472472473472473473474474474472473474473473472472472473474474474474475474474476476476476476476476477476477478478478479480480480481481482482482,12.5,172,8:37
4,40.7,509478482483484483479478482483484488489490490491491491492493494494495496496497499499500500501501503504504504505507507507508509508508507507507507507508509509,13.8,176,8:11
5,54.5,522500509509510511512512513505500501507514514514514510505501503507512513514514514514514515517519521520518517514512512514518520521521522521521522522,41.3,184,7:43
6,16.4,534521521522524524524524523523524526528530532534534,3.9,191,7:23
Split # (Miles),Elevation (Feet),Elevation (Feet),Elevation (Feet),Maximum Heart Rate (Beats/Min),Pace (Min/Mile)
Split # (Miles),Gain,Plot,Loss,Maximum Heart Rate (Beats/Min),Pace (Min/Mile)
"Date: June 08, 2024","Date: June 08, 2024","Date: June 08, 2024","Date: June 08, 2024","Date: June 08, 2024","Date: June 08, 2024"
