In [None]:
from functools import cache
from pathlib import Path

import duckdb
import folium
import pandas as pd
from haversine import haversine
from itables import init_notebook_mode
from loguru import logger
from tqdm.notebook import tqdm

In [None]:
# Switch on itables' notebook integration. all_interactive=True is presumably what
# makes every DataFrame displayed below render as an interactive table — confirm
# against the installed itables version.
init_notebook_mode(all_interactive=True)

In [None]:
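# One-off step: convert the Apple Health export archive into the SQLite database read below.
# Left commented out so that "Run All" does not repeat the export; adjust the input path for your machine.
# !python src/example_package/healthkit_to_sqlite.py /Users/mjboothaus/icloud/Data/apple_health_export/export_2024_03_25.zip ./data/healthkit-2024-03-25-fix.db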

In [None]:
# This file was produced by the (commented) command above.
# NOTE(review): that command writes to ./data/ while the path below is ../data/,
# so the two are presumably run from different working directories — confirm.

HEALTH_DB = "../data/healthkit-2024-03-25-fix.db"

In [None]:
# Fail early, and say which path was looked up, when the database file is missing.
assert Path(HEALTH_DB).exists(), f"Health database not found: {Path(HEALTH_DB).resolve()}"

In [None]:
# Open the health database through DuckDB; `con` is shared by every query below.
con = duckdb.connect(HEALTH_DB)
# Install and load DuckDB's "sqlite" extension — presumably needed because the
# file was written as a SQLite database by healthkit_to_sqlite; verify.
con.install_extension("sqlite")
con.load_extension("sqlite")

In [None]:
# List the tables visible on the connection. The trailing ";" keeps the result
# from being rendered as the cell output; drop it to see the table list.
con.sql("PRAGMA show_tables");

In [None]:
# Calendar dates (YYYY-MM-DD) of the walks to analyse; compared against the
# first 10 characters of workouts.startDate in the query below.
walk_dates = [
    "2020-05-11",
    "2020-05-18",
    "2020-05-25",
    "2020-06-01",
    "2021-03-02",
    "2021-03-12",
    "2021-04-03",
    "2021-10-15",
    "2021-10-29",
    "2023-05-23",
    "2024-02-26",
    "2024-02-27",
]
# Joined with "', '" so the result can be placed between the outer quotes of a
# SQL IN ('...') list, giving 'date1', 'date2', ...
all_dates = "', '".join(walk_dates)

In [None]:
# Workouts recorded on the chosen dates, excluding those whose source is
# AllTrails and those with duration below 40 (units as stored in the table).
#
# The multi-line text is flattened by collapsing every run of whitespace into a
# single space. Simply deleting the newlines only produced valid SQL while each
# line happened to end in an (invisible) trailing space; an editor that trims
# trailing whitespace would silently turn it into "workoutsWHERE ...".
walks_sql = " ".join(
    f"""
SELECT * FROM workouts
WHERE sourceName != 'AllTrails'
AND LEFT(startDate, 10) IN ('{all_dates}')
AND duration >= 40
""".split()
)

In [None]:
# Run the selection query and materialise the matching workouts as a pandas DataFrame.
walks_df = con.sql(walks_sql).to_df()

In [None]:
# Display the selected workouts for a visual sanity check.
walks_df

In [None]:
# One-column frame holding the ids of the selected workouts. Taken from the
# rows already loaded into walks_df rather than re-running the query with "*"
# textually replaced by "id": that replacement touches every "*" in the SQL
# text and costs a second database round-trip for data already in memory.
walk_id = walks_df[["id"]].copy()

In [None]:
# Plain Python list of the workout ids, in the row order of walk_id.
walk_ids = walk_id["id"].values.tolist()

In [None]:
def update_map(m, df, n=10):
    """
    Draw one walk on the map: a blue polyline plus a green start and a red end marker.

    Parameters:
    - m: The map object to update; the layers are added to it in place.
    - df: The DataFrame containing latitude and longitude columns, one row per
      recorded point, in the order the points should be joined.
    - n: Downsampling factor (>= 1). Default is 10, meaning every 10th row is
      used for the polyline. The final row is always included so the line
      reaches the end marker.

    Returns:
    - The same map object `m`. It is returned untouched when `df` has no rows.

    Raises:
    - ValueError: If `n` is smaller than 1.
    """
    if n < 1:
        raise ValueError(f"n must be a positive integer, got {n!r}")
    if df.empty:
        # Nothing to draw; the first/last-row lookups below would raise IndexError.
        return m

    coords = df[["latitude", "longitude"]]
    start_point = coords.iloc[0].tolist()
    end_point = coords.iloc[-1].tolist()

    # Downsample by taking every nth row.
    points = coords.iloc[::n].values.tolist()
    if (len(coords) - 1) % n != 0:
        # The stride skipped the final row; append it so the polyline does not
        # stop short of the red end marker.
        points.append(end_point)

    folium.PolyLine(points, color="blue", weight=3.5, opacity=1).add_to(m)
    folium.Marker(start_point, icon=folium.Icon(color="green")).add_to(m)
    folium.Marker(end_point, icon=folium.Icon(color="red")).add_to(m)
    return m

In [None]:
@cache
def get_walk_data_for_id(con, id):
    """
    Fetch every row of `workout_points` that belongs to one workout.

    Parameters:
    - con: Database connection to query. It is part of the cache key, so it
      must be hashable.
    - id: The `workout_id` value to look up.

    Returns:
    - A DataFrame with the matching rows. Results are memoised per (con, id):
      repeated calls return the very same DataFrame object, so copy it before
      modifying it.
    """
    # Bind the id as a statement parameter instead of pasting it into the SQL
    # text: an id containing a quote can no longer break (or alter) the query,
    # and non-string ids no longer fail inside str.replace.
    walk_sql = "SELECT * FROM workout_points WHERE workout_id = ?"
    return con.execute(walk_sql, [id]).df()

In [None]:
def create_map(walk_ids, con, SCALAR=1.00001):
    """
    Build a map showing every walk in `walk_ids`.

    Parameters:
    - walk_ids: Sized iterable of workout IDs to plot.
    - con: Database connection handed to `get_walk_data_for_id`.
    - SCALAR: Factor (slightly above 1) by which the fitted view is widened so
      the routes do not sit exactly on the map edge.

    Returns:
    - A tuple `(m, all_walks_df, bounds)`: the map, a dict mapping each walk ID
      to its points DataFrame, and the unpadded bounds reported by the map
      (`[[None, None], [None, None]]`-style when nothing was plotted).
    """

    def lower_edge(value):
        # Push a south/west edge outwards. Dividing only lowers positive
        # values; a negative value has to be multiplied to become smaller.
        return value / SCALAR if value >= 0 else value * SCALAR

    def upper_edge(value):
        # Push a north/east edge outwards, again independent of the sign.
        return value * SCALAR if value >= 0 else value / SCALAR

    all_walks_df = {}
    # Initial centre and zoom; replaced by fit_bounds below once points exist.
    m = folium.Map(location=[43.3183, -1.9812], zoom_start=12, tiles="openstreetmap")
    for walk_id in tqdm(walk_ids, total=len(walk_ids)):
        logger.info(f"Getting data for walk ID: {walk_id}")
        walk_df = get_walk_data_for_id(con, walk_id)
        all_walks_df[walk_id] = walk_df
        m = update_map(m, walk_df)

    # Fit once, after all walks are drawn: only the final bounds matter, and
    # computing them here keeps `bounds` defined when `walk_ids` is empty.
    bounds = m.get_bounds()
    (south, west), (north, east) = bounds
    if None not in (south, west, north, east):
        m.fit_bounds(
            [
                [lower_edge(south), lower_edge(west)],
                [upper_edge(north), upper_edge(east)],
            ]
        )
    return m, all_walks_df, bounds

In [None]:
# Plot every selected walk on one map; also keep the per-walk point DataFrames
# (keyed by workout id) and the bounds reported by the map.
m, all_walks_df, bounds = create_map(walk_ids, con)

In [None]:
# m  # uncomment to display the map of all selected walks

In [None]:
@cache
def get_walk_start_end_locations(con):
    """
    Return the first and last recorded coordinates of every workout.

    The query reads `workout_points`. Within each `workout_id` the rows are
    ordered by `date`; the latitude/longitude of the first row become the
    start position and those of the last row the end position. DISTINCT then
    collapses the per-point rows that carry identical values.

    Parameters:
    - con: Connection exposing `sql(...)`; also the cache key for `functools.cache`.

    Returns:
    - A DataFrame with the columns id, start_latitude, start_longitude,
      end_latitude and end_longitude.
    """
    start_end_sql = """
    SELECT DISTINCT
        workout_id as id,
        start_latitude,
        start_longitude,
        end_latitude,
        end_longitude
    FROM (
        SELECT 
            workout_id,
            FIRST_VALUE(latitude) OVER (PARTITION BY workout_id ORDER BY date) AS start_latitude,
            FIRST_VALUE(longitude) OVER (PARTITION BY workout_id ORDER BY date) AS start_longitude,
            LAST_VALUE(latitude) OVER (PARTITION BY workout_id ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS end_latitude,
            LAST_VALUE(longitude) OVER (PARTITION BY workout_id ORDER BY date ROWS BETWEEN UNBOUNDED PRECEDING AND UNBOUNDED FOLLOWING) AS end_longitude
        FROM workout_points
    ) AS subquery;
"""
    relation = con.sql(start_end_sql)
    return relation.to_df()

In [None]:
# Start/end coordinates for every workout that has rows in workout_points.
walk_start_end_df = get_walk_start_end_locations(con)

In [None]:
def find_nearby_walks(walks_df, reference_walk_id, location="start", max_distance_km=1):
    """
    Find walks within a specified distance from the start or end location of a reference walk.

    Parameters:
    - walks_df: DataFrame containing walks data with columns ['id', 'start_latitude', 'start_longitude', 'end_latitude', 'end_longitude']
    - reference_walk_id: The ID of the reference walk.
    - location: 'start' or 'end' to compare start points or end points.
    - max_distance_km: The maximum distance in kilometers to consider a walk as nearby.

    Returns:
    - A tuple `(nearby_walks, reference_walk_id)`. `nearby_walks` is a list of
      `(walk_id, distance)` tuples, in the row order of `walks_df`, for every
      other walk whose distance to the reference point is at most
      `max_distance_km`. The reference walk itself is never included.

    Raises:
    - ValueError: If `location` is neither 'start' nor 'end', or if
      `reference_walk_id` does not occur in `walks_df`.
    """
    if location not in ("start", "end"):
        # Previously any other value silently meant 'end'.
        raise ValueError(f"location must be 'start' or 'end', got {location!r}")
    lat_col = f"{location}_latitude"
    lon_col = f"{location}_longitude"

    # Extract the reference location.
    reference_rows = walks_df.loc[walks_df["id"] == reference_walk_id]
    if reference_rows.empty:
        raise ValueError(f"Reference walk {reference_walk_id!r} not found in walks_df")
    reference_point = (reference_rows[lat_col].iloc[0], reference_rows[lon_col].iloc[0])

    # Calculate distances and filter walks.
    nearby_walks = []
    for walk_id, lat, lon in zip(walks_df["id"], walks_df[lat_col], walks_df[lon_col]):
        if walk_id == reference_walk_id:
            continue
        distance = haversine(reference_point, (lat, lon))
        if distance <= max_distance_km:
            nearby_walks.append((walk_id, distance))

    return nearby_walks, reference_walk_id

In [None]:
# Walks whose start point lies within 8 km of the start of the reference walk
# (location defaults to "start"). The id is a workout id hard-coded for this analysis.
nearby_walks, ref_walk_id = find_nearby_walks(
    walk_start_end_df, "e0db8356ce08a6e309cd9501916310c70f6f9fca", max_distance_km=8.0
)

In [None]:
# Inspect the (walk id, distance) pairs that were found.
nearby_walks

In [None]:
# The reference walk id, echoed back by find_nearby_walks.
ref_walk_id

In [None]:
def plot_walks_and_nearby(con, reference_walk_id, nearby_walks_info, SCALAR=1.0001):
    """
    Plots the first recorded point of the specified walk and of each nearby walk on a map.

    Parameters:
    - con: Database connection handed to `get_walk_data_for_id`.
    - reference_walk_id: The ID of the reference walk.
    - nearby_walks_info: A list of tuples (walk_id, distance_km) for nearby walks.
    - SCALAR: Factor (slightly above 1) by which the fitted view is widened so
      the markers do not sit exactly on the map edge.

    Returns:
    - The map, with a blue marker for the reference walk and one red marker per
      nearby walk. Nearby walks without any recorded points are skipped.

    Raises:
    - ValueError: If the reference walk has no recorded points.
    """

    def lower_edge(value):
        # Push a south/west edge outwards. Dividing only lowers positive
        # values; a negative value has to be multiplied to become smaller.
        return value / SCALAR if value >= 0 else value * SCALAR

    def upper_edge(value):
        # Push a north/east edge outwards, again independent of the sign.
        return value * SCALAR if value >= 0 else value / SCALAR

    # Extract reference walk location.
    reference_walk = get_walk_data_for_id(con, reference_walk_id)
    if reference_walk.empty:
        raise ValueError(f"Reference walk {reference_walk_id!r} has no recorded points")
    ref_lat, ref_lon = reference_walk[["latitude", "longitude"]].values[0]

    # Create a map centered around the reference walk.
    m = folium.Map(location=[ref_lat, ref_lon], zoom_start=12)

    # Plot the reference walk in blue.
    folium.Marker(
        [ref_lat, ref_lon], popup=f"ID: {reference_walk_id}", icon=folium.Icon(color="blue")
    ).add_to(m)

    # Plot nearby walks in red.
    for walk_id, distance_km in nearby_walks_info:
        walk = get_walk_data_for_id(con, walk_id)
        if walk.empty:
            # One walk without points should not abort the whole plot.
            logger.warning(f"Skipping walk ID with no recorded points: {walk_id}")
            continue
        lat, lon = walk[["latitude", "longitude"]].values[0]
        folium.Marker(
            [lat, lon], popup=f"ID: {walk_id}, Dist: {distance_km:.2f} km", icon=folium.Icon(color="red")
        ).add_to(m)

    # Zoom to all markers, padded slightly on every side.
    (south, west), (north, east) = m.get_bounds()
    m.fit_bounds(
        [
            [lower_edge(south), lower_edge(west)],
            [upper_edge(north), upper_edge(east)],
        ]
    )
    return m

In [None]:
# Marker map of the reference walk and its neighbours.
# Note: this rebinds `m`, which until now held the all-walks map returned by create_map.
m = plot_walks_and_nearby(con, ref_walk_id, nearby_walks)

In [None]:
# Render the reference/nearby marker map as the cell output.
m