In [44]:
import pandas as pd
import ast

# Preprocessing

## 1. Merge Garmin and Strava Activity Datasets

In [109]:
# Read the raw Garmin and Strava activity exports (paths are relative to the
# notebook's working directory).
garmin_df, strava_df = (
    pd.read_csv(csv_path)
    for csv_path in ("erick-garmin-activities.csv", "max-strava-activities.csv")
)

### 1.1 Filter for Only Runs

In [110]:
# Keep only the running activities. The two exports label them differently:
# Garmin uses "Running" in "Activity Type", Strava uses "Run" in "type".
# .copy() makes each result an independent frame, so adding columns to it later
# does not write into a slice of the raw frame (SettingWithCopyWarning).
garmin_runs_df = garmin_df.loc[garmin_df["Activity Type"] == "Running"].copy()
strava_runs_df = strava_df.loc[strava_df["type"] == "Run"].copy()

### 1.2 Extract Average GAP from Strava DF

In [None]:
def compute_weighted_avg_grade_adjusted_speed(splits):
    """Return a run's distance-weighted mean grade-adjusted speed.

    Args:
        splits: The run's splits, either as the string form of a list of dicts
            (parsed with ``ast.literal_eval``) or as an already-parsed iterable
            of dicts. Every split must provide ``average_grade_adjusted_speed``
            and ``distance``.

    Returns:
        ``sum(speed * distance) / sum(distance)`` over the splits, ``0`` when
        the summed distance is zero, or ``None`` when the value is missing,
        cannot be parsed, or lacks the required fields.
    """
    try:
        # Strings are parsed; anything else is assumed to be parsed already.
        # list() also makes the two passes below safe for one-shot iterables.
        parsed_splits = list(ast.literal_eval(splits) if isinstance(splits, str) else splits)
        total_weighted_speed = sum(
            split['average_grade_adjusted_speed'] * split['distance'] for split in parsed_splits
        )
        total_distance = sum(split['distance'] for split in parsed_splits)
        weighted_avg_speed = total_weighted_speed / total_distance if total_distance != 0 else 0
        return weighted_avg_speed
    except (ValueError, SyntaxError, TypeError, KeyError):
        # Only data problems are swallowed (missing cell, malformed text, absent
        # or null fields). Programming errors such as a NameError from a missing
        # import still surface instead of silently producing an all-None column.
        return None # We will decide whether we want GAP or not later on (removes ~26 runs if not).

In [112]:
# Add each run's distance-weighted GAP, derived from its "splits_metric" value.
# assign() returns a new frame, so nothing is written into a filtered slice of
# strava_df (no SettingWithCopyWarning) and re-running the cell is harmless.
strava_runs_df = strava_runs_df.assign(
    average_grade_adjusted_speed=strava_runs_df["splits_metric"].apply(
        compute_weighted_avg_grade_adjusted_speed
    )
)

### 1.3 Column Unit Conversions
| Garmin    | Garmin Units | Strava | Strava Units |
| --------- | ------------ | ------ | ------------ |
| Distance | km | distance | m |
| Avg Run Cadence | spm | average_cadence | half of Garmin's value |
| Avg Pace | min/km | average_speed | m/s |
| Best Pace | min/km | max_speed | m/s |
| Avg GAP | min/km | average_grade_adjusted_speed | m/s |
| Elapsed Time | h:min:sec | elapsed_time | sec |
| Moving Time | h:min:sec | moving_time | sec |

In [113]:
def pace_to_mps(pace_str):
    """Convert a ``"min:sec"`` per-kilometre pace into metres per second.

    Args:
        pace_str: Pace text such as ``"5:02"``. ``"--"``, empty text, ``None``,
            NaN and a pace of zero are all treated as "no data".

    Returns:
        The speed in m/s as a float, or ``None`` when there is no usable pace.

    Raises:
        ValueError: If the text is not exactly two ``:``-separated numbers.
    """
    # An empty CSV cell arrives as NaN (a float that is not equal to itself).
    if pace_str is None or (isinstance(pace_str, float) and pace_str != pace_str):
        return None
    pace_text = pace_str.strip()
    if pace_text in ("", "--"):
        return None
    # float, like time_str_to_sec, so fractional seconds are accepted too.
    mins, secs = map(float, pace_text.split(":"))
    minutes_per_km = mins + secs / 60
    if minutes_per_km == 0:
        return None  # A zero pace has no finite speed; avoid ZeroDivisionError.
    meters_per_second = 1000 / (minutes_per_km * 60)
    return meters_per_second

In [114]:
def time_str_to_sec(time):
    """Convert a clock-style duration string into seconds.

    Args:
        time: ``"H:MM:SS"`` or ``"MM:SS"`` text; any field may be fractional.
            ``"--"``, empty text, ``None`` and NaN are treated as "no data".

    Returns:
        The duration in seconds as a float, or ``None`` when there is no data.

    Raises:
        ValueError: If the text is not two or three ``:``-separated numbers.
    """
    # An empty CSV cell arrives as NaN (a float that is not equal to itself).
    if time is None or (isinstance(time, float) and time != time):
        return None
    time_text = time.strip()
    if time_text in ("", "--"):
        return None
    fields = [float(field) for field in time_text.split(":")]
    if len(fields) == 2:
        fields.insert(0, 0.0)  # "MM:SS" has no hours field.
    if len(fields) != 3:
        raise ValueError(f"expected 'H:MM:SS' or 'MM:SS', got {time!r}")
    hrs, mins, secs = fields
    return (hrs * 3600) + (mins * 60) + secs

In [115]:
# Bring the Garmin columns onto Strava's units: km -> m, min/km pace -> m/s,
# clock-style durations -> seconds. assign() returns a new frame, so
# garmin_runs_df itself is left untouched.
# Cadence is the one exception: it would follow Garmin's units (Strava's value
# doubled), but it is not converted because the Apple Watch apparently doesn't
# record cadence.
garmin_runs_df_converted = garmin_runs_df.assign(
    **{
        "Distance": garmin_runs_df["Distance"] * 1000,
        **{
            pace_col: garmin_runs_df[pace_col].apply(pace_to_mps)
            for pace_col in ("Avg Pace", "Best Pace", "Avg GAP")
        },
        **{
            time_col: garmin_runs_df[time_col].apply(time_str_to_sec)
            for time_col in ("Elapsed Time", "Moving Time")
        },
    }
)
# Strava needs no conversion; it is only copied so both sources have a
# "_converted" frame.
strava_runs_df_converted = strava_runs_df.copy()

### 1.4 Column Name Conversion & Merge

In [119]:
# Garmin column name -> Strava column name, for the fields kept from both exports.
# Entry order is preserved by the dict.
# Left out on purpose, because the Apple Watch doesn't seem to record them:
#   cadence ("Avg Run Cadence" -> "average_cadence")
#   power   ("Avg Power" -> "average_watts", "Max Power" -> "max_watts")
garmin_to_strava_cols = dict(
    (
        ("Distance", "distance"),
        ("Calories", "calories"),
        ("Avg HR", "average_heartrate"),
        ("Max HR", "max_heartrate"),
        ("Avg Pace", "average_speed"),
        ("Best Pace", "max_speed"),
        ("Avg GAP", "average_grade_adjusted_speed"),
        ("Total Ascent", "total_elevation_gain"),
        ("Elapsed Time", "elapsed_time"),
        ("Moving Time", "moving_time"),
        ("Min Elevation", "elev_low"),
        ("Max Elevation", "elev_high"),
    )
)

In [117]:
# The Strava-side names of the mapping are the columns kept from both sources,
# in mapping order.
desired_columns = [strava_name for strava_name in garmin_to_strava_cols.values()]

# Put the Garmin frame on Strava's column names, then trim both frames to the
# shared columns.
garmin_runs_df_converted = garmin_runs_df_converted.rename(columns=garmin_to_strava_cols)
garmin_filtered_df = garmin_runs_df_converted.loc[:, desired_columns]
strava_filtered_df = strava_runs_df_converted.loc[:, desired_columns]

# Stack Garmin rows first and Strava rows second under a fresh 0..n-1 index.
# NOTE(review): the displayed result shows Garmin rows as `351` next to Strava
# rows as `520.0` in the same column, which suggests some Garmin columns are
# still text (object dtype) - confirm, and coerce with pd.to_numeric if so.
combined_df = pd.concat([garmin_filtered_df, strava_filtered_df], axis=0, ignore_index=True)

In [None]:
# Preview the merged frame (Garmin rows first, then Strava rows).
combined_df

Unnamed: 0,distance,calories,average_heartrate,max_heartrate,average_speed,max_speed,average_grade_adjusted_speed,total_elevation_gain,elapsed_time,moving_time,elev_low,elev_high
0,6510.0,351,158,175,3.311258,10.309278,3.311258,37,2014.0,1964.0,324,346
1,6180.0,317,149,171,3.322259,3.968254,3.300330,23,2005.0,1852.0,178,190
2,5040.0,269,156,170,3.267974,3.521127,3.257329,21,1580.0,1538.0,169,186
3,4930.0,259,157,169,3.378378,4.716981,3.355705,41,1492.0,1458.0,328,350
4,5510.0,297,156,171,3.194888,4.184100,3.154574,27,1778.0,1714.0,337,351
...,...,...,...,...,...,...,...,...,...,...,...,...
417,10458.0,520.0,124.2,145.0,2.779000,8.800000,2.894542,105.6,4048.0,3763.0,106.1,189.6
418,10494.0,524.0,138.5,172.0,2.812000,4.300000,,140.5,3895.0,3732.0,106.1,150.0
419,10453.8,524.0,158.6,175.0,2.855000,5.700000,,138.9,3831.0,3662.0,106.1,149.8
420,10149.1,522.0,141.0,166.0,2.682000,4.400000,,0.0,3784.0,3784.0,,
