In [3]:
import pandas as pd
import numpy as np
from math import radians, sin, cos, asin, sqrt

# Define function 

def hms_to_sec(hms: str) -> float:
    """Convert 'HH:MM:SS' → total seconds (or NaN if invalid)."""
    try:
        h, m, s = map(int, hms.split(':'))
        return h * 3600 + m * 60 + s
    except:
        return np.nan

def haversine_km(lat1, lon1, lat2, lon2) -> float:
    """Great-circle distance (km) between two lat/lon points."""
    lat1, lon1, lat2, lon2 = map(radians, [lat1, lon1, lat2, lon2])
    dlat, dlon = lat2 - lat1, lon2 - lon1
    a = sin(dlat/2)**2 + cos(lat1)*cos(lat2)*sin(dlon/2)**2
    return 2 * 6371 * asin(sqrt(a))

def heat_index_c(temp_c: float, humidity: float) -> float:
    """Approximate 'feels-like' temperature in °C."""
    return 0.5 * (temp_c + 61 + (temp_c - 68)*1.2 + humidity*0.094)


In [4]:
# Load your data
df = pd.read_csv("activities_cleaned.csv")

# Time‐based features
df["Begin Timestamp"] = pd.to_datetime(df["Begin Timestamp"], dayfirst=True)
df["dow"]          = df["Begin Timestamp"].dt.weekday
df["hour_of_day"]  = df["Begin Timestamp"].dt.hour
df["is_weekend"]   = df["dow"] >= 5

# Duration & pace
df["duration_s"]        = df["Duration (h:m:s)"].apply(hms_to_sec)
df["pace_s_per_km"]     = df["duration_s"] / df["Distance (Raw)"]
df["pace_s_per_km"].replace([np.inf, -np.inf], np.nan, inplace=True)

# Elevation
df["elev_gain_per_km"]  = df["Elevation Gain (Raw)"] / df["Distance (Raw)"]
df["elev_gain_per_km"].replace([np.inf, -np.inf], np.nan, inplace=True)
df["net_elev_change"]   = df["Elevation Gain (Raw)"] - df["Elevation Loss (Raw)"]

# Geometry
df["start_end_dist_km"] = df.apply(
    lambda r: haversine_km(
        r["Begin Latitude (Decimal Degrees Raw)"],
        r["Begin Longitude (Decimal Degrees Raw)"],
        r["End Latitude (Decimal Degrees Raw)"],
        r["End Longitude (Decimal Degrees Raw)"],
    ), axis=1
)

# Weather “feels like”
df["heat_index"]       = df.apply(
    lambda r: heat_index_c(r["Temperature (Raw)"], r["Humidity (Raw)"]),
    axis=1
)

# Save
df.to_csv("activities_expanded.csv", index=False)
print("✅ activities_expanded.csv written")


✅ activities_expanded.csv written


The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["pace_s_per_km"].replace([np.inf, -np.inf], np.nan, inplace=True)
The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  df["elev_gain_per_km"].replace([np.inf, -np.inf], np.nan, inplace=True)
