<a href="https://colab.research.google.com/github/RodzanIskandar/Strava-Data/blob/main/Strava_testing.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [1]:
import pandas as pd
from datetime import datetime, timedelta

In [2]:
# Location of activities.csv inside the Strava export folder on Google Drive.
activities_csv_path = '/content/drive/MyDrive/Exploration Data Field/Personal Project/Strava/export_118123662/activities.csv'
raw_df = pd.read_csv(activities_csv_path)

# Run Dataset

In [None]:
# Remove the cap on displayed columns so no column of the frame is elided.
pd.options.display.max_columns = None
raw_df.head(n=5)

## Activity Filter

In [None]:
# List the distinct values of 'Activity Type' to see which one to filter on.
raw_df['Activity Type'].unique()

In [None]:
# Keep only the rows whose activity type is exactly 'Run'.
is_run = raw_df['Activity Type'] == 'Run'
raw_df = raw_df[is_run]

## Distance filter (exclude erroneous activities shorter than 1 km)

In [None]:
# Keep rows with a Distance of at least 1; rows with a missing Distance
# are dropped as well, because the comparison is False for NaN.
long_enough = raw_df['Distance'].ge(1)
raw_df = raw_df[long_enough]

In [None]:
# Columns carried forward into the working frame, in display order.
important_columns = [
    'Filename',
    'Activity ID',
    'Activity Type',
    'Activity Description',
    'Activity Date',
    'Distance',
    'Moving Time',
    'Average Speed',
    'Elapsed Time',
    'Average Elapsed Speed',
    'Total Steps',
]

In [None]:
# Take an explicit copy of the selected columns: later cells add new columns
# to `df`, and writing to a bare selection of `raw_df` makes pandas emit
# SettingWithCopyWarning.
df = raw_df[important_columns].copy()

In [None]:
# Show the last five rows of the working frame.
df.tail()

In [None]:
# Summary statistics of the working frame.
df.describe()

Column descriptions:
1. Activity ID = identifier of the activity
2. Distance = distance in km
3. Elapsed Time = total time from start to end, in seconds
4. Moving Time = the part of the elapsed time during which the user was moving
5. Average Speed = speed in m/s

In [None]:
# Print column dtypes and non-null counts.
df.info()

In [None]:
# Parse 'Activity Date' strings with the explicit
# "<Mon> <day>, <year>, <12h time> <AM/PM>" format.
activity_start = pd.to_datetime(df['Activity Date'], format="%b %d, %Y, %I:%M:%S %p")
df['Activity Date Modified'] = activity_start
# Local-time variant: the parsed timestamp shifted by a fixed +7 hours.
df['Activity Date Modified Local'] = activity_start + timedelta(hours=7)

In [None]:
# Factor turning m/s into km/h, written as (km per m) / (h per s).
mps_to_kmh = 0.001 / (1 / 3600)
average_speed_kmh = df['Average Speed'] * mps_to_kmh
# Pace in minutes per km: 60 minutes divided by the km covered in one hour.
df['Average Pace'] = 60 / average_speed_kmh

# Data over time

In [None]:
# Show the first five rows, now including the derived date and pace columns.
df.head()

In [None]:
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
metrics_check_overtime = ['Distance', 'Average Pace']

# One wide line chart per metric, plotted against the parsed activity timestamp.
for metric_name in metrics_check_overtime:
  fig, ax = plt.subplots(figsize=(15, 5))
  sns.lineplot(x=df['Activity Date Modified'], y=df[metric_name], ax=ax)
  plt.show()


# Distance vs Pace

In [None]:
# Square scatter of distance (y) against average pace (x), one dot per activity row.
fig, ax = plt.subplots(figsize=(5, 5))
sns.scatterplot(x=df['Average Pace'], y=df['Distance'], ax=ax)
plt.show()

# GPX Dataset

In [None]:
main_loc = "/content/drive/MyDrive/Exploration Data Field/Personal Project/Strava/export_118123662/"

# Pick the track file of the last activity that has one. The previous loop
# over df.tail() only ever kept its final iteration, and it raised a
# TypeError whenever one of the last five rows had a missing Filename.
gpx_file = df['Filename'].dropna().iloc[-1]
gpx_loc = main_loc + gpx_file


In [None]:
# Display the resolved track-file path as a sanity check.
gpx_loc

In [None]:
import gpxpy

In [None]:
# Parse the selected track file with gpxpy; the file handle is closed when
# the `with` block exits.
with open(gpx_loc, "r") as f:
    gpx = gpxpy.parse(f)

In [None]:
# Display the parsed object's repr.
gpx

In [None]:
# Flatten tracks -> segments -> points into one record per track point,
# preserving the order in which the points appear in the file.
points = [
    {
        "lat": pt.latitude,
        "lon": pt.longitude,
        "elev": pt.elevation,
        "time": pt.time,
    }
    for trk in gpx.tracks
    for seg in trk.segments
    for pt in seg.points
]

In [None]:
# One row per track point, with columns lat, lon, elev and time.
gpx_sample_df = pd.DataFrame(points)

In [None]:
# Show the first five track points.
gpx_sample_df.head()

In [None]:
import gpxpy.geo

# Per-point step metrics, one entry per track point, in the same
# tracks -> segments -> points order used to build the point records.
time_diff = []
distances = []
speeds = []

prev_point = None
prev_time = None

all_points = (
    pt
    for trk in gpx.tracks
    for seg in trk.segments
    for pt in seg.points
)

for point in all_points:
    if prev_point is not None:
        # Haversine distance from the previous point to this one.
        step_dist = gpxpy.geo.haversine_distance(
            prev_point.latitude, prev_point.longitude,
            point.latitude, point.longitude,
        )
        step_secs = (point.time - prev_time).total_seconds()
        # A zero or negative time gap yields a speed of 0 instead of dividing.
        step_speed = step_dist / step_secs if step_secs > 0 else 0
    else:
        # The very first point has no predecessor: all step metrics are 0.
        step_dist, step_secs, step_speed = 0, 0, 0

    time_diff.append(step_secs)
    distances.append(step_dist)
    speeds.append(step_speed)

    prev_point = point
    prev_time = point.time


In [None]:
# Attach the per-point step metrics, then derive the running total distance.
step_columns = {"distance_m": distances, "speed_mps": speeds}
for column_name, column_values in step_columns.items():
    gpx_sample_df[column_name] = column_values

cumulative_distance = gpx_sample_df["distance_m"].cumsum()
gpx_sample_df["cum_distance_m"] = cumulative_distance
gpx_sample_df['time_diff_seconds'] = time_diff

In [None]:
# Show the first five track points with the new step-metric columns.
gpx_sample_df.head()

In [None]:
import matplotlib.pyplot as plt

# Draw the route as a single line through the points in row order.
fig, ax = plt.subplots()
ax.plot(gpx_sample_df["lon"], gpx_sample_df["lat"])
ax.set_title("Running Route")
ax.set_xlabel("Longitude")
ax.set_ylabel("Latitude")
plt.show()


In [None]:
# gpx_sample_df["speed_kmh"] = gpx_sample_df["speed_mps"] * 3.6
def _pace_min_per_km(speed_mps):
    """Convert a speed in m/s to a pace in min/km; a speed of exactly 0 maps to 0."""
    if speed_mps == 0:
        return 0
    return 60 / (speed_mps * 3.6)


gpx_sample_df["pace"] = gpx_sample_df['speed_mps'].apply(_pace_min_per_km)

In [None]:
# Inspect the frame after adding the pace column.
gpx_sample_df

In [None]:
import matplotlib.pyplot as plt

# Route drawn as individual points coloured by pace. The colour range is
# pinned to 5-12 min/km, so values outside it saturate at the ends of the
# colour map. The title now says "Pace" to match what the colours and the
# colourbar actually encode (it previously said "Speed").
fig, ax = plt.subplots(figsize=(10, 8))

pace_scatter = ax.scatter(
    gpx_sample_df["lon"],
    gpx_sample_df["lat"],
    c=gpx_sample_df["pace"],  # colour encodes pace (min/km)
    cmap="viridis",
    s=12,                     # marker size
    vmin=5,
    vmax=12,
)

fig.colorbar(pace_scatter, ax=ax, label="pace (min/km)")
ax.set_title("Pace Heatmap on Running Route")
ax.set_xlabel("Longitude")
ax.set_ylabel("Latitude")
# Equal scaling of the longitude and latitude axes.
ax.set_aspect("equal", adjustable="box")

plt.show()


In [None]:
import matplotlib.pyplot as plt
from matplotlib.collections import LineCollection
import numpy as np

# Build one two-point line segment per consecutive pair of route points.
# A dedicated name is used here so that the `points` list the DataFrame was
# built from is not overwritten (re-running that earlier cell would break).
route_xy = np.array([gpx_sample_df["lon"], gpx_sample_df["lat"]]).T.reshape(-1, 1, 2)
segments = np.concatenate([route_xy[:-1], route_xy[1:]], axis=1)

# Segment i joins points i and i+1. The pace stored on row i+1 was computed
# from exactly that step, so the colour values are rows 1..N-1 (not 0..N-2,
# which would shift every colour one segment along the route).
segment_pace = gpx_sample_df["pace"].iloc[1:].to_numpy()

lc = LineCollection(
    segments,
    cmap="turbo",
    norm=plt.Normalize(segment_pace.min(), segment_pace.max()),
)
lc.set_array(segment_pace)
lc.set_linewidth(3)

fig, ax = plt.subplots(figsize=(10, 8))
ax.add_collection(lc)
# add_collection does not update the view limits, so rescale explicitly.
ax.autoscale()
ax.set_aspect("equal", adjustable="box")

fig.colorbar(lc, ax=ax, label="pace (min/km)")
ax.set_title("Running Route Pace Heatmap")
ax.set_xlabel("Longitude")
ax.set_ylabel("Latitude")

plt.show()


In [None]:
import folium
from folium.plugins import HeatMap

# Centre the map on the mean coordinate of the route.
m = folium.Map(location=[gpx_sample_df["lat"].mean(), gpx_sample_df["lon"].mean()], zoom_start=15)

# The frame has no `speed_kmh` column (only `speed_mps`), so selecting it
# raised a KeyError. Derive it here without modifying gpx_sample_df.
heat_data = (
    gpx_sample_df
    .assign(speed_kmh=gpx_sample_df["speed_mps"] * 3.6)
    [['lat', 'lon', 'speed_kmh']]
    .values
)

# Each row is [lat, lon, weight], with the speed in km/h used as the weight.
HeatMap(
    data=heat_data,
    radius=8,
    max_zoom=13
).add_to(m)

m.save("/content/drive/MyDrive/Exploration Data Field/Personal Project/Strava/speed_heatmap.html")


In [None]:
import webbrowser

# NOTE(review): webbrowser.open launches a browser on the machine running
# the kernel; on a hosted runtime this presumably shows nothing — confirm.
heatmap_html_path = "/content/drive/MyDrive/Exploration Data Field/Personal Project/Strava/speed_heatmap.html"
webbrowser.open(heatmap_html_path)