# Strava API Pipeline Workbook

## Basic Setup

In [20]:
import requests
import json
import datetime
from datetime import datetime
from datetime import date
import time
import pandas as pd
import os
from dotenv import load_dotenv
import sqlite3

pd.set_option('display.max_columns', None)

# Developer Docs
# https://developers.strava.com/docs/getting-started/#basic

# Streams API Details
# https://developers.strava.com/docs/reference/#api-Streams-getActivityStreams

# Replace with your actual credentials
load_dotenv(dotenv_path="secrets.env")
CLIENT_ID = os.environ.get("CLIENT_ID")
CLIENT_SECRET = os.environ.get("CLIENT_SECRET")
REFRESH_TOKEN = os.environ.get("REFRESH_TOKEN")

DB_PATH = 'strava_data.db'

load_dotenv(dotenv_path="secrets.env", override=True)

True

In [35]:
DB_PATH = 'strava_data.db'

In [3]:
print(CLIENT_ID)

64543


### Error Troubleshooting

HTTP error occurred: 401 Client Error: Unauthorized for url: https://www.strava.com/api/v3/athlete/activities?page=1&per_page=1

Go to this link, and note the scopes being used at the end of the URL - 
https://www.strava.com/oauth/authorize?client_id=64543&response_type=code&redirect_uri=http://localhost/exchange_token&approval_prompt=force&scope=profile:read_all,activity:read_all

More details on scope here - https://developers.strava.com/docs/authentication/#detailsaboutrequestingaccess

Extract the auth code from the reply URL and assign it using the below cell.

Then, run the contents of the Code to Exchange Auth Code for Auth Token section to get a valid AUTH_TOKEN

In [5]:
AUTH_CODE = "0aef51cf61cce5e491d11a2528b4aaa30f9756ca"

In [7]:
load_dotenv(dotenv_path="secrets.env", override=True)
print(os.environ.get("AUTH_CODE"))

0aef51cf61cce5e491d11a2528b4aaa30f9756ca


## How this SHOULD work

Go to this link, and note the scopes being used at the end of the URL - 
https://www.strava.com/oauth/authorize?client_id=64543&response_type=code&redirect_uri=http://localhost/exchange_token&approval_prompt=force&scope=profile:read_all,activity:read_all

In [4]:
AUTH_CODE = 'cf2cfc3a2fda1c8b9f2d6ed48623175047d3ea29'

The auth code provides the initial access and scope to the app's athletes but is not used for queries.

That AUTH CODE should then be used below to exchange for a short lived (6 hours?) ACCESS TOKEN

We will also store the replied REFRESH_TOKEN in the environment variables

In [6]:
def get_auth_token(client_id, client_secret, auth_code):
    """Refreshes the access token using the refresh token."""
    response = requests.post(
        url="https://www.strava.com/oauth/token",
        data={
            "client_id": client_id,
            "client_secret": client_secret,
            "grant_type": "authorization_code",
            "code": auth_code,
        },
    )
    response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
    return response.json()

try:
    access_token_request = get_auth_token(CLIENT_ID, CLIENT_SECRET, AUTH_CODE)
    ACCESS_TOKEN = access_token_request["access_token"]
    os.environ["REFRESH_TOKEN"] = access_token_request["refresh_token"]

except requests.exceptions.HTTPError as e:
    print(f"HTTP error occurred: {e}")
except json.JSONDecodeError as e:
    print(f"JSON decode error occurred: {e}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")
    
print(os.environ.get("REFRESH_TOKEN"))
print(access_token_request)
print(os.environ.get("REFRESH_TOKEN"))

5d4dc1fd084e0994631899facefef89478de90d3
{'token_type': 'Bearer', 'expires_at': 1747337695, 'expires_in': 21600, 'refresh_token': '5d4dc1fd084e0994631899facefef89478de90d3', 'access_token': '46f0c8973ef94a38944485aa0ff048fe093c84bf', 'athlete': {'id': 24266563, 'username': 'smithcharlie', 'resource_state': 2, 'firstname': 'Charlie', 'lastname': 'Smith ðŸ¦š', 'bio': '', 'city': 'Minneapolis', 'state': 'MN', 'country': 'United States', 'sex': 'M', 'premium': False, 'summit': False, 'created_at': '2017-08-17T15:31:27Z', 'updated_at': '2025-04-19T18:19:00Z', 'badge_type_id': 0, 'weight': 63.5029, 'profile_medium': 'https://dgalywyr863hv.cloudfront.net/pictures/athletes/24266563/11743602/9/medium.jpg', 'profile': 'https://dgalywyr863hv.cloudfront.net/pictures/athletes/24266563/11743602/9/large.jpg', 'friend': None, 'follower': None}}
5d4dc1fd084e0994631899facefef89478de90d3


In [6]:
os.environ["ACCESS_TOKEN"] = ACCESS_TOKEN

In [7]:
print(ACCESS_TOKEN)

ASDFASDASDFADFDFA


In [9]:
import utils.strava_utils as strava_utils
ACCESS_TOKEN = "c3f5b56134d8a3b061cc58ee0ae47571b61a5f93"
strava_utils.update_env_variable("ACCESS_TOKEN", ACCESS_TOKEN)

ACCESS_TOKEN


THEN once that ACCESS TOKEN expires, we can try to exchange for a new one using the refresh code we were provided and the below code

## Refresh Auth Token

The manual cell below is necessary because the key from the prior process is stored in an environment variable. This should work for the airflow approach but will need to be copied manually when running local.

In [93]:
REFRESH_TOKEN = "5d4dc1fd084e0994631899facefef89478de90d3"
print(REFRESH_TOKEN)

5d4dc1fd084e0994631899facefef89478de90d3


In [138]:
# old gemini code don't know if useful or not
# This SHOULD work 
def refresh_access_token(client_id, client_secret, refresh_token):
    """Refreshes the access token using the refresh token."""
    response = requests.post(
        url="https://www.strava.com/oauth/token",
        data={
            "client_id": client_id,
            "client_secret": client_secret,
            "grant_type": "refresh_token",
            "refresh_token": refresh_token,
        },
    )
    response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
    os.environ["REFRESH_TOKEN"] = response["refresh_token"]
    return response.json()["access_token"]

In [139]:
try:
    refresh_attempt = refresh_access_token(CLIENT_ID, CLIENT_SECRET, REFRESH_TOKEN)
    ACCESS_TOKEN = refresh_attempt["access_token"]
    os.environ["REFRESH_TOKEN"] = access_token_request["refresh_token"]
    

except requests.exceptions.HTTPError as e:
    print(f"HTTP error occurred: {e}")
except json.JSONDecodeError as e:
    print(f"JSON decode error occurred: {e}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")

HTTP error occurred: 401 Client Error: Unauthorized for url: https://www.strava.com/oauth/token


In [140]:
refresh_attempt

{'token_type': 'Bearer',
 'access_token': 'd49fc739be1f780349f472f3b88a971ea6eb9843',
 'expires_at': 1747020565,
 'expires_in': 21600,
 'refresh_token': '5d4dc1fd084e0994631899facefef89478de90d3'}

## Get Auth Token
#### The Auth Code input here should be the code extracted from the blank webpage after a user approves a scope request

In [None]:
def get_auth_token(client_id, client_secret, auth_code):
    """Refreshes the access token using the refresh token."""
    response = requests.post(
        url="https://www.strava.com/oauth/token",
        data={
            "client_id": client_id,
            "client_secret": client_secret,
            "grant_type": "authorization_code",
            "code": auth_code,
        },
    )
    response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
    return response.json()

#### Code to Exchange Auth Code for Auth Token

In [61]:
try:
    swap_auth_token_attempt = get_auth_token(CLIENT_ID, CLIENT_SECRET, AUTH_CODE)
    AUTH_TOKEN = swap_auth_token_attempt["access_token"]

except requests.exceptions.HTTPError as e:
    print(f"HTTP error occurred: {e}")
except json.JSONDecodeError as e:
    print(f"JSON decode error occurred: {e}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")
    
print(swap_auth_token_attempt)

An unexpected error occurred: 'access_token'
{'message': 'Bad Request', 'errors': [{'resource': 'AuthorizationCode', 'field': 'code', 'code': 'invalid'}]}


## Base Functions for retreiving activities and timeseries data from Strava API

In [42]:
def get_activities(access_token, page=1, per_page=30, **optional_parameters):
    """Retrieves activities from the Strava API.
    Optional parameters should be provided at the end of the call like so:
    before = epoch_timestamp, after = epoch_timestamp
    """
    url = f"https://www.strava.com/api/v3/athlete/activities"
    headers = {"Authorization": f"Bearer {access_token}"}
    params = {"page": page, "per_page": per_page}
    params.update(optional_parameters)
    response = requests.get(url, headers=headers, params=params)
    response.raise_for_status()
    return response.json()

def get_streams(access_token, keys, activity_id):
    """Retrieves activities from the Strava API."""
    url = f"https://www.strava.com/api/v3/activities/" + str(activity_id) + "/streams"
    headers = {"Authorization": f"Bearer {access_token}"}
    params = {"keys" : keys, "key_by_type": True}
    # valid keys includes ["time", "distance", "latlng", "altitude", "heartrate", "cadence", "watts"]
    # https://developers.strava.com/docs/reference/#api-models-StreamSet
    response = requests.get(url, headers=headers, params=params)
    response.raise_for_status()
    return response.json()

def get_gear(access_token, gear_id):
    """Retrieves activities from the Strava API."""
    url = f"https://www.strava.com/api/v3/gear/" + str(gear_id)
    headers = {"Authorization": f"Bearer {access_token}"}
    # params = {"id" : gear_id}
    # valid keys includes ["time", "distance", "latlng", "altitude", "heartrate", "cadence", "watts"]
    # https://developers.strava.com/docs/reference/#api-models-StreamSet
    response = requests.get(url, headers=headers)
    response.raise_for_status()
    return response.json()

In [11]:
def get_latest_starting_coords(db_path):
    conn = sqlite3.connect(db_path)
    cur = conn.cursor()
    cur.execute("""SELECT start_latlng, end_latlng FROM activities 
                WHERE start_latlng is not NULL or 
                length(start_latlng) > 100
                order by start_date desc
                limit 1""")
    rows = cur.fetchone()
    print(rows)
    lat, lon = map(float, rows.split(",")) 
    conn.close()

    return (lat, lon)

get_latest_starting_coords(DB_PATH)

('[]', '[]')


AttributeError: 'tuple' object has no attribute 'split'

## Base functions for storing data in db

#### Initialize Activities Table

In [9]:
def initialize_database():
    """Create the SQLite database and full 'activities' table."""
    conn = sqlite3.connect(DB_PATH)
    c = conn.cursor()

    c.execute('''
    CREATE TABLE IF NOT EXISTS activities (
        id INTEGER PRIMARY KEY,
        resource_state INTEGER,
        athlete_id INTEGER,
        athlete_resource_state INTEGER,
        name TEXT,
        distance REAL,
        moving_time INTEGER,
        elapsed_time INTEGER,
        total_elevation_gain REAL,
        type TEXT,
        sport_type TEXT,
        workout_type INTEGER,
        start_date TEXT,
        start_date_local TEXT,
        timezone TEXT,
        utc_offset REAL,
        location_city TEXT,
        location_state TEXT,
        location_country TEXT,
        achievement_count INTEGER,
        kudos_count INTEGER,
        comment_count INTEGER,
        athlete_count INTEGER,
        photo_count INTEGER,
        map_id TEXT,
        map_summary_polyline TEXT,
        map_resource_state INTEGER,
        trainer BOOLEAN,
        commute BOOLEAN,
        manual BOOLEAN,
        private BOOLEAN,
        visibility TEXT,
        flagged BOOLEAN,
        gear_id TEXT,
        start_latlng TEXT,
        end_latlng TEXT,
        average_speed REAL,
        max_speed REAL,
        average_cadence REAL,
        average_watts REAL,
        max_watts INTEGER,
        weighted_average_watts INTEGER,
        device_watts BOOLEAN,
        kilojoules REAL,
        has_heartrate BOOLEAN,
        average_heartrate REAL,
        max_heartrate REAL,
        heartrate_opt_out BOOLEAN,
        display_hide_heartrate_option BOOLEAN,
        elev_high REAL,
        elev_low REAL,
        upload_id INTEGER,
        upload_id_str TEXT,
        external_id TEXT,
        from_accepted_tag BOOLEAN,
        pr_count INTEGER,
        total_photo_count INTEGER,
        has_kudoed BOOLEAN,
        import_date TEXT
    )
    ''')

    conn.commit()
    conn.close()

#### Activities Individual DB Entry Write

In [10]:
import json

def insert_activities(activity_list):
    """Insert activity records, skipping those with duplicate 'id'."""
    conn = sqlite3.connect(DB_PATH)
    c = conn.cursor()

    for activity in activity_list:
        try:
            c.execute('''
            INSERT INTO activities VALUES (
                :id, :resource_state, 
                :athlete_id, :athlete_resource_state,
                :name, :distance, :moving_time, :elapsed_time, :total_elevation_gain,
                :type, :sport_type, :workout_type, :start_date, :start_date_local,
                :timezone, :utc_offset, :location_city, :location_state, :location_country,
                :achievement_count, :kudos_count, :comment_count, :athlete_count, :photo_count,
                :map_id, :map_summary_polyline, :map_resource_state,
                :trainer, :commute, :manual, :private, :visibility, :flagged, :gear_id,
                :start_latlng, :end_latlng,
                :average_speed, :max_speed, :average_cadence, :average_watts,
                :max_watts, :weighted_average_watts, :device_watts, :kilojoules,
                :has_heartrate, :average_heartrate, :max_heartrate,
                :heartrate_opt_out, :display_hide_heartrate_option,
                :elev_high, :elev_low,
                :upload_id, :upload_id_str, :external_id, :from_accepted_tag,
                :pr_count, :total_photo_count, :has_kudoed, :import_date
            )
            ''', {
                "id": activity["id"],
                "resource_state": activity.get("resource_state"),
                "athlete_id": activity.get("athlete", {}).get("id"),
                "athlete_resource_state": activity.get("athlete", {}).get("resource_state"),
                "name": activity.get("name"),
                "distance": activity.get("distance"),
                "moving_time": activity.get("moving_time"),
                "elapsed_time": activity.get("elapsed_time"),
                "total_elevation_gain": activity.get("total_elevation_gain"),
                "type": activity.get("type"),
                "sport_type": activity.get("sport_type"),
                "workout_type": activity.get("workout_type"),
                "start_date": activity.get("start_date"),
                "start_date_local": activity.get("start_date_local"),
                "timezone": activity.get("timezone"),
                "utc_offset": activity.get("utc_offset"),
                "location_city": activity.get("location_city"),
                "location_state": activity.get("location_state"),
                "location_country": activity.get("location_country"),
                "achievement_count": activity.get("achievement_count"),
                "kudos_count": activity.get("kudos_count"),
                "comment_count": activity.get("comment_count"),
                "athlete_count": activity.get("athlete_count"),
                "photo_count": activity.get("photo_count"),
                "map_id": activity.get("map", {}).get("id"),
                "map_summary_polyline": activity.get("map", {}).get("summary_polyline"),
                "map_resource_state": activity.get("map", {}).get("resource_state"),
                "trainer": activity.get("trainer"),
                "commute": activity.get("commute"),
                "manual": activity.get("manual"),
                "private": activity.get("private"),
                "visibility": activity.get("visibility"),
                "flagged": activity.get("flagged"),
                "gear_id": activity.get("gear_id"),
                "start_latlng": json.dumps(activity.get("start_latlng")),
                "end_latlng": json.dumps(activity.get("end_latlng")),
                "average_speed": activity.get("average_speed"),
                "max_speed": activity.get("max_speed"),
                "average_cadence": activity.get("average_cadence"),
                "average_watts": activity.get("average_watts"),
                "max_watts": activity.get("max_watts"),
                "weighted_average_watts": activity.get("weighted_average_watts"),
                "device_watts": activity.get("device_watts"),
                "kilojoules": activity.get("kilojoules"),
                "has_heartrate": activity.get("has_heartrate"),
                "average_heartrate": activity.get("average_heartrate"),
                "max_heartrate": activity.get("max_heartrate"),
                "heartrate_opt_out": activity.get("heartrate_opt_out"),
                "display_hide_heartrate_option": activity.get("display_hide_heartrate_option"),
                "elev_high": activity.get("elev_high"),
                "elev_low": activity.get("elev_low"),
                "upload_id": activity.get("upload_id"),
                "upload_id_str": activity.get("upload_id_str"),
                "external_id": activity.get("external_id"),
                "from_accepted_tag": activity.get("from_accepted_tag"),
                "pr_count": activity.get("pr_count"),
                "total_photo_count": activity.get("total_photo_count"),
                "has_kudoed": activity.get("has_kudoed"),
                "import_date": datetime.now().isoformat()
            })
        except sqlite3.IntegrityError:
            print(f"Skipping duplicate activity with id {activity['id']}")

    conn.commit()
    conn.close()


#### Activities Batch Write

In [11]:
def insert_activities_batch(activity_list, db_path):
    """Efficiently insert multiple activity records into the database."""
    conn = sqlite3.connect(db_path)
    c = conn.cursor()

    data = []
    for activity in activity_list:
        data.append({
            "id": activity["id"],
            "resource_state": activity.get("resource_state"),
            "athlete_id": activity.get("athlete", {}).get("id"),
            "athlete_resource_state": activity.get("athlete", {}).get("resource_state"),
            "name": activity.get("name"),
            "distance": activity.get("distance"),
            "moving_time": activity.get("moving_time"),
            "elapsed_time": activity.get("elapsed_time"),
            "total_elevation_gain": activity.get("total_elevation_gain"),
            "type": activity.get("type"),
            "sport_type": activity.get("sport_type"),
            "workout_type": activity.get("workout_type"),
            "start_date": activity.get("start_date"),
            "start_date_local": activity.get("start_date_local"),
            "timezone": activity.get("timezone"),
            "utc_offset": activity.get("utc_offset"),
            "location_city": activity.get("location_city"),
            "location_state": activity.get("location_state"),
            "location_country": activity.get("location_country"),
            "achievement_count": activity.get("achievement_count"),
            "kudos_count": activity.get("kudos_count"),
            "comment_count": activity.get("comment_count"),
            "athlete_count": activity.get("athlete_count"),
            "photo_count": activity.get("photo_count"),
            "map_id": activity.get("map", {}).get("id"),
            "map_summary_polyline": activity.get("map", {}).get("summary_polyline"),
            "map_resource_state": activity.get("map", {}).get("resource_state"),
            "trainer": activity.get("trainer"),
            "commute": activity.get("commute"),
            "manual": activity.get("manual"),
            "private": activity.get("private"),
            "visibility": activity.get("visibility"),
            "flagged": activity.get("flagged"),
            "gear_id": activity.get("gear_id"),
            "start_latlng": json.dumps(activity.get("start_latlng")),
            "end_latlng": json.dumps(activity.get("end_latlng")),
            "average_speed": activity.get("average_speed"),
            "max_speed": activity.get("max_speed"),
            "average_cadence": activity.get("average_cadence"),
            "average_watts": activity.get("average_watts"),
            "max_watts": activity.get("max_watts"),
            "weighted_average_watts": activity.get("weighted_average_watts"),
            "device_watts": activity.get("device_watts"),
            "kilojoules": activity.get("kilojoules"),
            "has_heartrate": activity.get("has_heartrate"),
            "average_heartrate": activity.get("average_heartrate"),
            "max_heartrate": activity.get("max_heartrate"),
            "heartrate_opt_out": activity.get("heartrate_opt_out"),
            "display_hide_heartrate_option": activity.get("display_hide_heartrate_option"),
            "elev_high": activity.get("elev_high"),
            "elev_low": activity.get("elev_low"),
            "upload_id": activity.get("upload_id"),
            "upload_id_str": activity.get("upload_id_str"),
            "external_id": activity.get("external_id"),
            "from_accepted_tag": activity.get("from_accepted_tag"),
            "pr_count": activity.get("pr_count"),
            "total_photo_count": activity.get("total_photo_count"),
            "has_kudoed": activity.get("has_kudoed"),
            "import_date": datetime.now().isoformat()
        })

    try:
        c.executemany('''
        INSERT OR IGNORE INTO activities VALUES (
            :id, :resource_state, :athlete_id, :athlete_resource_state,
            :name, :distance, :moving_time, :elapsed_time, :total_elevation_gain,
            :type, :sport_type, :workout_type, :start_date, :start_date_local,
            :timezone, :utc_offset, :location_city, :location_state, :location_country,
            :achievement_count, :kudos_count, :comment_count, :athlete_count, :photo_count,
            :map_id, :map_summary_polyline, :map_resource_state,
            :trainer, :commute, :manual, :private, :visibility, :flagged, :gear_id,
            :start_latlng, :end_latlng,
            :average_speed, :max_speed, :average_cadence, :average_watts,
            :max_watts, :weighted_average_watts, :device_watts, :kilojoules,
            :has_heartrate, :average_heartrate, :max_heartrate,
            :heartrate_opt_out, :display_hide_heartrate_option,
            :elev_high, :elev_low,
            :upload_id, :upload_id_str, :external_id, :from_accepted_tag,
            :pr_count, :total_photo_count, :has_kudoed, :import_date
        )
        ''', data)
    except sqlite3.Error as e:
        print("Error inserting batch:", e)

    conn.commit()
    conn.close()


#### Function to Rebuild single activity from flattened version in db

In [12]:
def get_activity_by_id(activity_id):
    """Retrieve a single activity and reconstruct its nested format."""
    conn = sqlite3.connect(DB_PATH)
    conn.row_factory = sqlite3.Row
    c = conn.cursor()

    c.execute("SELECT * FROM activities WHERE id = ?", (activity_id,))
    row = c.fetchone()
    conn.close()

    if row is None:
        return None

    return {
        "id": row["id"],
        "resource_state": row["resource_state"],
        "athlete": {
            "id": row["athlete_id"],
            "resource_state": row["athlete_resource_state"]
        },
        "name": row["name"],
        "distance": row["distance"],
        "moving_time": row["moving_time"],
        "elapsed_time": row["elapsed_time"],
        "total_elevation_gain": row["total_elevation_gain"],
        "type": row["type"],
        "sport_type": row["sport_type"],
        "workout_type": row["workout_type"],
        "start_date": row["start_date"],
        "start_date_local": row["start_date_local"],
        "timezone": row["timezone"],
        "utc_offset": row["utc_offset"],
        "location_city": row["location_city"],
        "location_state": row["location_state"],
        "location_country": row["location_country"],
        "achievement_count": row["achievement_count"],
        "kudos_count": row["kudos_count"],
        "comment_count": row["comment_count"],
        "athlete_count": row["athlete_count"],
        "photo_count": row["photo_count"],
        "map": {
            "id": row["map_id"],
            "summary_polyline": row["map_summary_polyline"],
            "resource_state": row["map_resource_state"]
        },
        "trainer": bool(row["trainer"]),
        "commute": bool(row["commute"]),
        "manual": bool(row["manual"]),
        "private": bool(row["private"]),
        "visibility": row["visibility"],
        "flagged": bool(row["flagged"]),
        "gear_id": row["gear_id"],
        "start_latlng": json.loads(row["start_latlng"]),
        "end_latlng": json.loads(row["end_latlng"]),
        "average_speed": row["average_speed"],
        "max_speed": row["max_speed"],
        "average_cadence": row["average_cadence"],
        "average_watts": row["average_watts"],
        "max_watts": row["max_watts"],
        "weighted_average_watts": row["weighted_average_watts"],
        "device_watts": bool(row["device_watts"]),
        "kilojoules": row["kilojoules"],
        "has_heartrate": bool(row["has_heartrate"]),
        "average_heartrate": row["average_heartrate"],
        "max_heartrate": row["max_heartrate"],
        "heartrate_opt_out": bool(row["heartrate_opt_out"]),
        "display_hide_heartrate_option": bool(row["display_hide_heartrate_option"]),
        "elev_high": row["elev_high"],
        "elev_low": row["elev_low"],
        "upload_id": row["upload_id"],
        "upload_id_str": row["upload_id_str"],
        "external_id": row["external_id"],
        "from_accepted_tag": bool(row["from_accepted_tag"]),
        "pr_count": row["pr_count"],
        "total_photo_count": row["total_photo_count"],
        "has_kudoed": bool(row["has_kudoed"])
    }


#### Simple Query to get records loaded during current day

In [13]:
def count_activities_imported_today(db_path):
    """Count how many activities were imported today based on the full ISO 8601 import_date timestamp."""
    conn = sqlite3.connect(db_path)
    c = conn.cursor()

    today_str = date.today().isoformat()  # 'YYYY-MM-DD'

    # Use substr to extract the date portion (first 10 characters)
    c.execute("""
        SELECT COUNT(*) 
        FROM activities 
        WHERE substr(import_date, 1, 10) = ?
    """, (today_str,))
    
    count = c.fetchone()[0]
    conn.close()
    return count


#### Simple Query to Latest record

In [21]:
def latest_activity_imported(db_path):
    """Count how many activities were imported today based on the full ISO 8601 import_date timestamp."""
    conn = sqlite3.connect(db_path)
    c = conn.cursor()

    today_str = date.today().isoformat()  # 'YYYY-MM-DD'

    # Use substr to extract the date portion (first 10 characters)
    c.execute("""
        SELECT * 
        FROM activities 
        WHERE import_date = 
        (select max(import_date) from activities)
    """)
    
    record = c.fetchone()
    conn.close()
    return record

latest_activity_imported(DB_PATH)


(14497411369,
 2,
 24266563,
 1,
 'Morning Walk',
 4647.5,
 2861,
 2861,
 45.0,
 'Walk',
 'Walk',
 None,
 '2025-05-16T10:48:01Z',
 '2025-05-16T05:48:01Z',
 '(GMT-06:00) America/Chicago',
 -18000.0,
 None,
 None,
 None,
 0,
 1,
 0,
 1,
 0,
 'a14497411369',
 'o}jqG`uqxP?c@IkBOeA]{ABo@EyANuAGIq@g@g@o@m@iASo@A}@?c@LkAJUXc@VUd@g@T[@IGM_BeAQYGWS_DA_EJ_A@a@C]EYGCO@MCs@Ls@Dy@OeANWGy@]k@MYAm@@]?g@GQISOWi@Ka@[mDSi@e@c@KGg@KiABc@As@SIBONUt@GFc@k@k@[WEUF_@`@UhAJnB?nAD`ACrA@b@WhCETIPG\\Oh@a@j@QLUZ_AbAAJ@xACjADDL@X?v@FTAn@HLAVFh@C`AF\\?jANz@Rv@Zd@D^LL?t@^bBf@RNr@`@PRVh@l@nBp@fB^j@VZRPn@\\^LXDx@D|A@d@D`@X^f@n@l@TXTNFRJf@PpA\\~@',
 2,
 0,
 0,
 0,
 0,
 'everyone',
 0,
 'g15302697',
 '[44.949665, -93.324741]',
 '[44.949581, -93.32488]',
 1.624,
 2.5,
 60.9,
 None,
 None,
 None,
 None,
 None,
 1,
 86.1,
 108.0,
 0,
 1,
 287.0,
 275.0,
 15467510211,
 '15467510211',
 '469062869529427971.fit',
 0,
 0,
 1,
 0,
 '2025-05-16T07:45:35.460549')

In [14]:
latest_activity_imported(DB_PATH)

(14493150635,
 2,
 24266563,
 1,
 'Turbulent',
 7286.1,
 2362,
 2376,
 34.0,
 'Run',
 'Run',
 0,
 '2025-05-15T21:44:22Z',
 '2025-05-15T16:44:22Z',
 '(GMT-06:00) America/Chicago',
 -18000.0,
 None,
 None,
 None,
 0,
 13,
 0,
 1,
 0,
 'a14493150635',
 'w}jqGluqxPAeAKmAI}@Ki@Sg@]]k@s@m@g@e@Qw@E_BDSEq@WcBeAW]Wk@MOG?EDSr@m@z@k@tAc@d@MZMl@GHIDO?k@W]c@o@m@o@aAOi@[aBA]E[Qs@_@}@_Aq@e@OkA{@I?S@_@PWBs@Sg@COFORc@RILIX@VD\\@`@E~@EVE@QW{@aAMW?INNHDF?VO@EISWWOGa@@a@CKESOQGo@CUBe@Km@GIEQQa@{@OW}@_Ak@c@s@s@O?]ZSx@QTOF_@HOBa@AKBSXMZEZGpA@`ADt@CvAWrBMb@ATN`@NPH@`@CVVFJ?^G^MT[Vw@TMNSh@QnACv@O`BC|AInB?xBE~@H~A?l@HlAC\\@NL~@JZFJ`@\\DJX|BB`BDf@DTTt@Jl@FRLJJBV@hCQTBp@?RDT@`@Cr@I`@?j@HXV^L\\XVb@DLl@fD\\p@~AfBXn@HDRDd@ZbAjAd@d@TJVFVBt@C|@Nh@@`AEnBSn@Dh@GrCIlA?VCn@Bb@C`@@HERW^?NEf@k@j@c@Za@',
 2,
 0,
 0,
 0,
 0,
 'everyone',
 0,
 'g17224321',
 '[44.949358, -93.324792]',
 '[44.949456, -93.324709]',
 3.085,
 4.6,
 88.6,
 217.2,
 293,
 218,
 1,
 513.1,
 1,
 126.7,
 140.0,
 0,
 1,
 292.0,
 273.0,
 15462887799,
 '154

### Simple Query to get Gear Id from Latest Record

In [15]:
def get_all_gear_ids(db_path):
    """Count how many activities were imported today based on the full ISO 8601 import_date timestamp."""
    conn = sqlite3.connect(db_path)
    c = conn.cursor()

    today_str = date.today().isoformat()  # 'YYYY-MM-DD'

    # Use substr to extract the date portion (first 10 characters)
    c.execute("""
        SELECT distinct gear_id 
        FROM activities 
        WHERE gear_id IS NOT NULL
    """)
    
    record = c.fetchall()
    conn.close()
    return record

#### Simple Query to Get Specific record

In [16]:
def activity_data_all(db_path, activity_id):
    """Count how many activities were imported today based on the full ISO 8601 import_date timestamp."""
    conn = sqlite3.connect(db_path)
    c = conn.cursor()

    # Run query and load into DataFrame (with column headers)
    df = pd.read_sql_query(
        "SELECT * FROM activities WHERE id = ?",
        conn,
        params=(activity_id,)
    )
    return df

#### Simple Query to get all Activity IDs

In [17]:
def get_all_activity_ids(db_path):
    """Count how many activities were imported today based on the full ISO 8601 import_date timestamp."""
    conn = sqlite3.connect(db_path)
    c = conn.cursor()

    today_str = date.today().isoformat()  # 'YYYY-MM-DD'

    # Use substr to extract the date portion (first 10 characters)
    c.execute("""
        SELECT id
        FROM activities 
        order by start_date desc
              """)
    
    count = c.fetchall()
    conn.close()
    return count

get_all_activity_ids(DB_PATH)

[(14487474408,),
 (14483199626,),
 (14477593705,),
 (14472461251,),
 (14465685256,),
 (14461585344,),
 (14448198825,),
 (14445846595,),
 (14439139788,),
 (14436191500,),
 (14429488624,),
 (14429488648,),
 (14420081833,),
 (14414666612,),
 (14411191781,),
 (14411191925,),
 (14411192865,),
 (14404286770,),
 (14399681906,),
 (14393650080,),
 (14389805238,),
 (14383969194,),
 (14380480511,),
 (14379281304,),
 (14376062303,),
 (14369842769,),
 (14368997811,),
 (14365012542,),
 (14358400377,),
 (14354639209,),
 (14353398775,),
 (14348764058,),
 (14342981460,),
 (14338478098,),
 (14338218423,),
 (14337526946,),
 (14331978461,),
 (14326961057,),
 (14321075456,),
 (14317096914,),
 (14310902427,),
 (14305742622,),
 (14302735322,),
 (14292794382,),
 (14285619866,),
 (14280817149,),
 (14276784576,),
 (14271402828,),
 (14267399463,),
 (14261794985,),
 (14245504574,),
 (14237190879,),
 (14226658058,),
 (14222270286,),
 (14217673520,),
 (14214735620,),
 (14211839733,),
 (14207239257,),
 (14202362392,

#### Simple Query to get all Activity IDs with HR and presumably Streams data

In [18]:
def get_all_activity_ids_with_HR(db_path):
    """Get all activity ids that have HR data, which hopefully will help us avoid a 404 call on the streams data.
    My thinking is that if no HR data, we probably have no streams data at all.
    """
    conn = sqlite3.connect(db_path)
    c = conn.cursor()

    today_str = date.today().isoformat()  # 'YYYY-MM-DD'

    # Use substr to extract the date portion (first 10 characters)
    c.execute("""
        SELECT id
        FROM activities 
        WHERE has_heartrate = 1
        order by start_date desc
              """)
    
    count = c.fetchall()
    conn.close()
    return count

In [19]:
def latest_activity_import_date(db_path):
    """Query for latest activity import_date as a unix timestamp"""
    conn = sqlite3.connect(db_path)
    c = conn.cursor()

    today_str = date.today().isoformat()  # 'YYYY-MM-DD'

    # Use substr to extract the date portion (first 10 characters)
    c.execute("""
        SELECT max(import_date)
        FROM activities 
    """)
    
    record = c.fetchone()[0]
    conn.close()
    dt = datetime.strptime(record, "%Y-%m-%dT%H:%M:%S.%f")
    return int(dt.timestamp())

In [20]:
latest_activity_import_date(DB_PATH)

1747316241

## Database Streams Code

#### Initialize DB  

In [21]:
def initialize_streams_db():
    conn = sqlite3.connect(DB_PATH)
    c = conn.cursor()

    c.execute("""
    CREATE TABLE IF NOT EXISTS streams (
        activity_id INTEGER PRIMARY KEY,
        
        time_data TEXT,
        time_series_type TEXT,
        time_original_size INTEGER,
        time_resolution TEXT,

        distance_data TEXT,
        distance_series_type TEXT,
        distance_original_size INTEGER,
        distance_resolution TEXT,

        latlng_data TEXT,
        latlng_series_type TEXT,
        latlng_original_size INTEGER,
        latlng_resolution TEXT,

        altitude_data TEXT,
        altitude_series_type TEXT,
        altitude_original_size INTEGER,
        altitude_resolution TEXT,

        velocity_smooth_data TEXT,
        velocity_smooth_series_type TEXT,
        velocity_smooth_original_size INTEGER,
        velocity_smooth_resolution TEXT,

        heartrate_data TEXT,
        heartrate_series_type TEXT,
        heartrate_original_size INTEGER,
        heartrate_resolution TEXT,

        cadence_data TEXT,
        cadence_series_type TEXT,
        cadence_original_size INTEGER,
        cadence_resolution TEXT,

        watts_data TEXT,
        watts_series_type TEXT,
        watts_original_size INTEGER,
        watts_resolution TEXT,

        moving_data TEXT,
        moving_series_type TEXT,
        moving_original_size INTEGER,
        moving_resolution TEXT,

        grade_smooth_data TEXT,
        grade_smooth_series_type TEXT,
        grade_smooth_original_size INTEGER,
        grade_smooth_resolution TEXT
    )
    """)

    conn.commit()
    conn.close()

#### Insert Stream Data for Single Activity ID

In [22]:
def insert_stream_data(activity_id, stream_dict, db_path):
    """
    Inserts or replaces a row in the streams table for a given activity_id.
    stream_dict should have keys like 'time', 'distance', etc., with each value a dict containing:
    {
        'data': [...],
        'series_type': '...',
        'original_size': ...,
        'resolution': '...'
    }
    """
    conn = sqlite3.connect(db_path)
    c = conn.cursor()

    # Create the column mappings dynamically
    base_columns = []
    placeholders = []
    values = []

    # Always include activity_id
    base_columns.append("activity_id")
    placeholders.append("?")
    values.append(activity_id)

    for key, val in stream_dict.items():
        if not isinstance(val, dict):
            continue  # skip malformed

        base_columns.extend([
            f"{key}_data",
            f"{key}_series_type",
            f"{key}_original_size",
            f"{key}_resolution"
        ])
        placeholders.extend(["?"] * 4)

        values.extend([
            json.dumps(val.get("data")),
            val.get("series_type"),
            val.get("original_size"),
            val.get("resolution")
        ])

    sql = f"""
        INSERT INTO streams ({', '.join(base_columns)})
        VALUES ({', '.join(placeholders)})
    """
    

    try:
        c.execute(sql, values)
        conn.commit()
        conn.close()
    except sqlite3.IntegrityError:
        print(f"Activity {activity_id} already exists in the 'streams' table. Skipping insert.")

#### Simple Query to Get all activity IDs from Streams

In [23]:
def get_all_activity_ids_streams(db_path):
    """Count how many activities were imported today based on the full ISO 8601 import_date timestamp."""
    conn = sqlite3.connect(db_path)
    c = conn.cursor()

    today_str = date.today().isoformat()  # 'YYYY-MM-DD'

    # Use substr to extract the date portion (first 10 characters)
    c.execute("""
        SELECT activity_id
        FROM streams 
              """)
    
    count = c.fetchall()
    conn.close()
    return count

In [24]:
#### Simple Query to Get all Streams for latest activity

In [18]:
def single_stream_example(db_path):
    """Count how many activities were imported today based on the full ISO 8601 import_date timestamp."""
    conn = sqlite3.connect(db_path)
    
    # Run query and load into DataFrame (with column headers)
    df = pd.read_sql_query(
        """
WITH daily_load AS (
                SELECT 
                    date(datetime(start_date)) as date,
                    -- Using distance * speed as load proxy (training impulse)
                    SUM(average_speed * moving_time / 1000.0) as daily_load
                FROM activities
                WHERE type = 'Run'
                GROUP BY date
            )
                -- Create a continuous date series to handle missing days
                SELECT date(date('now', '-90 days') + (n-1) || ' days') as date
                FROM (
                    SELECT row_number() OVER () as n 
                    FROM activities LIMIT 91
                )
                WHERE date <= date('now')

    """,
        conn
    )
    return df


In [19]:
test_df = single_stream_example(DB_PATH)
test_df 

Unnamed: 0,date
0,-4707-06-10
1,-4707-06-11
2,-4707-06-12
3,-4707-06-13
4,-4707-06-14
...,...
86,-4707-09-04
87,-4707-09-05
88,-4707-09-06
89,-4707-09-07


In [27]:
len(test_df.time_data[0])

42870

In [28]:
len(test_df.heartrate_data[0])

36550

In [8]:
def query_tester(db_path):
    """Count how many activities were imported today based on the full ISO 8601 import_date timestamp."""
    conn = sqlite3.connect(db_path)
    
    # Run query and load into DataFrame (with column headers)
    df = pd.read_sql_query(
        """
SELECT
            activity_id,
            MIN(CAST(time_data AS INTEGER)) AS min_time,
            MAX(CAST(time_data AS INTEGER)) AS max_time,
            (MAX(CAST(time_data AS INTEGER)) - MIN(CAST(time_data AS INTEGER))) AS total_duration
        FROM streams
        GROUP BY activity_id
    """,
        conn
    )
    return df

In [9]:
query_tester(DB_PATH)

Unnamed: 0,activity_id,min_time,max_time,total_duration
0,2056073959,0,0,0
1,2058321970,0,0,0
2,2063976670,0,0,0
3,2070854538,0,0,0
4,2070854539,0,0,0
...,...,...,...,...
2755,14465685256,0,0,0
2756,14472461251,0,0,0
2757,14477593705,0,0,0
2758,14483199626,0,0,0


In [29]:
def get_streams_data(activity_id, db_path):
    conn = sqlite3.connect(db_path)
    cur = conn.cursor()
    cur.execute("SELECT distance_data, heartrate_data, altitude_data FROM streams WHERE activity_id = ?", (activity_id,))
    row = cur.fetchone()
    conn.close()

    if row:
        try:
            distance = json.loads(row[0]) if row[0] else []
            heartrate = json.loads(row[1]) if row[1] else []
            altitude = json.loads(row[2]) if row[2] else []
            return distance, heartrate, altitude
        except Exception as e:
            print("Failed to load stream data:", e)
    return [], [], []

In [44]:
z = get_streams_data(14477593705, DB_PATH)

In [45]:
z

([0.0,
  0.0,
  0.0,
  2.5,
  5.0,
  6.0,
  7.0,
  8.5,
  10.0,
  12.0,
  14.0,
  16.5,
  19.0,
  21.5,
  24.0,
  27.0,
  30.0,
  33.0,
  36.0,
  38.5,
  41.0,
  43.5,
  46.0,
  49.0,
  52.0,
  54.5,
  57.0,
  59.5,
  62.0,
  64.5,
  67.0,
  69.5,
  72.0,
  75.0,
  78.0,
  81.5,
  85.0,
  87.5,
  90.0,
  93.0,
  96.0,
  100.0,
  104.0,
  106.0,
  108.0,
  110.5,
  113.0,
  115.5,
  118.0,
  120.5,
  123.0,
  125.5,
  128.0,
  130.5,
  133.0,
  135.5,
  138.0,
  142.0,
  146.0,
  148.5,
  151.0,
  154.0,
  157.0,
  160.0,
  163.0,
  165.5,
  168.0,
  170.5,
  173.0,
  175.5,
  178.0,
  181.0,
  184.0,
  186.0,
  188.0,
  188.0,
  188.0,
  190.0,
  192.0,
  195.0,
  198.0,
  201.5,
  205.0,
  208.5,
  212.0,
  214.5,
  217.0,
  220.0,
  223.0,
  225.5,
  228.0,
  230.0,
  232.0,
  234.5,
  237.0,
  239.5,
  242.0,
  244.5,
  247.0,
  249.5,
  252.0,
  254.5,
  257.0,
  259.5,
  262.0,
  264.5,
  267.0,
  270.5,
  274.0,
  277.0,
  280.0,
  283.0,
  286.0,
  288.5,
  291.0,
  293.5,
  296

## Database Gear Code

#### Initialize Gear Table  

In [242]:
def drop_gear_table():
    conn = sqlite3.connect(DB_PATH)
    c = conn.cursor()

    c.execute("""
DROP TABLE gear""")
    conn.commit()
    conn.close()

drop_gear_table()

In [31]:
def initialize_gear_db():
    conn = sqlite3.connect(DB_PATH)
    c = conn.cursor()

    c.execute("""
    CREATE TABLE IF NOT EXISTS gear (
        gear_id TEXT PRIMARY KEY,
        is_primary BOOLEAN,
        nickname TEXT,
        resource_state INTEGER,
        retired BOOLEAN,
        distance INTEGER,
        brand_name TEXT,
        model_name TEXT,      
        frame_type INTEGER,
        description TEXT,
        weight REAL
    )
    """)

    conn.commit()
    conn.close()

#### Insert Single Gear

In [32]:
import json

def insert_single_gear(gear, db_path):
    """Insert activity records, skipping those with duplicate 'id'."""
    conn = sqlite3.connect(db_path)
    c = conn.cursor()
    try:
        c.execute('''
        INSERT OR REPLACE INTO gear VALUES (
            :gear_id, :is_primary, :nickname, 
            :resource_state, :retired, :distance,
            :brand_name, :model_name,
            :frame_type, :description, :weight
                  )
        ''', {
            "gear_id": gear.get("id"),
            "is_primary": gear.get("primary"),
            "nickname" : gear.get("nickname"),
            "resource_state": gear.get("resource_state"),
            "retired" : gear.get("retired"),
            "distance": gear.get("distance"),
            "brand_name": gear.get("brand_name"),
            "model_name": gear.get("model_name"),
            "frame_type": gear.get("frame_type"),
            "description": gear.get("description"),
            "weight" : gear.get("weight"),
            "import_date": datetime.now().isoformat()
        })
    except sqlite3.IntegrityError:
        print(f"Skipping duplicate activity with id {gear['id']}")

    conn.commit()
    conn.close()

#### Get All Gear

In [33]:
def get_all_gear(db_path):
    """Count how many activities were imported today based on the full ISO 8601 import_date timestamp."""
    conn = sqlite3.connect(db_path)
    df = pd.read_sql_query(
        """
        SELECT *
        FROM gear 
        limit 5
    """,
        conn
    )
    return df
   

In [34]:
get_all_gear(DB_PATH)

Unnamed: 0,gear_id,is_primary,nickname,resource_state,retired,distance,brand_name,model_name,frame_type,description,weight
0,b5036222,0,CTS Road,3,1,4390192,Tommaso,Imola,3.0,Stolen,23.0
1,g4006462,0,CTS M860 x1,3,1,687401,New Balance,M860 v9,,Mens 10.5 M860 BK9 Black/Grey,
2,g4556561,0,CTS M860 x2,3,1,676902,New Balance,M860 v9,,Mens 10.5 M860 BK9 Black/Grey,
3,g4872382,0,CTS M860 x3,3,1,694916,New Balance,M860 v9,,Mens 10.5 M860 BK9 Green/Grey,
4,b6893678,0,CTS Road v2,3,0,6487523,Bianchi,"""C2C"" VIA NIRONE Alu Sora 9sp mix Compact",3.0,Acquired 4.1.2020,21.7


In [35]:
def get_gearid_for_specific_activity(db_path, activity_id):
    """Count how many activities were imported today based on the full ISO 8601 import_date timestamp."""
    conn = sqlite3.connect(db_path)
    c = conn.cursor()

    c.execute(
        """
        SELECT gear_id
        FROM activities 
        where id = ?
    """, (activity_id,)
    )
    count = c.fetchone()[0]
    conn.close()
    return count

In [36]:
get_gearid_for_specific_activity(DB_PATH, 14436191500)

'g15852452'

## Get Latest Activity and Associated Streams

In [14]:
try:
    activities = []
    streams = []
    page = 1
    per_page = 1
    while True:
        activities_page = get_activities(AUTH_TOKEN, page=page, per_page=per_page)
        if not activities_page:
            break
        activities.extend(activities_page)
        page += 1
        time.sleep(1)  # Respect rate limits, adjust if needed
        break

    # Process the activity data
    for activity in activities:
        print(f"Activity ID: {activity['id']}, Name: {activity['name']}, Type: {activity['type']}")

    keys = ["time", "distance", "latlng", "altitude", "heartrate", "cadence", "watts"]
    keys = "time, latlng"
    # Process stream data
    for a in activities:
        stream = get_streams(AUTH_TOKEN, keys, a['id'])
        if not stream:
            break
        streams.extend(stream)
        time.sleep(1)  # Respect rate limits, adjust if needed
        break

except requests.exceptions.HTTPError as e:
    print(f"HTTP error occurred: {e}")
except json.JSONDecodeError as e:
    print(f"JSON decode error occurred: {e}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")

Activity ID: 14404286770, Name: Morning Run, Type: Run


## Bulk Historical Processing of Activities
#### Run this up to 1000 API calls per day during historical data loading
#### Quit IMMEDIATELY if rate limited

In [37]:
# starting point, build unix timestamps in 6 month batches

# time_0 = time.mktime(datetime.datetime(2019, 1, 1, 0, 0, 0).timetuple())
# time_1 = time.mktime(datetime.datetime(2019, 7, 1, 0, 0, 0).timetuple()) 
# time_2 = time.mktime(datetime.datetime(2020, 1, 1, 0, 0, 0).timetuple()) 
# time_3 = time.mktime(datetime.datetime(2020, 7, 1, 0, 0, 0).timetuple())
# time_4 = time.mktime(datetime.datetime(2021, 1, 1, 0, 0, 0).timetuple())
# time_5 = time.mktime(datetime.datetime(2021, 7, 1, 0, 0, 0).timetuple())
# time_6 = time.mktime(datetime.datetime(2022, 1, 1, 0, 0, 0).timetuple())
# time_7 = time.mktime(datetime.datetime(2022, 7, 1, 0, 0, 0).timetuple())
# time_8 = time.mktime(datetime.datetime(2023, 1, 1, 0, 0, 0).timetuple())
# time_9 = time.mktime(datetime.datetime(2023, 7, 1, 0, 0, 0).timetuple())
# time_10 = time.mktime(datetime.datetime(2024, 1, 1, 0, 0, 0).timetuple())
# time_11 = time.mktime(datetime.datetime(2024, 7, 1, 0, 0, 0).timetuple())
# time_12 = time.mktime(datetime.datetime(2025, 1, 1, 0, 0, 0).timetuple())
# time_13 = time.mktime(datetime.datetime(2025, 7, 1, 0, 0, 0).timetuple())

day_by_day_before = time.mktime(datetime(2025, 5, 16, 0, 0, 0).timetuple())
day_by_day_after = time.mktime(datetime(2025, 5, 14, 0, 0, 0).timetuple())

In [105]:
print(day_by_day_after)

1746853200.0


In [38]:
# ensure db table exists
initialize_database()

# Get the data
try:
    activities = []
    page = 1
    per_page = 30
    while True:
        activities_page = get_activities(ACCESS_TOKEN, page=page, per_page=per_page, before = day_by_day_before, after = day_by_day_after) # currently done, latest after time was time_12
        if not activities_page:
            break
        print(len(activities_page)) # expect 30 each time unless final page
        activities.extend(activities_page)
        
        insert_activities_batch(activities_page, DB_PATH) # attempt to bulk write to db
        time.sleep(1)  # Respect rate limits, adjust if needed

        num_today = count_activities_imported_today(DB_PATH) # count objects in db now
        print(f"{num_today} activities were imported so far.")

        page += 1
        time.sleep(1)  # Respect rate limits, adjust if needed
        if page > 10:
            break

except requests.exceptions.HTTPError as e:
    print(f"HTTP error occurred: {e}")
except json.JSONDecodeError as e:
    print(f"JSON decode error occurred: {e}")
except Exception as e:
    print(f"An unexpected error occurred: {e}")

num_today = count_activities_imported_today(DB_PATH)
print(f"{num_today} activities were imported so far today")
print(f"{len(activities)} activities were imported from the API this round.")

3
2 activities were imported so far.
2 activities were imported so far today
3 activities were imported from the API this round.


## Bulk Historical Processing of Data Streams and Gear
#### Run this every 15 minutes up to 10 times a day during historical data loading

In [41]:
# Initialize the DB
initialize_streams_db()

# get all stream activity IDs
stream_activity_ids = get_all_activity_ids_streams("strava_data.db")
stream_activity_ids = set(i[0] for i in stream_activity_ids)

# get all activity ID's in db sorted by activity date desc as set
sorted_activity_list = get_all_activity_ids_with_HR("strava_data.db")
sorted_activity_list = set(i[0] for i in sorted_activity_list)

# Take the 
valid_activity_ids = tuple(sorted_activity_list - stream_activity_ids)

keys = "time,distance,latlng,altitude,velocity_smooth,heartrate,cadence,watts,temp,moving,grade_smooth"

for i, activity_integer in enumerate(valid_activity_ids):
    
    # attempt to pull stream data
    try:
        stream = get_streams(ACCESS_TOKEN, keys, activity_integer)
        if not stream:
            print('no stream')
            continue
    
    except requests.exceptions.HTTPError as e:
        print(f"HTTP error occurred: {e}")
        if "429" in str(e):
            print("RATE LIMITED!!!")
            break
        if "404" in str(e):
            print("Stream data unavailable for activity")
            continue
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
    time.sleep(1)  # Respect rate limits, adjust if needed
    insert_stream_data(activity_integer, stream, db_path=DB_PATH)
    time.sleep(1)  # Respect rate limits, adjust if needed

    # attempt to pull gear data

    try:
        print(activity_integer)
        shoe = get_gearid_for_specific_activity(DB_PATH, activity_integer)
        print(shoe)
        shoe_data = get_gear(ACCESS_TOKEN, shoe)
        if not stream:
            print('no shoe_data')
            continue
    
    except requests.exceptions.HTTPError as e:
        print(f"HTTP error occurred: {e}")
        if "429" in str(e):
            print("RATE LIMITED!!!")
            break
        if "404" in str(e):
            print("Stream data unavailable for activity")
            continue
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
    time.sleep(1)  # Respect rate limits, adjust if needed
    insert_single_gear(shoe_data, db_path=DB_PATH)
    time.sleep(1)  # Respect rate limits, adjust if needed

    if i > 10:
        break



In [72]:
num_today = count_activities_imported_today(DB_PATH)
print(f"{len(get_all_activity_ids_streams("strava_data.db"))} streams in the db.")
print(f"{len(get_all_activity_ids_with_HR("strava_data.db"))} valid activities with potential streams data exist in the db.")
print(f"{len(get_all_activity_ids_with_HR("strava_data.db")) - len(get_all_activity_ids_streams("strava_data.db"))} to go!")

2753 streams in the db.
2751 valid activities with potential streams data exist in the db.
-2 to go!


## Gear Workspace

#### The below fully rebuilds the gear table

In [40]:
# Initialize the DB
initialize_gear_db()

shoe_id = get_all_gear_ids(DB_PATH)

for id in shoe_id:
    shoe = get_gear(ACCESS_TOKEN, id[0])
    insert_single_gear(shoe, db_path=DB_PATH)


## Airflow Section

In [None]:
from airflow import DAG
from airflow.operators.python import PythonOperator
from airflow.utils.dates import days_ago
from datetime import timedelta
import requests
import json

CLIENT_ID = os.environ.get("CLIENT_ID")
CLIENT_SECRET = os.environ.get("CLIENT_SECRET")
REFRESH_TOKEN = os.environ.get("REFRESH_TOKEN")
DB_PATH = os.environ.get("DATABASE")

def latest_activity_import_date(db_path):
    """Query for latest activity import_date as a unix timestamp"""
    conn = sqlite3.connect(db_path)
    c = conn.cursor()

    today_str = date.today().isoformat()  # 'YYYY-MM-DD'

    # Use substr to extract the date portion (first 10 characters)
    c.execute("""
        SELECT max(import_date)
        FROM activities 
    """)
    
    record = c.fetchone()[0]
    conn.close()
    dt = datetime.strptime(record, "%Y-%m-%dT%H:%M:%S.%f")
    return int(dt.timestamp())

def get_activities(access_token, page=1, per_page=30, **optional_parameters):
    """Retrieves activities from the Strava API.
    Optional parameters should be provided at the end of the call like so:
    before = epoch_timestamp, after = epoch_timestamp
    """
    url = f"https://www.strava.com/api/v3/athlete/activities"
    headers = {"Authorization": f"Bearer {access_token}"}
    params = {"page": page, "per_page": per_page}
    params.update(optional_parameters)
    response = requests.get(url, headers=headers, params=params)
    response.raise_for_status()
    return response.json()

def get_streams(access_token, keys = "time,distance,latlng,altitude,velocity_smooth,heartrate,cadence,watts,temp,moving,grade_smooth", activity_id):
    """Retrieves activities from the Strava API."""
    url = f"https://www.strava.com/api/v3/activities/" + str(activity_id) + "/streams"
    headers = {"Authorization": f"Bearer {access_token}"}
    params = {"keys" : keys, "key_by_type": True}
    response = requests.get(url, headers=headers, params=params)
    response.raise_for_status()
    return response.json()

def get_gear(access_token, gear_id):
    """Retrieves activities from the Strava API."""
    url = f"https://www.strava.com/api/v3/gear/" + str(gear_id)
    headers = {"Authorization": f"Bearer {access_token}"}
    response = requests.get(url, headers=headers)
    response.raise_for_status()
    return response.json()

def insert_activities_batch(activity_list, db_path = DB_PATH):
    """Efficiently insert multiple activity records into the database."""
    conn = sqlite3.connect(db_path)
    c = conn.cursor()

    data = []
    for activity in activity_list:
        data.append({
            "id": activity["id"],
            "resource_state": activity.get("resource_state"),
            "athlete_id": activity.get("athlete", {}).get("id"),
            "athlete_resource_state": activity.get("athlete", {}).get("resource_state"),
            "name": activity.get("name"),
            "distance": activity.get("distance"),
            "moving_time": activity.get("moving_time"),
            "elapsed_time": activity.get("elapsed_time"),
            "total_elevation_gain": activity.get("total_elevation_gain"),
            "type": activity.get("type"),
            "sport_type": activity.get("sport_type"),
            "workout_type": activity.get("workout_type"),
            "start_date": activity.get("start_date"),
            "start_date_local": activity.get("start_date_local"),
            "timezone": activity.get("timezone"),
            "utc_offset": activity.get("utc_offset"),
            "location_city": activity.get("location_city"),
            "location_state": activity.get("location_state"),
            "location_country": activity.get("location_country"),
            "achievement_count": activity.get("achievement_count"),
            "kudos_count": activity.get("kudos_count"),
            "comment_count": activity.get("comment_count"),
            "athlete_count": activity.get("athlete_count"),
            "photo_count": activity.get("photo_count"),
            "map_id": activity.get("map", {}).get("id"),
            "map_summary_polyline": activity.get("map", {}).get("summary_polyline"),
            "map_resource_state": activity.get("map", {}).get("resource_state"),
            "trainer": activity.get("trainer"),
            "commute": activity.get("commute"),
            "manual": activity.get("manual"),
            "private": activity.get("private"),
            "visibility": activity.get("visibility"),
            "flagged": activity.get("flagged"),
            "gear_id": activity.get("gear_id"),
            "start_latlng": json.dumps(activity.get("start_latlng")),
            "end_latlng": json.dumps(activity.get("end_latlng")),
            "average_speed": activity.get("average_speed"),
            "max_speed": activity.get("max_speed"),
            "average_cadence": activity.get("average_cadence"),
            "average_watts": activity.get("average_watts"),
            "max_watts": activity.get("max_watts"),
            "weighted_average_watts": activity.get("weighted_average_watts"),
            "device_watts": activity.get("device_watts"),
            "kilojoules": activity.get("kilojoules"),
            "has_heartrate": activity.get("has_heartrate"),
            "average_heartrate": activity.get("average_heartrate"),
            "max_heartrate": activity.get("max_heartrate"),
            "heartrate_opt_out": activity.get("heartrate_opt_out"),
            "display_hide_heartrate_option": activity.get("display_hide_heartrate_option"),
            "elev_high": activity.get("elev_high"),
            "elev_low": activity.get("elev_low"),
            "upload_id": activity.get("upload_id"),
            "upload_id_str": activity.get("upload_id_str"),
            "external_id": activity.get("external_id"),
            "from_accepted_tag": activity.get("from_accepted_tag"),
            "pr_count": activity.get("pr_count"),
            "total_photo_count": activity.get("total_photo_count"),
            "has_kudoed": activity.get("has_kudoed"),
            "import_date": datetime.now().isoformat()
        })

    try:
        c.executemany('''
        INSERT OR IGNORE INTO activities VALUES (
            :id, :resource_state, :athlete_id, :athlete_resource_state,
            :name, :distance, :moving_time, :elapsed_time, :total_elevation_gain,
            :type, :sport_type, :workout_type, :start_date, :start_date_local,
            :timezone, :utc_offset, :location_city, :location_state, :location_country,
            :achievement_count, :kudos_count, :comment_count, :athlete_count, :photo_count,
            :map_id, :map_summary_polyline, :map_resource_state,
            :trainer, :commute, :manual, :private, :visibility, :flagged, :gear_id,
            :start_latlng, :end_latlng,
            :average_speed, :max_speed, :average_cadence, :average_watts,
            :max_watts, :weighted_average_watts, :device_watts, :kilojoules,
            :has_heartrate, :average_heartrate, :max_heartrate,
            :heartrate_opt_out, :display_hide_heartrate_option,
            :elev_high, :elev_low,
            :upload_id, :upload_id_str, :external_id, :from_accepted_tag,
            :pr_count, :total_photo_count, :has_kudoed, :import_date
        )
        ''', data)
    except sqlite3.Error as e:
        print("Error inserting batch:", e)

    conn.commit()
    conn.close()

def insert_stream_data(activity_id, stream_dict, db_path = DB_PATH):
    """
    Inserts or replaces a row in the streams table for a given activity_id.
    stream_dict should have keys like 'time', 'distance', etc., with each value a dict containing:
    {
        'data': [...],
        'series_type': '...',
        'original_size': ...,
        'resolution': '...'
    }
    """
    conn = sqlite3.connect(db_path)
    c = conn.cursor()

    # Create the column mappings dynamically
    base_columns = []
    placeholders = []
    values = []

    # Always include activity_id
    base_columns.append("activity_id")
    placeholders.append("?")
    values.append(activity_id)

    for key, val in stream_dict.items():
        if not isinstance(val, dict):
            continue  # skip malformed

        base_columns.extend([
            f"{key}_data",
            f"{key}_series_type",
            f"{key}_original_size",
            f"{key}_resolution"
        ])
        placeholders.extend(["?"] * 4)

        values.extend([
            json.dumps(val.get("data")),
            val.get("series_type"),
            val.get("original_size"),
            val.get("resolution")
        ])

    sql = f"""
        INSERT INTO streams ({', '.join(base_columns)})
        VALUES ({', '.join(placeholders)})
    """
    

    try:
        c.execute(sql, values)
        conn.commit()
        conn.close()
    except sqlite3.IntegrityError:
        print(f"Activity {activity_id} already exists in the 'streams' table. Skipping insert.")

def insert_single_gear(gear, db_path=DB_PATH):
    """Insert activity records, skipping those with duplicate 'id'."""
    conn = sqlite3.connect(DB_PATH)
    c = conn.cursor()
    try:
        c.execute('''
        INSERT OR REPLACE INTO gear VALUES (
            :gear_id, :is_primary, :nickname, 
            :resource_state, :retired, :distance,
            :brand_name, :model_name,
            :frame_type, :description, :weight
                  )
        ''', {
            "gear_id": gear.get("id"),
            "is_primary": gear.get("primary"),
            "nickname" : gear.get("nickname"),
            "resource_state": gear.get("resource_state"),
            "retired" : gear.get("retired"),
            "distance": gear.get("distance"),
            "brand_name": gear.get("brand_name"),
            "model_name": gear.get("model_name"),
            "frame_type": gear.get("frame_type"),
            "description": gear.get("description"),
            "weight" : gear.get("weight"),
            "import_date": datetime.now().isoformat()
        })
    except sqlite3.IntegrityError:
        print(f"Skipping duplicate activity with id {gear['id']}")

    conn.commit()
    conn.close()

# This needs to happen first and everything else will fail if this fails
def refresh_access_token(client_id, client_secret, refresh_token):
    """API call using local refresh token to get new access token.
    Needs to pull client_id, client_secret, and refresh_token from environment variables.
    Needs to write new refresh token to environment variables and pass on the received access
    token to the following api functions
    """
    response = requests.post(
        url="https://www.strava.com/oauth/token",
        data={
            "client_id": client_id,
            "client_secret": client_secret,
            "grant_type": "refresh_token",
            "refresh_token": refresh_token,
        },
    )
    response.raise_for_status()  # Raise HTTPError for bad responses (4xx or 5xx)
    os.environ["REFRESH_TOKEN"] = response["refresh_token"]
    kwargs['ti'].xcom_push(key='access_token', value=response.json()["access_token"])
    
def fetch_activity_data_and_write_to_db(**kwargs):
    """Function to call the strava api for latest activity data since that last import_date.
    Needs to accept an before and after unix timestamp as a parameter as well as db path
    to query for the latest import_date.
    """
    after = latest_activity_import_date(db_path)
    before = after + 86400
    page = 1
    per_page = 30
    try:
        activities_page = get_activities(ACCESS_TOKEN, page=page, per_page=per_page, before = before, after = after)     
    except requests.exceptions.HTTPError as e:
        print(f"HTTP error occurred: {e}")
    except json.JSONDecodeError as e:
        print(f"JSON decode error occurred: {e}")
    except Exception as e:
        print(f"An unexpected error occurred: {e}")
    insert_activities_batch(activities_page)
    kwargs['ti'].xcom_push(key='activity_dict', value=activities_page)


def fetch_streams_data_and_write_to_db(**kwargs):
    """Function to call the strava api for stream data related to activity id's pulled in from 
    fetch activity_data then write single stream datasets to the db.
    """
    for activity in activities_page:
        try:
            stream = get_streams(ACCESS_TOKEN, keys, int(activity[id]))
            if not stream:
                print('no stream')
                continue
        
        except requests.exceptions.HTTPError as e:
            print(f"HTTP error occurred: {e}")
            if "429" in str(e):
                print("RATE LIMITED!!!")
                break
            if "404" in str(e):
                print("Stream data unavailable for activity")
                continue
        except Exception as e:
            print(f"An unexpected error occurred: {e}")
        time.sleep(1)  # Respect rate limits, adjust if needed
        insert_stream_data(activity[id], stream)

def fetch_gear_data_and_write_to_db(**kwargs):
    """Function to call the strava api for single gear data related to gear_id pulled in from 
    fetch activity_data then write single gear update to the db.
    """
    for activity in activities_page:
        try:
            shoe = activity['gear_id']
            if not shoe:
                print('no shoe')
                continue
        
        except requests.exceptions.HTTPError as e:
            print(f"HTTP error occurred: {e}")
            if "429" in str(e):
                print("RATE LIMITED!!!")
                break
            if "404" in str(e):
                print("Stream data unavailable for activity")
                continue
        except Exception as e:
            print(f"An unexpected error occurred: {e}")
        time.sleep(1)  # Respect rate limits, adjust if needed
        insert_single_gear(shoe)


default_args = {
    'owner': 'you',
    'depends_on_past': False,
    'email_on_failure': False,
    'retries': 1,
    'retry_delay': timedelta(minutes=5),
}

with DAG(
    'api_to_db_pipeline',
    default_args=default_args,
    description='Fetch Strava API Activity data and update 3 db tables',
    schedule_interval='30 7,17,19 * * *',
    start_date=days_ago(1),
    catchup=False,
    tags=['example'],
) as dag:

    refresh_tokens = PythonOperator(
        task_id='refresh_access_token',
        python_callable=refresh_access_token,
        provide_context=True,
    )

    update_activities = PythonOperator(
        task_id='fetch_and_write_activities',
        python_callable=fetch_activity_data_and_write_to_db,
        provide_context=True,
    )

    update_streams = PythonOperator(
        task_id='fetch_and_write_streams',
        python_callable=fetch_streams_data_and_write_to_db,
        provide_context=True,
    )

    update_gear = PythonOperator(
    task_id='fetch_and_write_gear',
    python_callable=fetch_gear_data_and_write_to_db,
    provide_context=True,
    )


    refresh_tokens >> [update_activities, update_streams, update_gear]


## GEMMA TIME

In [3]:
import torch
# from accelerate import disk_offload # trying to manage memory
from transformers import pipeline
from huggingface_hub import login
import os

# load_dotenv('secrets.env') # it's magically working now? 
hf_secret = os.getenv("HF_TOKEN") # access a token registered on huggingface to allow use of gated model
login(token = hf_secret, add_to_git_credential = False) # performs cli login using token above

# Defines the model and pipeline to be used for text generation
pipe = pipeline(
    "text-generation",
    model="google/gemma-2-2b-it",
    model_kwargs={"torch_dtype": torch.bfloat16,},
    device_map="cpu", # other options exist here but cpu seems to work and avoids GPU per our goals
)

Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.
Loading checkpoint shards: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 2/2 [00:00<00:00,  2.16it/s]
Device set to use cpu


In [19]:
user_ask = """Given the following SQL Database schema of a user's running and biking data:
        activities table:
        id INTEGER PRIMARY KEY,
        distance REAL, -- in meters
        moving_time INTEGER, -- in seconds
        elapsed_time INTEGER, -- in seconds
        total_elevation_gain REAL, -- in meters
        type TEXT, -- can be Run or Ride
        workout_type INTEGER,
        start_date_local TEXT,
        kudos_count INTEGER,
        gear_id TEXT, -- foreign key to gear table
        average_speed REAL,
        max_speed REAL,
        average_cadence REAL,
        average_watts REAL,
        max_watts INTEGER,
        weighted_average_watts INTEGER,
        device_watts BOOLEAN,
        kilojoules REAL,
        average_heartrate REAL,
        max_heartrate REAL,
        elev_high REAL,
        elev_low REAL,
        import_date TEXT

        gear table (contains shoe and bike data):
        gear_id TEXT PRIMARY KEY,
        nickname TEXT,
        resource_state INTEGER,
        retired BOOLEAN,
        distance INTEGER,
        brand_name TEXT,
        model_name TEXT,      
        description TEXT,
        
        Write a SQL query to return the id of all run activities where I wore Brooks brand shoes"""

In [20]:
messages = [
{"role": "user", "content": "You are business proposal writing professional. "
"Please respond to the following query with a single paragraph. " + user_ask},
]   
outputs = pipe(messages, max_new_tokens=256)
assistant_response = outputs[0]["generated_text"][-1]["content"].strip()

In [21]:
print(assistant_response)

```sql
SELECT activities.id
FROM activities
JOIN gear ON activities.gear_id = gear.gear_id
WHERE gear.brand_name = 'Brooks';
```


In [27]:
import re
match = re.search(r"```sql\s+(.*?;)", assistant_response, re.DOTALL)
query = match.group(1)

In [28]:
print(query)

SELECT activities.id
FROM activities
JOIN gear ON activities.gear_id = gear.gear_id
WHERE gear.brand_name = 'Brooks';


## RUnstrong DB Populate


In [1]:
exercises_data = [
    
    {
        'name': 'Box Jump',
        'description': 'Explosive plyometric exercise involving jumping onto an elevated platform to develop lower body power and coordination.',
        'instructions': '1. Stand arm\'s length from box\n2. Swing arms back and bend into quarter squat\n3. Explode up swinging arms forward\n4. Land softly on box with both feet\n5. Step down carefully, don\'t jump down',
        'exercise_type': 'Bodyweight',
        'movement_pattern': 'Jump',
        'primary_muscles': ['Quadriceps', 'Glutes'],
        'secondary_muscles': ['Hamstrings', 'Calves', 'Core'],
        'muscle_groups': 'Lower body',
        'unilateral': False,
        'difficulty_rating': 6,
        'prerequisites': 'Basic jumping ability, good landing mechanics',
        'progressions': ['Jump squats', 'Step-ups'],
        'regressions': ['Higher box jumps', 'Weighted box jumps'],
        'equipment_required': ['Plyometric box'],
        'equipment_optional': None,
        'setup_time': 1,
        'space_required': 'Moderate',
        'rep_range_min': 3,
        'rep_range_max': 10,
        'tempo': 'Explosive',
        'range_of_motion': 'Full',
        'compound_vs_isolation': 'Compound',
        'injury_risk_level': 'Moderate',
        'contraindications': 'Knee injuries, ankle problems, poor landing mechanics',
        'common_mistakes': ['Box too high', 'Jumping down instead of stepping', 'Poor landing position'],
        'safety_notes': 'Start with lower box, focus on soft landings, step down',
        'category': 'Power',
        'training_style': ['Functional', 'Sport-specific'],
        'experience_level': ['Intermediate', 'Advanced'],
        'goals': ['Power', 'Explosiveness'],
        'duration_minutes': 2,
        'popularity_score': 8,
        'alternatives': ['Jump Squats', 'Broad Jumps', 'Step-ups'],
        'supersets_well_with': ['Pull-ups', 'Plank variations']
    },
    {
        'name': 'Kettlebell Romanian Deadlift with Dumbbells',
        'description': 'Hip hinge movement using dumbbells in place of kettlebell to target posterior chain muscles with emphasis on hamstrings and glutes.',
        'instructions': '1. Hold dumbbells in front of thighs with feet hip-width apart\n2. Initiate movement by pushing hips back\n3. Lower weights while keeping them close to legs\n4. Feel stretch in hamstrings\n5. Drive hips forward to return to standing',
        'exercise_type': 'Free weights',
        'movement_pattern': 'Hinge',
        'primary_muscles': ['Hamstrings', 'Glutes'],
        'secondary_muscles': ['Lower back', 'Core', 'Forearms'],
        'muscle_groups': 'Lower body',
        'unilateral': False,
        'difficulty_rating': 5,
        'prerequisites': 'Hip hinge pattern, hamstring flexibility',
        'progressions': ['Good mornings', 'Glute bridges'],
        'regressions': ['Conventional deadlift', 'Single-leg RDL'],
        'equipment_required': ['Dumbbells'],
        'equipment_optional': None,
        'setup_time': 1,
        'space_required': 'Moderate',
        'rep_range_min': 8,
        'rep_range_max': 15,
        'tempo': '3-1-2-1',
        'range_of_motion': 'Full',
        'compound_vs_isolation': 'Compound',
        'injury_risk_level': 'Moderate',
        'contraindications': 'Lower back injuries, severe hamstring tightness',
        'common_mistakes': ['Rounding back', 'Bending knees too much', 'Not feeling hamstring stretch'],
        'safety_notes': 'Maintain neutral spine, initiate with hip hinge',
        'category': 'Strength',
        'training_style': ['Functional', 'Bodybuilding'],
        'experience_level': ['Beginner', 'Intermediate', 'Advanced'],
        'goals': ['Strength', 'Hypertrophy'],
        'duration_minutes': 3,
        'popularity_score': 8,
        'alternatives': ['Romanian Deadlift', 'Good Mornings', 'Glute Ham Raise'],
        'supersets_well_with': ['Chest Press', 'Plank variations']
    },
    {
        'name': 'Deadlift',
        'description': 'Fundamental compound movement lifting weight from the ground, targeting the entire posterior chain and core.',
        'instructions': '1. Stand with feet hip-width apart, bar over mid-foot\n2. Bend at hips and knees to grip bar\n3. Keep chest up and back neutral\n4. Drive through heels and extend hips and knees\n5. Stand tall with shoulders back',
        'exercise_type': 'Free weights',
        'movement_pattern': 'Hinge',
        'primary_muscles': ['Hamstrings', 'Glutes', 'Lower back'],
        'secondary_muscles': ['Quadriceps', 'Core', 'Upper back', 'Forearms'],
        'muscle_groups': 'Full body',
        'unilateral': False,
        'difficulty_rating': 8,
        'prerequisites': 'Hip hinge pattern, adequate mobility, proper form instruction',
        'progressions': ['Romanian Deadlift', 'Rack pulls'],
        'regressions': ['Sumo deadlift', 'Trap bar deadlift'],
        'equipment_required': ['Barbell', 'Weight plates'],
        'equipment_optional': 'Lifting straps, chalk',
        'setup_time': 3,
        'space_required': 'Large',
        'rep_range_min': 1,
        'rep_range_max': 10,
        'tempo': '1-1-2-1',
        'range_of_motion': 'Full',
        'compound_vs_isolation': 'Compound',
        'injury_risk_level': 'High',
        'contraindications': 'Lower back injuries, severe mobility restrictions',
        'common_mistakes': ['Rounding back', 'Bar drifting away', 'Hyperextending at top'],
        'safety_notes': 'Master form with light weight first, use proper progression',
        'category': 'Strength',
        'training_style': ['Powerlifting', 'Functional'],
        'experience_level': ['Intermediate', 'Advanced'],
        'goals': ['Strength', 'Power'],
        'duration_minutes': 4,
        'popularity_score': 10,
        'alternatives': ['Romanian Deadlift', 'Trap Bar Deadlift', 'Rack Pulls'],
        'supersets_well_with': ['Pull-ups', 'Plank variations']
    },
    {
        'name': 'Pull-up',
        'description': 'Upper body pulling exercise performed by hanging from a bar and pulling the body up until chin clears the bar.',
        'instructions': '1. Hang from pull-up bar with arms fully extended\n2. Use overhand grip slightly wider than shoulders\n3. Pull body up until chin clears bar\n4. Lower with control to full hang\n5. Avoid swinging or kipping',
        'exercise_type': 'Bodyweight',
        'movement_pattern': 'Pull',
        'primary_muscles': ['Latissimus dorsi', 'Rhomboids'],
        'secondary_muscles': ['Biceps', 'Rear deltoids', 'Core'],
        'muscle_groups': 'Upper body',
        'unilateral': False,
        'difficulty_rating': 7,
        'prerequisites': 'Adequate upper body strength, dead hang ability',
        'progressions': ['Assisted pull-ups', 'Negative pull-ups'],
        'regressions': ['Weighted pull-ups', 'One-arm pull-ups'],
        'equipment_required': ['Pull-up bar'],
        'equipment_optional': 'Assistance bands',
        'setup_time': 1,
        'space_required': 'Minimal',
        'rep_range_min': 3,
        'rep_range_max': 15,
        'tempo': '2-1-2-1',
        'range_of_motion': 'Full',
        'compound_vs_isolation': 'Compound',
        'injury_risk_level': 'Moderate',
        'contraindications': 'Shoulder impingement, elbow issues',
        'common_mistakes': ['Partial range of motion', 'Swinging', 'Neck craning'],
        'safety_notes': 'Full range of motion, control descent, proper grip',
        'category': 'Strength',
        'training_style': ['Functional', 'Bodyweight'],
        'experience_level': ['Intermediate', 'Advanced'],
        'goals': ['Strength', 'Hypertrophy'],
        'duration_minutes': 2,
        'popularity_score': 9,
        'alternatives': ['Lat Pulldown', 'Assisted Pull-ups', 'Chin-ups'],
        'supersets_well_with': ['Push-ups', 'Squats', 'Deadlifts']
    },
    {
        'name': 'Plank',
        'description': 'Isometric core exercise maintaining a straight body position supported by forearms and toes.',
        'instructions': '1. Start in push-up position then lower to forearms\n2. Keep body in straight line from head to heels\n3. Engage core and glutes\n4. Hold position while breathing normally\n5. Avoid sagging hips or raising butt',
        'exercise_type': 'Bodyweight',
        'movement_pattern': 'Anti-extension',
        'primary_muscles': ['Core', 'Deep abdominals'],
        'secondary_muscles': ['Shoulders', 'Glutes', 'Back'],
        'muscle_groups': 'Core',
        'unilateral': False,
        'difficulty_rating': 4,
        'prerequisites': 'Basic core strength',
        'progressions': ['Wall plank', 'Knee plank'],
        'regressions': ['Plank with leg lifts', 'Single-arm plank'],
        'equipment_required': None,
        'equipment_optional': 'Exercise mat',
        'setup_time': 0,
        'space_required': 'Minimal',
        'rep_range_min': 1,
        'rep_range_max': 1,
        'tempo': 'Hold 15-120 seconds',
        'range_of_motion': 'Static',
        'compound_vs_isolation': 'Compound',
        'injury_risk_level': 'Low',
        'contraindications': 'Lower back pain, shoulder injuries',
        'common_mistakes': ['Sagging hips', 'Raising butt', 'Holding breath'],
        'safety_notes': 'Maintain neutral spine, breathe normally',
        'category': 'Strength',
        'training_style': ['Functional', 'Bodyweight'],
        'experience_level': ['Beginner', 'Intermediate', 'Advanced'],
        'goals': ['Core strength', 'Stability'],
        'duration_minutes': 2,
        'popularity_score': 9,
        'alternatives': ['Dead Bug', 'Bird Dog', 'Side Plank'],
        'supersets_well_with': ['Any exercise', 'Glute bridges']
    },
    {
        'name': 'Banded Clams',
        'description': 'Hip strengthening exercise using resistance band to target glute medius and improve hip stability.',
        'instructions': '1. Lie on side with knees bent and band around thighs\n2. Keep feet together\n3. Lift top knee up against band resistance\n4. Feel activation in side of glute\n5. Lower with control and repeat',
        'exercise_type': 'Resistance band',
        'movement_pattern': 'Hip abduction',
        'primary_muscles': ['Glute medius'],
        'secondary_muscles': ['Glute minimus', 'Hip stabilizers'],
        'muscle_groups': 'Lower body',
        'unilateral': True,
        'difficulty_rating': 3,
        'prerequisites': 'Basic hip mobility',
        'progressions': ['Clamshells without band'],
        'regressions': ['Standing clamshells', 'Clamshells with heavier band'],
        'equipment_required': ['Resistance band'],
        'equipment_optional': 'Exercise mat',
        'setup_time': 1,
        'space_required': 'Minimal',
        'rep_range_min': 12,
        'rep_range_max': 25,
        'tempo': '2-2-2-1',
        'range_of_motion': 'Full',
        'compound_vs_isolation': 'Isolation',
        'injury_risk_level': 'Low',
        'contraindications': 'Hip impingement, IT band syndrome',
        'common_mistakes': ['Rolling back', 'Using hip flexors', 'Too fast tempo'],
        'safety_notes': 'Focus on glute activation, maintain side-lying position',
        'category': 'Strength',
        'training_style': ['Rehabilitation', 'Functional'],
        'experience_level': ['Beginner', 'Intermediate'],
        'goals': ['Hip stability', 'Injury prevention'],
        'duration_minutes': 2,
        'popularity_score': 7,
        'alternatives': ['Side-lying hip abduction', 'Monster walks', 'Fire hydrants'],
        'supersets_well_with': ['Glute bridges', 'Leg raises']
    },
    {
        'name': 'Leg Raise',
        'description': 'Core strengthening exercise performed by lifting legs while lying down to target lower abdominals and hip flexors.',
        'instructions': '1. Lie on back with legs straight\n2. Place hands under lower back for support\n3. Keep legs straight and lift toward ceiling\n4. Lower legs slowly without touching ground\n5. Maintain lower back contact with floor',
        'exercise_type': 'Bodyweight',
        'movement_pattern': 'Hip flexion',
        'primary_muscles': ['Lower abdominals', 'Hip flexors'],
        'secondary_muscles': ['Core', 'Quadriceps'],
        'muscle_groups': 'Core',
        'unilateral': False,
        'difficulty_rating': 5,
        'prerequisites': 'Basic core strength, hip flexor flexibility',
        'progressions': ['Bent knee raises', 'Assisted leg raises'],
        'regressions': ['Hanging leg raises', 'Weighted leg raises'],
        'equipment_required': None,
        'equipment_optional': 'Exercise mat',
        'setup_time': 0,
        'space_required': 'Minimal',
        'rep_range_min': 8,
        'rep_range_max': 20,
        'tempo': '2-1-3-1',
        'range_of_motion': 'Full',
        'compound_vs_isolation': 'Isolation',
        'injury_risk_level': 'Moderate',
        'contraindications': 'Lower back pain, hip flexor tightness',
        'common_mistakes': ['Arching back', 'Using momentum', 'Not controlling descent'],
        'safety_notes': 'Keep lower back pressed to floor, control the movement',
        'category': 'Strength',
        'training_style': ['Bodyweight', 'Bodybuilding'],
        'experience_level': ['Intermediate', 'Advanced'],
        'goals': ['Core strength', 'Hypertrophy'],
        'duration_minutes': 2,
        'popularity_score': 7,
        'alternatives': ['Hanging Leg Raises', 'Reverse Crunches', 'Dead Bug'],
        'supersets_well_with': ['Plank variations', 'Calf raises']
    },
    {
        'name': 'Lateral Pull Down',
        'description': 'Upper body pulling exercise using a lat pulldown machine to target the latissimus dorsi and other back muscles.',
        'instructions': '1. Sit at lat pulldown machine with thighs secured\n2. Grip bar with wide overhand grip\n3. Pull bar down to upper chest\n4. Squeeze shoulder blades together\n5. Control the weight back to starting position',
        'exercise_type': 'Machine',
        'movement_pattern': 'Pull',
        'primary_muscles': ['Latissimus dorsi'],
        'secondary_muscles': ['Rhomboids', 'Middle trapezius', 'Biceps'],
        'muscle_groups': 'Upper body',
        'unilateral': False,
        'difficulty_rating': 4,
        'prerequisites': 'Basic pulling strength',
        'progressions': ['Assisted pull-ups'],
        'regressions': ['Pull-ups', 'Single-arm lat pulldown'],
        'equipment_required': ['Lat pulldown machine'],
        'equipment_optional': 'Different grip attachments',
        'setup_time': 2,
        'space_required': 'Large',
        'rep_range_min': 6,
        'rep_range_max': 15,
        'tempo': '2-1-2-1',
        'range_of_motion': 'Full',
        'compound_vs_isolation': 'Compound',
        'injury_risk_level': 'Low',
        'contraindications': 'Shoulder impingement, lat tightness',
        'common_mistakes': ['Pulling behind neck', 'Leaning too far back', 'Using momentum'],
        'safety_notes': 'Pull to front of body, maintain upright posture',
        'category': 'Strength',
        'training_style': ['Bodybuilding', 'Functional'],
        'experience_level': ['Beginner', 'Intermediate', 'Advanced'],
        'goals': ['Strength', 'Hypertrophy'],
        'duration_minutes': 3,
        'popularity_score': 8,
        'alternatives': ['Pull-ups', 'Seated Cable Row', 'T-Bar Row'],
        'supersets_well_with': ['Chest Press', 'Shoulder exercises']
    },
    {
        'name': 'Weighted Bar Squat',
        'description': 'Fundamental compound movement using a barbell to load the squat pattern, targeting the entire lower body.',
        'instructions': '1. Position barbell on your upper back (high or low bar)\n2. Stand with feet shoulder-width apart\n3. Initiate movement by pushing hips back\n4. Lower until thighs are parallel to ground\n5. Drive through heels to return to standing',
        'exercise_type': 'Free weights',
        'movement_pattern': 'Squat',
        'primary_muscles': ['Quadriceps', 'Glutes'],
        'secondary_muscles': ['Hamstrings', 'Calves', 'Core', 'Upper back'],
        'muscle_groups': 'Lower body',
        'unilateral': False,
        'difficulty_rating': 7,
        'prerequisites': 'Bodyweight squat mastery, proper mobility',
        'progressions': ['Bodyweight Squat', 'Goblet Squat'],
        'regressions': ['Front Squat', 'Overhead Squat'],
        'equipment_required': ['Barbell', 'Squat Rack'],
        'equipment_optional': 'Lifting shoes',
        'setup_time': 3,
        'space_required': 'Large',
        'rep_range_min': 3,
        'rep_range_max': 20,
        'tempo': '3-1-X-1',
        'range_of_motion': 'Full',
        'compound_vs_isolation': 'Compound',
        'injury_risk_level': 'Moderate',
        'contraindications': 'Knee injuries, lower back issues',
        'common_mistakes': ['Knee valgus', 'Forward lean', 'Partial range of motion'],
        'safety_notes': 'Use safety bars, warm up thoroughly, maintain neutral spine',
        'category': 'Strength',
        'training_style': ['Powerlifting', 'Bodybuilding', 'Functional'],
        'experience_level': ['Intermediate', 'Advanced'],
        'goals': ['Strength', 'Hypertrophy', 'Power'],
        'duration_minutes': 4,
        'popularity_score': 10,
        'alternatives': ['Goblet Squat', 'Leg Press', 'Bulgarian Split Squat'],
        'supersets_well_with': ['Pull-up', 'Plank']
    },
    {
        'name': 'Eccentric Heel Drop',
        'description': 'Rehabilitation exercise focusing on the eccentric (lowering) phase to strengthen the Achilles tendon and calf muscles.',
        'instructions': '1. Stand on a step with balls of feet on edge\n2. Rise up onto toes with both feet\n3. Lift one foot off the step\n4. Slowly lower the heel of working leg below step level\n5. Use both feet to return to starting position',
        'exercise_type': 'Bodyweight',
        'movement_pattern': 'Calf raise',
        'primary_muscles': ['Calves'],
        'secondary_muscles': ['Achilles tendon'],
        'muscle_groups': 'Lower body',
        'unilateral': True,
        'difficulty_rating': 4,
        'prerequisites': 'Basic calf raise',
        'progressions': ['Calf Raise'],
        'regressions': ['Weighted Eccentric Heel Drop'],
        'equipment_required': ['Step/Box'],
        'equipment_optional': None,
        'setup_time': 1,
        'space_required': 'Minimal',
        'rep_range_min': 10,
        'rep_range_max': 20,
        'tempo': '1-1-5-1',
        'range_of_motion': 'Full',
        'compound_vs_isolation': 'Isolation',
        'injury_risk_level': 'Low',
        'contraindications': 'Acute Achilles injury',
        'common_mistakes': ['Too fast eccentric', 'Using both legs to lower'],
        'safety_notes': 'Progress slowly, stop if pain increases',
        'category': 'Strength',
        'training_style': ['Rehabilitation', 'Functional'],
        'experience_level': ['Beginner', 'Intermediate'],
        'goals': ['Strength', 'Rehabilitation'],
        'duration_minutes': 2,
        'popularity_score': 5,
        'alternatives': ['Calf Raise', 'Seated Calf Raise'],
        'supersets_well_with': ['Leg Raise', 'Plank']
    },
    {
        'name': 'Bulgarian Split Squat',
        'description': 'Unilateral lower body exercise performed with rear foot elevated, emphasizing single-leg strength and stability.',
        'instructions': '1. Stand 2-3 feet in front of a bench\n2. Place top of rear foot on bench behind you\n3. Lower into lunge position until front thigh is parallel\n4. Drive through front heel to return to start\n5. Complete all reps before switching legs',
        'exercise_type': 'Bodyweight',
        'movement_pattern': 'Lunge',
        'primary_muscles': ['Quadriceps', 'Glutes'],
        'secondary_muscles': ['Hamstrings', 'Calves', 'Core'],
        'muscle_groups': 'Lower body',
        'unilateral': True,
        'difficulty_rating': 6,
        'prerequisites': 'Basic lunge, single-leg balance',
        'progressions': ['Reverse Lunge', 'Split Squat'],
        'regressions': ['Weighted Bulgarian Split Squat', 'Jump Bulgarian Split Squat'],
        'equipment_required': ['Bench/Box'],
        'equipment_optional': 'Dumbbells',
        'setup_time': 2,
        'space_required': 'Moderate',
        'rep_range_min': 8,
        'rep_range_max': 20,
        'tempo': '2-1-2-1',
        'range_of_motion': 'Full',
        'compound_vs_isolation': 'Compound',
        'injury_risk_level': 'Moderate',
        'contraindications': 'Knee injuries, ankle mobility issues',
        'common_mistakes': ['Too much weight on rear foot', 'Knee valgus', 'Forward lean'],
        'safety_notes': 'Focus on front leg doing the work, maintain balance',
        'category': 'Strength',
        'training_style': ['Functional', 'Bodybuilding'],
        'experience_level': ['Intermediate', 'Advanced'],
        'goals': ['Strength', 'Hypertrophy', 'Functional'],
        'duration_minutes': 3,
        'popularity_score': 8,
        'alternatives': ['Lunges', 'Single Leg Squat', 'Step Up'],
        'supersets_well_with': ['Push-up variations', 'Plank']
    },
    {
        'name': 'Single-Arm Press With Balance',
        'description': 'Unilateral overhead press performed while balancing on one leg to challenge stability and core strength.',
        'instructions': '1. Stand on one leg holding dumbbell at shoulder height\n2. Engage core and maintain balance\n3. Press weight overhead while staying balanced\n4. Lower with control\n5. Complete set before switching sides',
        'exercise_type': 'Free weights',
        'movement_pattern': 'Push',
        'primary_muscles': ['Shoulders', 'Core'],
        'secondary_muscles': ['Triceps', 'Upper back', 'Glutes', 'Hip stabilizers'],
        'muscle_groups': 'Full body',
        'unilateral': True,
        'difficulty_rating': 7,
        'prerequisites': 'Single-leg balance, overhead press',
        'progressions': ['Overhead Press', 'Single-leg stands'],
        'regressions': ['Single-arm press with eyes closed', 'Single-arm press on unstable surface'],
        'equipment_required': ['Dumbbell'],
        'equipment_optional': None,
        'setup_time': 1,
        'space_required': 'Moderate',
        'rep_range_min': 6,
        'rep_range_max': 12,
        'tempo': '2-1-2-1',
        'range_of_motion': 'Full',
        'compound_vs_isolation': 'Compound',
        'injury_risk_level': 'Moderate',
        'contraindications': 'Shoulder impingement, balance disorders',
        'common_mistakes': ['Using too heavy weight', 'Losing balance', 'Compensatory movements'],
        'safety_notes': 'Start light, have something to grab for balance if needed',
        'category': 'Strength',
        'training_style': ['Functional', 'Bodybuilding'],
        'experience_level': ['Intermediate', 'Advanced'],
        'goals': ['Strength', 'Functional', 'Balance'],
        'duration_minutes': 3,
        'popularity_score': 6,
        'alternatives': ['Overhead Press', 'Single-arm Press', 'Pike Push-up'],
        'supersets_well_with': ['Single Leg Squat', 'Plank variations']
    },
    {
        'name': 'Weighted Offset Dead Bug',
        'description': 'Anti-extension core exercise performed with uneven loading to challenge stability and coordination.',
        'instructions': '1. Lie on back with arms extended toward ceiling\n2. Hold different weights in each hand\n3. Bring knees to 90 degrees\n4. Extend opposite arm and leg while maintaining lower back contact\n5. Return to start and alternate sides',
        'exercise_type': 'Free weights',
        'movement_pattern': 'Anti-extension',
        'primary_muscles': ['Core', 'Deep abdominals'],
        'secondary_muscles': ['Hip flexors', 'Shoulders'],
        'muscle_groups': 'Core',
        'unilateral': True,
        'difficulty_rating': 6,
        'prerequisites': 'Regular dead bug, core stability',
        'progressions': ['Dead Bug', 'Bird Dog'],
        'regressions': ['Weighted Dead Bug (even weight)', 'Dead Bug with band'],
        'equipment_required': ['Dumbbells (different weights)'],
        'equipment_optional': None,
        'setup_time': 2,
        'space_required': 'Minimal',
        'rep_range_min': 6,
        'rep_range_max': 12,
        'tempo': '2-2-2-1',
        'range_of_motion': 'Full',
        'compound_vs_isolation': 'Isolation',
        'injury_risk_level': 'Low',
        'contraindications': 'Lower back pain, shoulder injuries',
        'common_mistakes': ['Losing lower back contact', 'Moving too fast', 'Holding breath'],
        'safety_notes': 'Maintain neutral spine throughout, breathe normally',
        'category': 'Strength',
        'training_style': ['Functional', 'Rehabilitation'],
        'experience_level': ['Intermediate', 'Advanced'],
        'goals': ['Stability', 'Core strength'],
        'duration_minutes': 3,
        'popularity_score': 5,
        'alternatives': ['Dead Bug', 'Pallof Press', 'Plank variations'],
        'supersets_well_with': ['Glute bridges', 'Calf raises']
    },
    {
        'name': 'Shrug',
        'description': 'Isolation exercise targeting the upper trapezius muscles by elevating the shoulders against resistance.',
        'instructions': '1. Stand holding weights at your sides or in front\n2. Keep arms straight and shoulders relaxed\n3. Shrug shoulders up toward ears\n4. Squeeze at the top\n5. Lower shoulders slowly',
        'exercise_type': 'Free weights',
        'movement_pattern': 'Shrug',
        'primary_muscles': ['Upper trapezius'],
        'secondary_muscles': ['Levator scapulae', 'Rhomboids'],
        'muscle_groups': 'Upper body',
        'unilateral': False,
        'difficulty_rating': 3,
        'prerequisites': 'Basic grip strength',
        'progressions': ['Bodyweight shrugs'],
        'regressions': ['Barbell shrugs', 'Behind-the-back shrugs'],
        'equipment_required': ['Dumbbells or Barbell'],
        'equipment_optional': 'Straps',
        'setup_time': 1,
        'space_required': 'Minimal',
        'rep_range_min': 10,
        'rep_range_max': 20,
        'tempo': '1-2-2-1',
        'range_of_motion': 'Full',
        'compound_vs_isolation': 'Isolation',
        'injury_risk_level': 'Low',
        'contraindications': 'Neck injuries, shoulder impingement',
        'common_mistakes': ['Rolling shoulders', 'Using too much weight', 'Partial range of motion'],
        'safety_notes': 'Avoid rolling shoulders, straight up and down motion only',
        'category': 'Strength',
        'training_style': ['Bodybuilding', 'Powerlifting'],
        'experience_level': ['Beginner', 'Intermediate', 'Advanced'],
        'goals': ['Hypertrophy', 'Strength'],
        'duration_minutes': 2,
        'popularity_score': 7,
        'alternatives': ['Upright Row', 'Face Pulls', 'Trap Raise'],
        'supersets_well_with': ['Calf Raise', 'Lateral raises']
    },
    {
        'name': '45 lb Calf Raise',
        'description': 'Calf strengthening exercise performed with a 45-pound weight to target the gastrocnemius and soleus muscles.',
        'instructions': '1. Stand holding 45lb weight (plate or dumbbell)\n2. Position balls of feet on slightly elevated surface\n3. Rise up onto toes as high as possible\n4. Squeeze calves at the top\n5. Lower heels below starting position for stretch',
        'exercise_type': 'Free weights',
        'movement_pattern': 'Calf raise',
        'primary_muscles': ['Calves'],
        'secondary_muscles': ['Forearms'],
        'muscle_groups': 'Lower body',
        'unilateral': False,
        'difficulty_rating': 4,
        'prerequisites': 'Basic calf raise',
        'progressions': ['Bodyweight calf raise'],
        'regressions': ['Heavier weighted calf raise', 'Single-leg calf raise'],
        'equipment_required': ['45lb weight', 'Step/Platform'],
        'equipment_optional': None,
        'setup_time': 2,
        'space_required': 'Minimal',
        'rep_range_min': 12,
        'rep_range_max': 25,
        'tempo': '1-2-2-2',
        'range_of_motion': 'Full',
        'compound_vs_isolation': 'Isolation',
        'injury_risk_level': 'Low',
        'contraindications': 'Achilles tendon injuries, calf strains',
        'common_mistakes': ['Bouncing at bottom', 'Partial range of motion', 'Using momentum'],
        'safety_notes': 'Control the weight, don\'t bounce, maintain balance',
        'category': 'Strength',
        'training_style': ['Bodybuilding', 'Functional'],
        'experience_level': ['Beginner', 'Intermediate'],
        'goals': ['Hypertrophy', 'Strength'],
        'duration_minutes': 2,
        'popularity_score': 8,
        'alternatives': ['Seated Calf Raise', 'Single-leg Calf Raise', 'Eccentric Heel Drop'],
        'supersets_well_with': ['Shrug', 'Leg Raise']
    },
    {
        'name': 'Chest Press',
        'description': 'Fundamental pushing exercise targeting the chest, shoulders, and triceps using dumbbells or machine.',
        'instructions': '1. Lie on bench with weights at chest level\n2. Plant feet firmly on ground\n3. Press weights up and slightly together\n4. Lower with control to chest level\n5. Maintain shoulder blade stability',
        'exercise_type': 'Free weights',
        'movement_pattern': 'Push',
        'primary_muscles': ['Chest'],
        'secondary_muscles': ['Shoulders', 'Triceps'],
        'muscle_groups': 'Upper body',
        'unilateral': False,
        'difficulty_rating': 4,
        'prerequisites': 'Basic pushing movement',
        'progressions': ['Push-up', 'Incline press'],
        'regressions': ['Barbell bench press', 'Single-arm press'],
        'equipment_required': ['Dumbbells', 'Bench'],
        'equipment_optional': None,
        'setup_time': 2,
        'space_required': 'Moderate',
        'rep_range_min': 6,
        'rep_range_max': 15,
        'tempo': '2-1-2-1',
        'range_of_motion': 'Full',
        'compound_vs_isolation': 'Compound',
        'injury_risk_level': 'Moderate',
        'contraindications': 'Shoulder impingement, recent chest surgery',
        'common_mistakes': ['Bouncing off chest', 'Flaring elbows too wide', 'Arching back excessively'],
        'safety_notes': 'Use spotter for heavy weights, maintain control throughout',
        'category': 'Strength',
        'training_style': ['Bodybuilding', 'Powerlifting'],
        'experience_level': ['Beginner', 'Intermediate', 'Advanced'],
        'goals': ['Strength', 'Hypertrophy'],
        'duration_minutes': 3,
        'popularity_score': 9,
        'alternatives': ['Push-up', 'Bench Press', 'Chest Flies'],
        'supersets_well_with': ['Pull-up', 'Lat Pulldown', 'Shrug']
    },
    {
        'name': 'Chest Flies',
        'description': 'Isolation exercise for the chest performed with a fly motion to target pectoral muscles with minimal tricep involvement.',
        'instructions': '1. Lie on bench holding dumbbells above chest\n2. Lower weights in wide arc with slight elbow bend\n3. Feel stretch in chest at bottom\n4. Bring weights together above chest in hugging motion\n5. Squeeze chest muscles at top',
        'exercise_type': 'Free weights',
        'movement_pattern': 'Fly',
        'primary_muscles': ['Chest'],
        'secondary_muscles': ['Shoulders'],
        'muscle_groups': 'Upper body',
        'unilateral': False,
        'difficulty_rating': 5,
        'prerequisites': 'Chest press, shoulder stability',
        'progressions': ['Chest press'],
        'regressions': ['Cable flies', 'Incline flies'],
        'equipment_required': ['Dumbbells', 'Bench'],
        'equipment_optional': None,
        'setup_time': 2,
        'space_required': 'Moderate',
        'rep_range_min': 8,
        'rep_range_max': 15,
        'tempo': '2-2-2-1',
        'range_of_motion': 'Full',
        'compound_vs_isolation': 'Isolation',
        'injury_risk_level': 'Moderate',
        'contraindications': 'Shoulder impingement, chest strains',
        'common_mistakes': ['Using too much weight', 'Dropping elbows too low', 'Turning into press'],
        'safety_notes': 'Keep slight bend in elbows, control the stretch',
        'category': 'Strength',
        'training_style': ['Bodybuilding'],
        'experience_level': ['Intermediate', 'Advanced'],
        'goals': ['Hypertrophy'],
        'duration_minutes': 3,
        'popularity_score': 7,
        'alternatives': ['Cable Flies', 'Pec Deck', 'Push-up variations'],
        'supersets_well_with': ['Tricep exercises', 'Rear delt flies']
    },
    {
        'name': 'Single Leg Squat',
        'description': 'Advanced unilateral exercise requiring significant strength, balance, and mobility to perform a full squat on one leg.',
        'instructions': '1. Stand on one leg with other leg extended forward\n2. Keep extended leg straight and off ground\n3. Lower into squat position on standing leg\n4. Go as deep as possible while maintaining form\n5. Drive through heel to return to standing',
        'exercise_type': 'Bodyweight',
        'movement_pattern': 'Squat',
        'primary_muscles': ['Quadriceps', 'Glutes'],
        'secondary_muscles': ['Hamstrings', 'Calves', 'Core', 'Hip stabilizers'],
        'muscle_groups': 'Lower body',
        'unilateral': True,
        'difficulty_rating': 9,
        'prerequisites': 'Excellent single-leg balance, strong squat, ankle mobility',
        'progressions': ['Assisted single-leg squat', 'Box pistol squat'],
        'regressions': ['Jump single-leg squat', 'Weighted single-leg squat'],
        'equipment_required': None,
        'equipment_optional': 'TRX or assistance',
        'setup_time': 1,
        'space_required': 'Minimal',
        'rep_range_min': 3,
        'rep_range_max': 10,
        'tempo': '3-1-2-1',
        'range_of_motion': 'Full',
        'compound_vs_isolation': 'Compound',
        'injury_risk_level': 'Moderate',
        'contraindications': 'Knee injuries, poor balance, limited ankle mobility',
        'common_mistakes': ['Knee valgus', 'Using momentum', 'Partial range of motion'],
        'safety_notes': 'Progress slowly, ensure adequate strength and mobility first',
        'category': 'Strength',
        'training_style': ['Functional', 'Bodyweight'],
        'experience_level': ['Advanced'],
        'goals': ['Strength', 'Functional', 'Balance'],
        'duration_minutes': 2,
        'popularity_score': 6,
        'alternatives': ['Bulgarian Split Squat', 'Lunges', 'Step-ups'],
        'supersets_well_with': ['Push-up variations', 'Plank']
    },
    {
        'name': 'Pogo Hops',
        'description': 'Plyometric exercise focusing on quick, repetitive jumping to develop calf strength, ankle stiffness, and reactive ability.',
        'instructions': '1. Stand with feet hip-width apart\n2. Keep legs relatively straight\n3. Hop up and down quickly using only ankles and calves\n4. Minimize ground contact time\n5. Land on balls of feet and immediately bounce up',
        'exercise_type': 'Bodyweight',
        'movement_pattern': 'Jump',
        'primary_muscles': ['Calves', 'Ankles'],
        'secondary_muscles': ['Core', 'Hip stabilizers'],
        'muscle_groups': 'Lower body',
        'unilateral': False,
        'difficulty_rating': 5,
        'prerequisites': 'Basic jumping ability, healthy ankles',
        'progressions': ['Calf raises', 'Small hops'],
        'regressions': ['Single-leg pogo hops', 'Weighted pogo hops'],
        'equipment_required': None,
        'equipment_optional': None,
        'setup_time': 0,
        'space_required': 'Minimal',
        'rep_range_min': 15,
        'rep_range_max': 50,
        'tempo': 'Fast',
        'range_of_motion': 'Partial',
        'compound_vs_isolation': 'Isolation',
        'injury_risk_level': 'Moderate',
        'contraindications': 'Ankle injuries, Achilles problems, shin splints',
        'common_mistakes': ['Bending knees too much', 'Slow rhythm', 'Landing on heels'],
        'safety_notes': 'Start with shorter sets, progress volume gradually',
        'category': 'Power',
        'training_style': ['Functional', 'Sport-specific'],
        'experience_level': ['Intermediate', 'Advanced'],
        'goals': ['Power', 'Reactive strength'],
        'duration_minutes': 1,
        'popularity_score': 6,
        'alternatives': ['Jump rope', 'Calf raises', 'Box jumps'],
        'supersets_well_with': ['Static stretches', 'Mobility work']
    },
    {
        'name': 'Trap Raise',
        'description': 'Isolation exercise targeting the middle and lower trapezius muscles through shoulder blade retraction and elevation.',
        'instructions': '1. Lie face down on incline bench or stand bent over\n2. Hold light weights with arms extended\n3. Raise arms up and back in Y-formation\n4. Squeeze shoulder blades together\n5. Lower with control',
        'exercise_type': 'Free weights',
        'movement_pattern': 'Pull',
        'primary_muscles': ['Middle trapezius', 'Lower trapezius'],
        'secondary_muscles': ['Rhomboids', 'Rear deltoids'],
        'muscle_groups': 'Upper body',
        'unilateral': False,
        'difficulty_rating': 4,
        'prerequisites': 'Basic shoulder mobility',
        'progressions': ['Prone Y-raises without weight'],
        'regressions': ['Face pulls', 'Band pull-aparts'],
        'equipment_required': ['Light dumbbells'],
        'equipment_optional': 'Incline bench',
        'setup_time': 2,
        'space_required': 'Moderate',
        'rep_range_min': 10,
        'rep_range_max': 20,
        'tempo': '2-2-2-1',
        'range_of_motion': 'Full',
        'compound_vs_isolation': 'Isolation',
        'injury_risk_level': 'Moderate',
        'contraindications': 'Knee injuries, balance issues',
        'common_mistakes': ['Using too heavy weight', 'Not controlling the descent', 'Poor balance'],
        'safety_notes': 'Ensure box is stable, start with lighter weights',
        'category': 'Strength',
        'training_style': ['Functional', 'Bodybuilding'],
        'experience_level': ['Intermediate', 'Advanced'],
        'goals': ['Strength', 'Hypertrophy', 'Functional'],
        'duration_minutes': 3,
        'popularity_score': 6,
        'alternatives': ['Step Up', 'Hammer Curl', 'Lunge with Curl'],
        'supersets_well_with': ['Chest Press', 'Plank']
    },
    {
    'name': 'Weighted Step Up',
    'description': 'Unilateral lower body exercise targeting quadriceps and glutes while holding additional weight.',
    'instructions': '1. Stand facing a box or bench holding dumbbells\n2. Step up with one foot, driving through heel\n3. Bring other foot up to standing position\n4. Step down with control, same foot first\n5. Complete all reps on one side before switching',
    'exercise_type': 'Free weights',
    'movement_pattern': 'Push',
    'primary_muscles': ['Quadriceps', 'Glutes'],
    'secondary_muscles': ['Hamstrings', 'Calves', 'Core'],
    'muscle_groups': 'Lower body',
    'unilateral': True,
    'difficulty_rating': 6,
    'prerequisites': 'Basic balance and coordination',
    'progressions': ['Bodyweight step up'],
    'regressions': ['Higher box height', 'Heavier weights', 'Explosive step ups'],
    'equipment_required': ['Box or bench', 'Dumbbells'],
    'equipment_optional': 'Barbell',
    'setup_time': 2,
    'space_required': 'Moderate',
    'rep_range_min': 8,
    'rep_range_max': 15,
    'tempo': '2-1-2-1',
    'range_of_motion': 'Full',
    'compound_vs_isolation': 'Compound',
    'injury_risk_level': 'Moderate',
    'contraindications': 'Knee injuries, ankle instability',
    'common_mistakes': ['Pushing off back foot', 'Leaning forward excessively', 'Not controlling descent'],
    'safety_notes': 'Ensure box is stable, start with bodyweight only',
    'category': 'Strength',
    'training_style': ['Functional', 'Athletic'],
    'experience_level': ['Beginner', 'Intermediate', 'Advanced'],
    'goals': ['Strength', 'Power', 'Functional'],
    'duration_minutes': 4,
    'popularity_score': 7,
    'alternatives': ['Lunges', 'Bulgarian split squats', 'Single leg squats'],
    'supersets_well_with': ['Push ups', 'Plank', 'Upper body exercises']
},
{
    'name': 'Hammer Curl',
    'description': 'Bicep isolation exercise using neutral grip to target biceps and forearms.',
    'instructions': '1. Stand with dumbbells at sides, palms facing body\n2. Keep elbows close to torso\n3. Curl weights up without rotating wrists\n4. Squeeze at top, maintain neutral grip\n5. Lower with control',
    'exercise_type': 'Free weights',
    'movement_pattern': 'Pull',
    'primary_muscles': ['Biceps', 'Brachialis'],
    'secondary_muscles': ['Brachioradialis', 'Forearms'],
    'muscle_groups': 'Upper body',
    'unilateral': False,
    'difficulty_rating': 3,
    'prerequisites': 'Basic arm strength',
    'progressions': ['Assisted curls with resistance bands'],
    'regressions': ['Cable hammer curls', 'Alternating hammer curls'],
    'equipment_required': ['Dumbbells'],
    'equipment_optional': 'Cable machine',
    'setup_time': 1,
    'space_required': 'Minimal',
    'rep_range_min': 8,
    'rep_range_max': 15,
    'tempo': '2-1-2-1',
    'range_of_motion': 'Full',
    'compound_vs_isolation': 'Isolation',
    'injury_risk_level': 'Low',
    'contraindications': 'Elbow injuries, wrist pain',
    'common_mistakes': ['Swinging body', 'Using momentum', 'Partial range of motion'],
    'safety_notes': 'Keep core engaged, avoid ego lifting',
    'category': 'Strength',
    'training_style': ['Bodybuilding', 'General fitness'],
    'experience_level': ['Beginner', 'Intermediate', 'Advanced'],
    'goals': ['Hypertrophy', 'Strength'],
    'duration_minutes': 3,
    'popularity_score': 8,
    'alternatives': ['Regular bicep curls', 'Cable curls', 'Chin ups'],
    'supersets_well_with': ['Tricep exercises', 'Shoulder exercises']
},
{
    'name': 'Back Bridge',
    'description': 'Bodyweight exercise targeting posterior chain through spinal extension and hip flexor stretching.',
    'instructions': '1. Lie on back, knees bent, feet flat\n2. Place hands by ears, fingers toward shoulders\n3. Press through hands and feet to lift body\n4. Form arch from hands to feet\n5. Hold position, breathe steadily',
    'exercise_type': 'Bodyweight',
    'movement_pattern': 'Extension',
    'primary_muscles': ['Erector spinae', 'Glutes'],
    'secondary_muscles': ['Shoulders', 'Triceps', 'Hip flexors'],
    'muscle_groups': 'Full body',
    'unilateral': False,
    'difficulty_rating': 8,
    'prerequisites': 'Good shoulder and spinal mobility',
    'progressions': ['Wall bridge', 'Bridge from knees'],
    'regressions': ['Bridge walks', 'Single arm/leg variations'],
    'equipment_required': [],
    'equipment_optional': 'Yoga mat',
    'setup_time': 1,
    'space_required': 'Moderate',
    'rep_range_min': 3,
    'rep_range_max': 10,
    'tempo': 'Hold 10-30 seconds',
    'range_of_motion': 'Full',
    'compound_vs_isolation': 'Compound',
    'injury_risk_level': 'High',
    'contraindications': 'Neck injuries, shoulder impingement, lower back problems',
    'common_mistakes': ['Insufficient warm-up', 'Forcing the position', 'Poor hand placement'],
    'safety_notes': 'Requires extensive mobility work, progress gradually',
    'category': 'Flexibility',
    'training_style': ['Gymnastics', 'Yoga', 'Mobility'],
    'experience_level': ['Advanced'],
    'goals': ['Flexibility', 'Functional'],
    'duration_minutes': 5,
    'popularity_score': 4,
    'alternatives': ['Camel pose', 'Cobra stretch', 'Hip flexor stretches'],
    'supersets_well_with': ['Forward folds', 'Core exercises']
},
{
    'name': 'Tricep Pulldown',
    'description': 'Cable isolation exercise targeting triceps through elbow extension.',
    'instructions': '1. Stand at cable machine with rope or bar attachment\n2. Keep elbows close to sides\n3. Pull attachment down by extending forearms\n4. Squeeze triceps at bottom\n5. Return with control',
    'exercise_type': 'Cable',
    'movement_pattern': 'Push',
    'primary_muscles': ['Triceps'],
    'secondary_muscles': ['Anterior deltoids', 'Core'],
    'muscle_groups': 'Upper body',
    'unilateral': False,
    'difficulty_rating': 4,
    'prerequisites': 'Basic upper body strength',
    'progressions': ['Assisted dips'],
    'regressions': ['Single arm pulldowns', 'Overhead extensions'],
    'equipment_required': ['Cable machine', 'Rope or bar attachment'],
    'equipment_optional': 'Different attachments',
    'setup_time': 2,
    'space_required': 'Minimal',
    'rep_range_min': 10,
    'rep_range_max': 20,
    'tempo': '2-1-2-1',
    'range_of_motion': 'Full',
    'compound_vs_isolation': 'Isolation',
    'injury_risk_level': 'Low',
    'contraindications': 'Elbow pain, shoulder injuries',
    'common_mistakes': ['Moving elbows', 'Using shoulders', 'Partial range of motion'],
    'safety_notes': 'Start light, focus on form over weight',
    'category': 'Strength',
    'training_style': ['Bodybuilding', 'General fitness'],
    'experience_level': ['Beginner', 'Intermediate', 'Advanced'],
    'goals': ['Hypertrophy', 'Strength'],
    'duration_minutes': 3,
    'popularity_score': 8,
    'alternatives': ['Tricep dips', 'Close grip push ups', 'Overhead tricep extension'],
    'supersets_well_with': ['Bicep exercises', 'Chest exercises']
},
{
    'name': 'Single Leg Jump Plyometric',
    'description': 'Explosive unilateral plyometric exercise developing power and stability in single leg stance.',
    'instructions': '1. Stand on one leg with slight knee bend\n2. Jump vertically as high as possible\n3. Land softly on same leg with control\n4. Absorb impact through entire leg\n5. Reset and repeat',
    'exercise_type': 'Plyometric',
    'movement_pattern': 'Jump',
    'primary_muscles': ['Quadriceps', 'Glutes', 'Calves'],
    'secondary_muscles': ['Hamstrings', 'Core', 'Stabilizers'],
    'muscle_groups': 'Lower body',
    'unilateral': True,
    'difficulty_rating': 7,
    'prerequisites': 'Good single leg balance and strength',
    'progressions': ['Two leg jumps', 'Assisted single leg jumps'],
    'regressions': ['Multi-directional jumps', 'Weighted jumps'],
    'equipment_required': [],
    'equipment_optional': 'Soft landing surface',
    'setup_time': 1,
    'space_required': 'Moderate',
    'rep_range_min': 5,
    'rep_range_max': 10,
    'tempo': 'Explosive up, controlled landing',
    'range_of_motion': 'Full',
    'compound_vs_isolation': 'Compound',
    'injury_risk_level': 'High',
    'contraindications': 'Knee injuries, ankle instability, balance issues',
    'common_mistakes': ['Hard landings', 'Poor balance', 'Insufficient knee bend'],
    'safety_notes': 'Master two-leg jumps first, ensure proper landing mechanics',
    'category': 'Power',
    'training_style': ['Athletic', 'Sports specific'],
    'experience_level': ['Intermediate', 'Advanced'],
    'goals': ['Power', 'Athletic performance'],
    'duration_minutes': 3,
    'popularity_score': 6,
    'alternatives': ['Two leg jumps', 'Single leg squats', 'Hop variations'],
    'supersets_well_with': ['Upper body exercises', 'Core work']
},
{
    'name': 'Single Leg Bounding Plyometric',
    'description': 'Horizontal plyometric exercise emphasizing forward propulsion and single leg power.',
    'instructions': '1. Start on one leg with forward lean\n2. Bound forward as far as possible\n3. Land on same leg with control\n4. Immediately bound again\n5. Continue for distance or reps',
    'exercise_type': 'Plyometric',
    'movement_pattern': 'Bound',
    'primary_muscles': ['Glutes', 'Hamstrings', 'Calves'],
    'secondary_muscles': ['Quadriceps', 'Core', 'Hip flexors'],
    'muscle_groups': 'Lower body',
    'unilateral': True,
    'difficulty_rating': 8,
    'prerequisites': 'Excellent single leg strength and coordination',
    'progressions': ['Walking lunges', 'Single leg hops in place'],
    'regressions': ['Alternating bounds', 'Speed bounds'],
    'equipment_required': [],
    'equipment_optional': 'Grass or track surface',
    'setup_time': 1,
    'space_required': 'Large',
    'rep_range_min': 3,
    'rep_range_max': 8,
    'tempo': 'Explosive, minimal ground contact',
    'range_of_motion': 'Full',
    'compound_vs_isolation': 'Compound',
    'injury_risk_level': 'High',
    'contraindications': 'Knee injuries, ankle problems, poor balance',
    'common_mistakes': ['Too much vertical, not enough horizontal', 'Long ground contact time', 'Poor landing mechanics'],
    'safety_notes': 'Requires advanced athletic ability, proper surface essential',
    'category': 'Power',
    'training_style': ['Athletic', 'Sports specific'],
    'experience_level': ['Advanced'],
    'goals': ['Power', 'Speed', 'Athletic performance'],
    'duration_minutes': 4,
    'popularity_score': 5,
    'alternatives': ['Running bounds', 'Broad jumps', 'Single leg hops'],
    'supersets_well_with': ['Recovery exercises', 'Upper body work']
},
{
    'name': 'Box Drop Jump Plyometric',
    'description': 'Advanced plyometric exercise involving dropping from height and immediately jumping to develop reactive strength.',
    'instructions': '1. Stand on box 12-24 inches high\n2. Step off (don\'t jump off) landing on both feet\n3. Immediately jump vertically as high as possible\n4. Land softly with bent knees\n5. Reset by stepping back onto box',
    'exercise_type': 'Plyometric',
    'movement_pattern': 'Jump',
    'primary_muscles': ['Quadriceps', 'Glutes', 'Calves'],
    'secondary_muscles': ['Hamstrings', 'Core', 'Stabilizers'],
    'muscle_groups': 'Lower body',
    'unilateral': False,
    'difficulty_rating': 9,
    'prerequisites': 'Excellent jump mechanics and eccentric strength',
    'progressions': ['Basic jump squats', 'Box jumps'],
    'regressions': ['Higher boxes', 'Single leg variations'],
    'equipment_required': ['Sturdy box or platform'],
    'equipment_optional': 'Soft landing surface',
    'setup_time': 3,
    'space_required': 'Large',
    'rep_range_min': 3,
    'rep_range_max': 6,
    'tempo': 'Minimal ground contact time',
    'range_of_motion': 'Full',
    'compound_vs_isolation': 'Compound',
    'injury_risk_level': 'Very High',
    'contraindications': 'Knee injuries, ankle problems, inexperience with plyometrics',
    'common_mistakes': ['Jumping off box instead of stepping', 'Too high starting height', 'Poor landing mechanics'],
    'safety_notes': 'Advanced exercise only, master basic plyometrics first',
    'category': 'Power',
    'training_style': ['Athletic', 'Sports specific'],
    'experience_level': ['Advanced'],
    'goals': ['Reactive strength', 'Power', 'Athletic performance'],
    'duration_minutes': 5,
    'popularity_score': 4,
    'alternatives': ['Depth jumps', 'Box jumps', 'Jump squats'],
    'supersets_well_with': ['Long rest periods required']
},

{
    'name': 'Weighted Lunge',
    'description': 'Unilateral lower body exercise targeting quadriceps and glutes while holding additional weight.',
    'instructions': '1. Stand holding dumbbells at sides or barbell across shoulders\n2. Step forward into lunge position\n3. Lower back knee toward ground\n4. Push through front heel to return to start\n5. Alternate legs or complete one side first',
    'exercise_type': 'Free weights',
    'movement_pattern': 'Push',
    'primary_muscles': ['Quadriceps', 'Glutes'],
    'secondary_muscles': ['Hamstrings', 'Calves', 'Core'],
    'muscle_groups': 'Lower body',
    'unilateral': True,
    'difficulty_rating': 5,
    'prerequisites': 'Basic balance and bodyweight lunge proficiency',
    'progressions': ['Bodyweight lunges', 'Static lunges'],
    'regressions': ['Walking lunges', 'Reverse lunges', 'Lateral lunges'],
    'equipment_required': ['Dumbbells or barbell'],
    'equipment_optional': 'Weight vest',
    'setup_time': 2,
    'space_required': 'Moderate',
    'rep_range_min': 8,
    'rep_range_max': 15,
    'tempo': '2-1-2-1',
    'range_of_motion': 'Full',
    'compound_vs_isolation': 'Compound',
    'injury_risk_level': 'Moderate',
    'contraindications': 'Knee injuries, hip mobility issues, balance problems',
    'common_mistakes': ['Knee extending past toes', 'Leaning forward', 'Insufficient depth'],
    'safety_notes': 'Master bodyweight version first, keep torso upright',
    'category': 'Strength',
    'training_style': ['Functional', 'Athletic', 'Bodybuilding'],
    'experience_level': ['Beginner', 'Intermediate', 'Advanced'],
    'goals': ['Strength', 'Hypertrophy', 'Functional'],
    'duration_minutes': 4,
    'popularity_score': 9,
    'alternatives': ['Split squats', 'Step ups', 'Bulgarian split squats'],
    'supersets_well_with': ['Upper body exercises', 'Core work', 'Calf raises']
},

{
    'name': 'Russian Twists',
    'description': 'Core exercise targeting obliques and transverse abdominis through rotational movement.',
    'instructions': '1. Sit with knees bent, feet lifted or on ground\n2. Lean back to 45-degree angle\n3. Hold weight or clasp hands together\n4. Rotate torso left and right\n5. Keep chest up and core engaged',
    'exercise_type': 'Bodyweight',
    'movement_pattern': 'Rotation',
    'primary_muscles': ['Obliques', 'Transverse abdominis'],
    'secondary_muscles': ['Rectus abdominis', 'Hip flexors'],
    'muscle_groups': 'Core',
    'unilateral': False,
    'difficulty_rating': 4,
    'prerequisites': 'Basic core strength',
    'progressions': ['Seated twists', 'Dead bug'],
    'regressions': ['Weighted Russian twists', 'Feet elevated twists'],
    'equipment_required': [],
    'equipment_optional': 'Medicine ball, dumbbell, or weight plate',
    'setup_time': 1,
    'space_required': 'Minimal',
    'rep_range_min': 15,
    'rep_range_max': 30,
    'tempo': '1-0-1-0',
    'range_of_motion': 'Full rotation',
    'compound_vs_isolation': 'Isolation',
    'injury_risk_level': 'Low',
    'contraindications': 'Lower back injuries, neck problems',
    'common_mistakes': ['Pulling on neck', 'Moving too fast', 'Not engaging core'],
    'safety_notes': 'Keep movements controlled, avoid excessive spinal flexion',
    'category': 'Strength',
    'training_style': ['Core training', 'General fitness'],
    'experience_level': ['Beginner', 'Intermediate', 'Advanced'],
    'goals': ['Core strength', 'Stability', 'Functional'],
    'duration_minutes': 3,
    'popularity_score': 8,
    'alternatives': ['Bicycle crunches', 'Wood chops', 'Plank variations'],
    'supersets_well_with': ['Planks', 'Lower body exercises', 'Cardio']
},
{
    'name': 'Banded Fire Hydrant',
    'description': 'Hip abduction exercise using resistance band to target glute medius and improve hip stability.',
    'instructions': '1. Start on hands and knees with band around thighs\n2. Keep core engaged and spine neutral\n3. Lift one knee out to side against band resistance\n4. Hold briefly at top\n5. Lower with control and repeat',
    'exercise_type': 'Resistance band',
    'movement_pattern': 'Abduction',
    'primary_muscles': ['Glute medius', 'Glute minimus'],
    'secondary_muscles': ['Glute maximus', 'Core', 'Hip stabilizers'],
    'muscle_groups': 'Lower body',
    'unilateral': True,
    'difficulty_rating': 3,
    'prerequisites': 'Basic hip mobility',
    'progressions': ['Bodyweight fire hydrants'],
    'regressions': ['Pulse variations', 'Heavier resistance bands'],
    'equipment_required': ['Resistance band'],
    'equipment_optional': 'Exercise mat',
    'setup_time': 2,
    'space_required': 'Minimal',
    'rep_range_min': 12,
    'rep_range_max': 20,
    'tempo': '2-1-2-1',
    'range_of_motion': 'Full hip abduction',
    'compound_vs_isolation': 'Isolation',
    'injury_risk_level': 'Low',
    'contraindications': 'Knee injuries, hip impingement, wrist pain',
    'common_mistakes': ['Rotating pelvis', 'Lifting leg too high', 'Arching back'],
    'safety_notes': 'Keep pelvis square, focus on glute activation',
    'category': 'Strength',
    'training_style': ['Rehabilitation', 'Activation', 'Functional'],
    'experience_level': ['Beginner', 'Intermediate'],
    'goals': ['Hip stability', 'Glute activation', 'Injury prevention'],
    'duration_minutes': 3,
    'popularity_score': 7,
    'alternatives': ['Side-lying leg lifts', 'Clamshells', 'Lateral walks'],
    'supersets_well_with': ['Glute bridges', 'Squats', 'Other activation exercises']
}
,
{
    'name': 'Pushup',
    'description': 'Classic bodyweight exercise targeting chest, shoulders, and triceps through horizontal pushing movement.',
    'instructions': '1. Start in plank position with hands shoulder-width apart\n2. Lower body until chest nearly touches ground\n3. Keep body in straight line from head to heels\n4. Push back up to starting position\n5. Maintain core engagement throughout',
    'exercise_type': 'Bodyweight',
    'movement_pattern': 'Push',
    'primary_muscles': ['Pectorals', 'Anterior deltoids', 'Triceps'],
    'secondary_muscles': ['Core', 'Serratus anterior', 'Upper back'],
    'muscle_groups': 'Upper body',
    'unilateral': False,
    'difficulty_rating': 4,
    'prerequisites': 'Basic upper body and core strength',
    'progressions': ['Wall pushups', 'Incline pushups', 'Knee pushups'],
    'regressions': ['Diamond pushups', 'One-arm pushups', 'Weighted pushups'],
    'equipment_required': [],
    'equipment_optional': 'Exercise mat',
    'setup_time': 1,
    'space_required': 'Minimal',
    'rep_range_min': 8,
    'rep_range_max': 25,
    'tempo': '2-0-2-0',
    'range_of_motion': 'Full',
    'compound_vs_isolation': 'Compound',
    'injury_risk_level': 'Low',
    'contraindications': 'Wrist injuries, shoulder impingement, lower back pain',
    'common_mistakes': ['Sagging hips', 'Flaring elbows too wide', 'Partial range of motion'],
    'safety_notes': 'Keep neutral spine, modify if wrist pain occurs',
    'category': 'Strength',
    'training_style': ['Calisthenics', 'General fitness', 'Military training'],
    'experience_level': ['Beginner', 'Intermediate', 'Advanced'],
    'goals': ['Strength', 'Endurance', 'Functional'],
    'duration_minutes': 2,
    'popularity_score': 10,
    'alternatives': ['Chest press', 'Dumbbell press', 'Dips'],
    'supersets_well_with': ['Pull exercises', 'Core work', 'Lower body exercises']
},
{
    'name': 'Wood Chop Oblique Twist',
    'description': 'Dynamic rotational core exercise targeting obliques through diagonal movement pattern mimicking wood chopping motion.',
    'instructions': '1. Stand with feet shoulder-width apart holding weight\n2. Start with weight at one shoulder\n3. Rotate and chop diagonally across body to opposite knee\n4. Engage core and pivot on back foot\n5. Return to start position with control',
    'exercise_type': 'Free weights',
    'movement_pattern': 'Rotation',
    'primary_muscles': ['Obliques', 'Transverse abdominis'],
    'secondary_muscles': ['Rectus abdominis', 'Shoulders', 'Hip flexors'],
    'muscle_groups': 'Core',
    'unilateral': False,
    'difficulty_rating': 5,
    'prerequisites': 'Good core stability and coordination',
    'progressions': ['Bodyweight wood chops', 'Half wood chops'],
    'regressions': ['Cable wood chops', 'Medicine ball slams'],
    'equipment_required': ['Dumbbell or medicine ball'],
    'equipment_optional': 'Cable machine',
    'setup_time': 2,
    'space_required': 'Moderate',
    'rep_range_min': 10,
    'rep_range_max': 20,
    'tempo': '2-0-2-1',
    'range_of_motion': 'Full diagonal rotation',
    'compound_vs_isolation': 'Compound',
    'injury_risk_level': 'Moderate',
    'contraindications': 'Lower back injuries, shoulder impingement, hip problems',
    'common_mistakes': ['Using only arms', 'Moving too fast', 'Not engaging core'],
    'safety_notes': 'Start with light weight, control the movement throughout',
    'category': 'Strength',
    'training_style': ['Functional', 'Athletic', 'Core training'],
    'experience_level': ['Intermediate', 'Advanced'],
    'goals': ['Core strength', 'Power', 'Functional movement'],
    'duration_minutes': 3,
    'popularity_score': 7,
    'alternatives': ['Russian twists', 'Cable rotations', 'Bicycle crunches'],
    'supersets_well_with': ['Planks', 'Anti-rotation exercises', 'Compound movements']
}

]

In [2]:
import sqlite3
import json
import utils.db_utils
from config import Config

def add_exercise(data):
    with sqlite3.connect(Config.DB_PATH) as conn:
        c = conn.cursor()
        c.execute('''
        INSERT INTO exercises (
            name, description, instructions, exercise_type, movement_pattern,
            primary_muscles, secondary_muscles, muscle_groups, unilateral,
            difficulty_rating, prerequisites, progressions, regressions,
            equipment_required, equipment_optional, setup_time, space_required,
            rep_range_min, rep_range_max, tempo, range_of_motion, compound_vs_isolation,
            injury_risk_level, contraindications, common_mistakes, safety_notes,
            image_url, video_url, gif_url, diagram_url,
            category, training_style, experience_level, goals,
            duration_minutes, popularity_score, alternatives, supersets_well_with
        ) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
        ''', [
            data.get('name'), data.get('description'), data.get('instructions'), data.get('exercise_type'), data.get('movement_pattern'),
            json.dumps(data.get('primary_muscles')), json.dumps(data.get('secondary_muscles')), data.get('muscle_groups'), data.get('unilateral'),
            data.get('difficulty_rating'), data.get('prerequisites'), json.dumps(data.get('progressions')), json.dumps(data.get('regressions')),
            json.dumps(data.get('equipment_required')), data.get('equipment_optional'), data.get('setup_time'), data.get('space_required'),
            data.get('rep_range_min'), data.get('rep_range_max'), data.get('tempo'), data.get('range_of_motion'), data.get('compound_vs_isolation'),
            data.get('injury_risk_level'), data.get('contraindications'), json.dumps(data.get('common_mistakes')), data.get('safety_notes'),
            data.get('image_url'), data.get('video_url'), data.get('gif_url'), data.get('diagram_url'),
            data.get('category'), json.dumps(data.get('training_style')), json.dumps(data.get('experience_level')), json.dumps(data.get('goals')),
            data.get('duration_minutes'), data.get('popularity_score'), json.dumps(data.get('alternatives')), json.dumps(data.get('supersets_well_with'))
        ])
        conn.commit()

# Function to populate database
def populate_exercise_database():
    """
    Populate the exercise database with all exercises.
    Call this function to insert all exercise data into your database.
    """
    print("Starting to populate exercise database...")
    
    for i, exercise in enumerate(exercises_data, 1):
        try:
            add_exercise(exercise)
            print(f"âœ“ Added exercise {i}/{len(exercises_data)}: {exercise['name']}")
        except Exception as e:
            print(f"âœ— Failed to add {exercise['name']}: {str(e)}")
    
    print(f"\nDatabase population complete! Added {len(exercises_data)} exercises.")

# Function to add individual exercises (useful for testing)
def add_single_exercise(exercise_name):
    """
    Add a single exercise by name for testing purposes.
    """
    exercise = next((ex for ex in exercises_data if ex['name'] == exercise_name), None)
    if exercise:
        try:
            add_exercise(exercise)
            print(f"âœ“ Successfully added: {exercise_name}")
        except Exception as e:
            print(f"âœ— Failed to add {exercise_name}: {str(e)}")
    else:
        print(f"Exercise '{exercise_name}' not found in dataset")

# Function to get exercise data (useful for inspection)
def get_exercise_data(exercise_name):
    """
    Get the data dictionary for a specific exercise.
    """
    return next((ex for ex in exercises_data if ex['name'] == exercise_name), None)

In [1]:
from services import runstrong_service 

build = runstrong_service.RunStrongService('strava_data.db')
# build.initialize_runstrong_database()
x = build.run_daily_update()

2025-06-05 20:15:39,934 [INFO] Starting daily fatigue tracking update...
2025-06-05 20:15:39,946 [INFO] Sample data inserted
2025-06-05 20:15:39,948 [INFO] Updating muscle group fatigue...
2025-06-05 20:15:39,950 [INFO] Processing muscle groups: ['Rear Deltoids', 'Rhomboids', 'Brachioradialis', 'Upper Body', 'Glute Maximus', 'Levator Scapulae', 'Lower Body', 'Latissimus Dorsi', 'Ankles', 'Shoulders', 'Lower Back', 'Upper Trapezius', 'Obliques', 'Full Body', 'Stabilizers', 'Erector Spinae', 'Calves', 'Back', 'Quadriceps', 'Glute Minimus', 'Lower Abdominals', 'Hip Flexors', 'Brachialis', 'Forearms', 'Glutes', 'Deep Abdominals', 'Transverse Abdominis', 'Hamstrings', 'Hip Stabilizers', 'Lower Trapezius', 'Glute Medius', 'Serratus Anterior', 'Middle Trapezius', 'Rectus Abdominis', 'Chest', 'Arms', 'Core', 'Pectorals', 'Achilles Tendon', 'Upper Back', 'Anterior Deltoids']
2025-06-05 20:15:39,951 [INFO] Processing muscle group: Rear Deltoids
2025-06-05 20:15:39,954 [INFO] Processing muscle gr

In [14]:
print(len(exercises_data[0]))

34


In [3]:
populate_exercise_database()

Starting to populate exercise database...
âœ— Failed to add Box Jump: UNIQUE constraint failed: exercises.name
âœ— Failed to add Kettlebell Romanian Deadlift with Dumbbells: UNIQUE constraint failed: exercises.name
âœ— Failed to add Deadlift: UNIQUE constraint failed: exercises.name
âœ— Failed to add Pull-up: UNIQUE constraint failed: exercises.name
âœ— Failed to add Plank: UNIQUE constraint failed: exercises.name
âœ— Failed to add Banded Clams: UNIQUE constraint failed: exercises.name
âœ— Failed to add Leg Raise: UNIQUE constraint failed: exercises.name
âœ— Failed to add Lateral Pull Down: UNIQUE constraint failed: exercises.name
âœ— Failed to add Weighted Bar Squat: UNIQUE constraint failed: exercises.name
âœ— Failed to add Eccentric Heel Drop: UNIQUE constraint failed: exercises.name
âœ— Failed to add Bulgarian Split Squat: UNIQUE constraint failed: exercises.name
âœ— Failed to add Single-Arm Press With Balance: UNIQUE constraint failed: exercises.name
âœ— Failed to add Weighted Off

In [None]:
import sqlite3
from config import Config
import pandas as pd
from utils import db_utils

with db_utils.get_db_connection(Config.DB_PATH) as conn:
        c = conn.cursor()
        output = c.execute("""
select * from activities order by start_date_local desc limit 3""").fetchall()
        # conn.commit()
        # query = """
        # SELECT date, total_tss, ctl, atl, tsb
        # FROM daily_training_metrics
        # ORDER BY date DESC
        # """

        # df = pd.read_sql(query, conn)
        df_2 = db_utils.get_latest_daily_training_metrics(conn=conn)
# print(db_eval)
print(output[0])
# print(round(df['total_tss']), 1)
print(pd.DataFrame.from_dict(df_2[0], orient='index').loc["ctl"])

{'id': 14695490446, 'resource_state': 2, 'athlete_id': 24266563, 'athlete_resource_state': 1, 'name': 'Morning Run', 'distance': 10284.1, 'moving_time': 3089, 'elapsed_time': 3089, 'total_elevation_gain': 35.0, 'type': 'Run', 'sport_type': 'Run', 'workout_type': 0, 'start_date': '2025-06-04T15:14:08Z', 'start_date_local': '2025-06-04T10:14:08Z', 'timezone': '(GMT-06:00) America/Chicago', 'utc_offset': -18000.0, 'location_city': None, 'location_state': None, 'location_country': None, 'achievement_count': 0, 'kudos_count': 1, 'comment_count': 0, 'athlete_count': 1, 'photo_count': 0, 'map_id': 'a14695490446', 'map_summary_polyline': '{}jqGnuqxPCoCi@uDKaCB}@D]XyA^_AT_@J[VYHCpAT`@Ax@KnBa@VCJCRO^KDIAOSu@SiCA]@MTg@F[Aa@IeA?_@L}@@qBRaCJkGD}FDaBPsBJcCCcAFuBAcARiDHm@Am@GQEEE?GFCJ?LDLR\\RRHDJ@JEj@k@VIJAL@VJZZNZBNBjCj@hCN\\JAL_@PyA`@cADO?KAKGCGBUZWp@sAfFSfAUx@UbBOvD@lACv@Br@Jl@XrCd@lCb@tBNbBX|AXz@j@x@t@nBn@z@`@z@JRJl@DJPR`An@l@f@`BdB~ArBx@z@dCpBpAz@lAj@jBh@rAZ`@FrBRj@Ll@FlABb@Fp@@h@Fz@Bv@FvAGn@KbA

: 

In [10]:
db_eval

[{'name': 'activities'},
 {'name': 'streams'},
 {'name': 'gear'},
 {'name': 'daily_training_metrics'},
 {'name': 'sqlite_sequence'},
 {'name': 'workout_routines'},
 {'name': 'routine_exercises'},
 {'name': 'workout_performance'},
 {'name': 'exercises'},
 {'name': 'conversations'}]

In [None]:
[{'name': 'activities'},
 {'name': 'streams'},
 {'name': 'gear'},
 {'name': 'daily_training_metrics'},
 {'name': 'sqlite_sequence'},
 {'name': 'workout_routines'},
 {'name': 'routine_exercises'},
 {'name': 'workout_performance'},
 {'name': 'exercises'}]

In [None]:
[('main', 'routine_exercises', 'table', 8, 0, 0),
 ('main', 'workout_routines', 'table', 3, 0, 0),
 ('main', 'exercises', 'table', 39, 0, 0),
 ('main', 'sqlite_sequence', 'table', 2, 0, 0),
 ('main', 'sqlite_schema', 'table', 5, 0, 0),
 ('temp', 'sqlite_temp_schema', 'table', 5, 0, 0)]

## Daily Metrics Load

In [1]:
from config import Config
import pandas as pd

from utils import db_utils

df = db_utils.get_ctl_atl_tsb_tss_data().tail(1)
try:
    with db_utils.get_db_connection(Config.DB_PATH) as conn:
        db_utils.update_daily_training_metrics(conn=conn, df=df)
except Exception as e:
    print(f'db write failed: {e}')

The behavior will change in pandas 3.0. This inplace method will never work because the intermediate object on which we are setting values always behaves as a copy.

For example, when doing 'df[col].method(value, inplace=True)', try using 'df.method({col: value}, inplace=True)' or df[col] = df[col].method(value) instead, to perform the operation inplace on the original object.


  daily_df['tss'].fillna(0, inplace=True)


In [2]:
print(df.tail(1))

         date  tss        CTL        ATL       TSB
90 2025-06-04  0.0  66.562369  65.019856  1.542514


In [5]:
from utils import language_model_utils
coach_g = language_model_utils.LanguageModel()
print(coach_g.generate_daily_training_summary())

  from .autonotebook import tqdm as notebook_tqdm
Note: Environment variable`HF_TOKEN` is set and is the current active token independently from the token you've just configured.
Loading checkpoint shards: 100%|â–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆâ–ˆ| 2/2 [00:00<00:00,  2.06it/s]
Device set to use cpu


LanguageModelError: Failed to extract SQL: Cannot operate on a closed database.

In [7]:
def initialize_conversation_database():
    """Create the SQLite database and full 'activities' table."""
    conn = sqlite3.connect(Config.DB_PATH)
    c = conn.cursor()

    c.execute('''
    CREATE TABLE conversations (
    id INTEGER PRIMARY KEY AUTOINCREMENT,
    session_id TEXT,
    role TEXT, -- "user" or "coach"
    message TEXT,
    timestamp DATETIME DEFAULT CURRENT_TIMESTAMP
);
    ''')

    conn.commit()

In [8]:
initialize_conversation_database()

## Strava API Update catchup

After running all this, possible false alarm? Or strava updated their API docs without rolling out changes? 

Either way this works, and if needed in the future, just remove the constraints on running this for activities that were previously missing the columns and let it blast through all 2500 or so in a few calls

Update: You misread the documentation by looking at the activity/id type of data pull, not athlete/activity section. Still not fully matching up but if we ever want all that data, this will make it happen 

In [30]:
import sqlite3
import requests
import json
import os
import logging
from datetime import timedelta
from datetime import datetime
from typing import Dict, List, Optional, Any, Union
from dotenv import load_dotenv

from utils import db_utils
from config import Config

# Configure module-level logger
logger = logging.getLogger(__name__)

In [26]:
def backfill_activities_from_api(access_token: str, db_path: str, 
                                 activity_ids: List[int] = None,
                                 **optional_parameters) -> int:
    """
    Fetch activities from Strava API and update database with new fields.
    
    Args:
        access_token: Valid Strava access token
        db_path: Path to the SQLite database
        activity_ids: Specific activity IDs to update (if None, fetches all)
        **optional_parameters: Additional query parameters (before, after timestamps)
        
    Returns:
        Number of activities successfully updated
        
    Raises:
        requests.HTTPError: If API request fails
        sqlite3.Error: If database operations fail
    """
    
    def get_activities_page(page: int = 1, per_page: int = 30) -> List[Dict[str, Any]]:
        """Internal function to fetch a single page of activities."""
        url = "https://www.strava.com/api/v3/athlete/activities"
        headers = {"Authorization": f"Bearer {access_token}"}
        
        params = {
            "page": page,
            "per_page": min(per_page, 200)
        }
        params.update(optional_parameters)
        
        try:
            logger.debug(f"Fetching activities page {page} with params: {params}")
            response = requests.get(url, headers=headers, params=params, timeout=30)
            response.raise_for_status()
            
            activities = response.json()
            logger.info(f"Successfully fetched {len(activities)} activities on page {page}")
            return activities
            
        except requests.exceptions.Timeout:
            logger.error("Request timeout when fetching activities")
            raise
        except requests.exceptions.HTTPError as e:
            logger.error(f"HTTP error fetching activities: {e}")
            raise
        except requests.exceptions.RequestException as e:
            logger.error(f"Request error fetching activities: {e}")
            raise
    
    def update_activities_in_db(activity_list: List[Dict[str, Any]]) -> int:
        """Internal function to update activities in database."""
        if not activity_list:
            return 0
            
        try:
            with sqlite3.connect(db_path) as conn:
                cursor = conn.cursor()
                
                data = []
                for activity in activity_list:
                    # Skip if filtering by activity_ids and this isn't in the list
                    if activity_ids and activity.get("id") not in activity_ids:
                        continue
                        
                    gear_data = activity.get("gear", {})
                    photos_data = activity.get("photos", {})
                    
                    data.append({
                        "id": activity.get("id"),
                        "average_temp": activity.get("average_temp"),
                        "calories": activity.get("calories"),
                        "suffer_score": activity.get("suffer_score"),
                        "description": activity.get("description"),
                        "segment_efforts": json.dumps(activity.get("segment_efforts")) if activity.get("segment_efforts") else None,
                        "splits_metric": json.dumps(activity.get("splits_metric")) if activity.get("splits_metric") else None,
                        "laps": json.dumps(activity.get("laps")) if activity.get("laps") else None,
                        "gear": json.dumps(gear_data) if gear_data else None,
                        "partner_brand_tag": activity.get("partner_brand_tag"),
                        "photos": json.dumps(photos_data) if photos_data else None,
                        "highlighted_kudosers": json.dumps(activity.get("highlighted_kudosers")) if activity.get("highlighted_kudosers") else None,
                        "hide_from_home": activity.get("hide_from_home"),
                        "device_name": activity.get("device_name"),
                        "embed_token": activity.get("embed_token"),
                        "segment_leaderboard_opt_out": activity.get("segment_leaderboard_opt_out"),
                        "leaderboard_opt_out": activity.get("leaderboard_opt_out")
                    })
                
                if not data:
                    return 0
                
                cursor.executemany('''
                    UPDATE activities SET
                        average_temp = :average_temp,
                        calories = :calories,
                        suffer_score = :suffer_score,
                        description = :description,
                        segment_efforts = :segment_efforts,
                        splits_metric = :splits_metric,
                        laps = :laps,
                        gear = :gear,
                        partner_brand_tag = :partner_brand_tag,
                        photos = :photos,
                        highlighted_kudosers = :highlighted_kudosers,
                        hide_from_home = :hide_from_home,
                        device_name = :device_name,
                        embed_token = :embed_token,
                        segment_leaderboard_opt_out = :segment_leaderboard_opt_out,
                        leaderboard_opt_out = :leaderboard_opt_out
                    WHERE id = :id
                ''', data)
                
                rows_affected = cursor.rowcount
                conn.commit()
                return rows_affected
                
        except sqlite3.Error as e:
            logger.error(f"Database error during activity update: {e}")
            raise
    
    # Main execution
    total_updated = 0
    api_calls_used = 0
    
    try:
        if activity_ids:
            # If specific IDs provided, we still need to fetch from API
            # Strava doesn't support filtering by ID in list activities endpoint
            # So we fetch pages until we find all requested IDs
            page = 1
            found_ids = set()
            
            while len(found_ids) < len(activity_ids) and api_calls_used < 900:  # Leave buffer
                activities = get_activities_page(page=page, per_page=200)
                api_calls_used += 1
                
                if not activities:  # No more activities
                    break
                    
                # Check which requested IDs are in this batch
                page_ids = {act.get("id") for act in activities if act.get("id") in activity_ids}
                found_ids.update(page_ids)
                
                if page_ids:  # Only update if we found requested activities
                    updated = update_activities_in_db(activities)
                    total_updated += updated
                    logger.info(f"Updated {updated} activities from page {page}")
                
                page += 1
                
        else:
            # Fetch all activities with pagination
            page = 1
            while api_calls_used < 900:  # Leave buffer for rate limit
                activities = get_activities_page(page=page, per_page=200)
                api_calls_used += 1
                
                if not activities:  # No more activities
                    break
                    
                updated = update_activities_in_db(activities)
                total_updated += updated
                logger.info(f"Updated {updated} activities from page {page}")
                
                page += 1
        
        logger.info(f"Backfill complete. Updated {total_updated} activities using {api_calls_used} API calls")
        return total_updated
        
    except Exception as e:
        logger.error(f"Error during backfill operation: {e}")
        raise

In [27]:
def get_activities_needing_update(db_path: str) -> List[int]:
    """
    Get list of activity IDs that need new field updates.
    
    Returns:
        List of activity IDs missing new field data
    """
    try:
        with sqlite3.connect(db_path) as conn:
            cursor = conn.cursor()
            
            # Find activities missing new field data (assuming average_temp is a good indicator)
            cursor.execute('''
                SELECT id FROM activities 
                WHERE average_temp IS NULL 
                   OR calories IS NULL 
                   OR device_name IS NULL
                ORDER BY start_date DESC
            ''')
            
            return [row[0] for row in cursor.fetchall()]
            
    except sqlite3.Error as e:
        logger.error(f"Database error getting activities needing update: {e}")
        raise

def prioritized_backfill_strategy(access_token: str, db_path: str, 
                                 max_api_calls: int = 900) -> int:
    """
    Efficient backfill strategy that prioritizes recent activities.
    
    Args:
        access_token: Valid Strava access token
        db_path: Path to the SQLite database
        max_api_calls: Maximum API calls to use (default: 900 to leave buffer)
        
    Returns:
        Number of activities successfully updated
    """
    
    # Strategy 1: Get activities that definitely need updates
    activities_needing_update = get_activities_needing_update(db_path)
    
    if not activities_needing_update:
        logger.info("All activities already have new field data")
        return 0
    
    logger.info(f"Found {len(activities_needing_update)} activities needing updates")
    
    # Strategy 2: Prioritize recent activities (last 6 months)
    six_months_ago = datetime.now() - timedelta(days=180)
    six_months_timestamp = int(six_months_ago.timestamp())
    
    # Start with recent activities first
    recent_updated = backfill_activities_from_api(
        access_token=access_token,
        db_path=db_path,
        activity_ids=activities_needing_update[:100],  # Limit to manage API calls
        after=six_months_timestamp
    )
    
    # Check remaining API call budget
    # Estimate: ~4-5 API calls per 200 activities (depends on activity density)
    remaining_calls = max_api_calls - (len(activities_needing_update[:100]) // 40)
    
    if remaining_calls > 50:
        # Strategy 3: Fill remaining budget with older activities
        older_activities = activities_needing_update[100:]
        if older_activities:
            older_updated = backfill_activities_from_api(
                access_token=access_token,
                db_path=db_path,
                activity_ids=older_activities[:remaining_calls * 40],  # Rough estimate
                before=six_months_timestamp
            )
            return recent_updated + older_updated
    
    return recent_updated

In [None]:
# Update only activities missing new data
total_updated = prioritized_backfill_strategy(access_token = ACCESS_TOKEN, db_path=Config.DB_PATH, max_api_calls=900)

In [None]:
j = get_activities(access_token=ACCESS_TOKEN, page = 1, per_page= 1)

In [44]:
j

[{'resource_state': 2,
  'athlete': {'id': 24266563, 'resource_state': 1},
  'name': 'Morning Run',
  'distance': 10284.1,
  'moving_time': 3089,
  'elapsed_time': 3089,
  'total_elevation_gain': 35.0,
  'type': 'Run',
  'sport_type': 'Run',
  'workout_type': 0,
  'id': 14695490446,
  'start_date': '2025-06-04T15:14:08Z',
  'start_date_local': '2025-06-04T10:14:08Z',
  'timezone': '(GMT-06:00) America/Chicago',
  'utc_offset': -18000.0,
  'location_city': None,
  'location_state': None,
  'location_country': None,
  'achievement_count': 0,
  'kudos_count': 13,
  'comment_count': 0,
  'athlete_count': 1,
  'photo_count': 0,
  'map': {'id': 'a14695490446',
   'summary_polyline': '{}jqGnuqxPCoCi@uDKaCB}@D]XyA^_AT_@J[VYHCpAT`@Ax@KnBa@VCJCRO^KDIAOSu@SiCA]@MTg@F[Aa@IeA?_@L}@@qBRaCJkGD}FDaBPsBJcCCcAFuBAcARiDHm@Am@GQEEE?GFCJ?LDLR\\RRHDJ@JEj@k@VIJAL@VJZZNZBNBjCj@hCN\\JAL_@PyA`@cADO?KAKGCGBUZWp@sAfFSfAUx@UbBOvD@lACv@Br@Jl@XrCd@lCb@tBNbBX|AXz@j@x@t@nBn@z@`@z@JRJl@DJPR`An@l@f@`BdB~ArBx@z@dCpBpAz@l

In [None]:
j2 = get_activities(access_token=ACCESS_TOKEN, page = 1, per_page= 1, include_all_efforts = True)

In [46]:
j2

[{'resource_state': 2,
  'athlete': {'id': 24266563, 'resource_state': 1},
  'name': 'Morning Run',
  'distance': 10284.1,
  'moving_time': 3089,
  'elapsed_time': 3089,
  'total_elevation_gain': 35.0,
  'type': 'Run',
  'sport_type': 'Run',
  'workout_type': 0,
  'id': 14695490446,
  'start_date': '2025-06-04T15:14:08Z',
  'start_date_local': '2025-06-04T10:14:08Z',
  'timezone': '(GMT-06:00) America/Chicago',
  'utc_offset': -18000.0,
  'location_city': None,
  'location_state': None,
  'location_country': None,
  'achievement_count': 0,
  'kudos_count': 13,
  'comment_count': 0,
  'athlete_count': 1,
  'photo_count': 0,
  'map': {'id': 'a14695490446',
   'summary_polyline': '{}jqGnuqxPCoCi@uDKaCB}@D]XyA^_AT_@J[VYHCpAT`@Ax@KnBa@VCJCRO^KDIAOSu@SiCA]@MTg@F[Aa@IeA?_@L}@@qBRaCJkGD}FDaBPsBJcCCcAFuBAcARiDHm@Am@GQEEE?GFCJ?LDLR\\RRHDJ@JEj@k@VIJAL@VJZZNZBNBjCj@hCN\\JAL_@PyA`@cADO?KAKGCGBUZWp@sAfFSfAUx@UbBOvD@lACv@Br@Jl@XrCd@lCb@tBNbBX|AXz@j@x@t@nBn@z@`@z@JRJl@DJPR`An@l@f@`BdB~ArBx@z@dCpBpAz@l

In [None]:
"""
RunStrong Database Schema Migration and Utilities - CORRECTED & ROBUST (v3)

Migrates from a denormalized wide 'exercises' table to a properly normalized schema.
This script ensures 100% data integrity by using a map-based strategy to handle
duplicate or messy data in the original table, ensuring all historical workout
data is correctly re-linked.

Corrections implemented in v3:
- Re-architected the exercise migration logic to insert exercises one-by-one
  and capture their new IDs immediately using cursor.lastrowid.
- This completely avoids the issue of a SELECT not seeing data from a pending
  INSERT within the same transaction, which was causing the ID map to be empty
  and preventing dependent data (workouts, routines) from being migrated.
"""
import sqlite3
import json
import logging
import shutil
from typing import Dict, List, Set, Any, Optional
from contextlib import contextmanager
from datetime import datetime
from collections import defaultdict

# --- Basic Setup ---
logging.basicConfig(level=logging.INFO, format='%(asctime)s - %(levelname)s - %(message)s')
logger = logging.getLogger(__name__)

# --- Utility Functions ---

@contextmanager
def db_connection(db_path: str):
    """Context manager for SQLite connection with foreign keys enabled."""
    conn = None
    try:
        conn = sqlite3.connect(db_path)
        conn.execute("PRAGMA foreign_keys = ON")
        conn.row_factory = sqlite3.Row
        yield conn
    except sqlite3.Error as e:
        logger.error(f"Database connection or operation failed: {e}", exc_info=True)
        if conn:
            conn.rollback()
        raise
    finally:
        if conn:
            conn.close()


def backup_database(db_path: str) -> Optional[str]:
    """Create a timestamped backup of the database."""
    if not db_path:
        logger.error("DB path is not specified. Cannot create backup.")
        return None
    try:
        timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
        backup_path = f"{db_path}.backup_{timestamp}"
        shutil.copy2(db_path, backup_path)
        logger.info(f"Database backed up to: {backup_path}")
        return backup_path
    except FileNotFoundError:
        logger.error(f"Source database file not found at {db_path}. Cannot create backup.")
        return None
    except Exception as e:
        logger.error(f"Failed to create backup: {e}", exc_info=True)
        return None

# --- Migration Class ---

class RunStrongMigration:
    """Handles migration from a denormalized to a normalized schema efficiently and safely."""

    def __init__(self, db_path: str):
        self.db_path = db_path
        self.maps = {
            "old_id_to_new_id": {},
            "old_name_to_old_id": {},
            "muscle_name_to_id": {},
            "equipment_name_to_id": {},
            "goal_name_to_id": {},
        }

    def _create_normalized_schema(self, conn: sqlite3.Connection):
        """Creates all new tables with a '_new' suffix and CORRECT foreign key constraints."""
        cursor = conn.cursor()
        logger.info("Creating new normalized schema...")
        # (Schema creation code is unchanged and correct)
        cursor.execute('CREATE TABLE IF NOT EXISTS muscle_groups (id INTEGER PRIMARY KEY, name TEXT UNIQUE NOT NULL, category TEXT, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP)')
        cursor.execute('CREATE TABLE IF NOT EXISTS equipment (id INTEGER PRIMARY KEY, name TEXT UNIQUE NOT NULL, category TEXT, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP)')
        cursor.execute('CREATE TABLE IF NOT EXISTS training_goals (id INTEGER PRIMARY KEY, name TEXT UNIQUE NOT NULL, description TEXT, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP)')
        cursor.execute('''
            CREATE TABLE IF NOT EXISTS exercises_new (
                id INTEGER PRIMARY KEY, name TEXT UNIQUE NOT NULL, description TEXT,
                instructions TEXT, unilateral BOOLEAN DEFAULT 0, difficulty_rating TEXT,
                rep_range_min INTEGER, rep_range_max INTEGER, tempo TEXT,
                compound_vs_isolation TEXT, image_url TEXT, video_url TEXT,
                popularity_score INTEGER DEFAULT 0, created_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP,
                updated_at TIMESTAMP DEFAULT CURRENT_TIMESTAMP
            )
        ''')
        cursor.execute('CREATE TABLE IF NOT EXISTS exercise_muscle_groups (exercise_id INTEGER NOT NULL, muscle_group_id INTEGER NOT NULL, is_primary BOOLEAN DEFAULT 1, PRIMARY KEY (exercise_id, muscle_group_id, is_primary), FOREIGN KEY (exercise_id) REFERENCES exercises_new(id) ON DELETE CASCADE, FOREIGN KEY (muscle_group_id) REFERENCES muscle_groups(id) ON DELETE CASCADE)')
        cursor.execute('CREATE TABLE IF NOT EXISTS exercise_equipment (exercise_id INTEGER NOT NULL, equipment_id INTEGER NOT NULL, is_required BOOLEAN DEFAULT 1, PRIMARY KEY (exercise_id, equipment_id, is_required), FOREIGN KEY (exercise_id) REFERENCES exercises_new(id) ON DELETE CASCADE, FOREIGN KEY (equipment_id) REFERENCES equipment(id) ON DELETE CASCADE)')
        cursor.execute('CREATE TABLE IF NOT EXISTS exercise_goals (exercise_id INTEGER NOT NULL, goal_id INTEGER NOT NULL, PRIMARY KEY (exercise_id, goal_id), FOREIGN KEY (exercise_id) REFERENCES exercises_new(id) ON DELETE CASCADE, FOREIGN KEY (goal_id) REFERENCES training_goals(id) ON DELETE CASCADE)')
        cursor.execute('CREATE TABLE IF NOT EXISTS exercise_relationships (from_exercise_id INTEGER NOT NULL, to_exercise_id INTEGER NOT NULL, relationship_type TEXT NOT NULL CHECK(relationship_type IN ("progression", "regression", "alternative", "superset")), notes TEXT, UNIQUE (from_exercise_id, to_exercise_id, relationship_type), FOREIGN KEY (from_exercise_id) REFERENCES exercises_new(id) ON DELETE CASCADE, FOREIGN KEY (to_exercise_id) REFERENCES exercises_new(id) ON DELETE CASCADE)')
        cursor.execute('CREATE TABLE IF NOT EXISTS workout_routines_new (id INTEGER PRIMARY KEY, name TEXT, date_created DATE)')
        cursor.execute('CREATE TABLE IF NOT EXISTS routine_exercises_new (id INTEGER PRIMARY KEY, routine_id INTEGER, exercise_id INTEGER, sets INTEGER, reps INTEGER, load_lbs FLOAT, order_index INTEGER, notes TEXT, FOREIGN KEY (routine_id) REFERENCES workout_routines_new(id) ON DELETE CASCADE, FOREIGN KEY (exercise_id) REFERENCES exercises_new(id) ON DELETE CASCADE)')
        cursor.execute('CREATE TABLE IF NOT EXISTS workout_performance_new (id INTEGER PRIMARY KEY, routine_id INTEGER, exercise_id INTEGER, workout_date DATE, planned_sets INTEGER, actual_sets INTEGER, planned_reps INTEGER, actual_reps INTEGER, planned_load_lbs FLOAT, actual_load_lbs FLOAT, notes TEXT, completion_status TEXT, created_at TIMESTAMP, FOREIGN KEY (routine_id) REFERENCES workout_routines_new(id) ON DELETE CASCADE, FOREIGN KEY (exercise_id) REFERENCES exercises_new(id) ON DELETE CASCADE)')
        cursor.execute('CREATE TABLE IF NOT EXISTS exercise_progression_new (id INTEGER PRIMARY KEY, exercise_id INTEGER, current_1rm_estimate FLOAT, volume_trend_30day FLOAT, last_pr_date DATE, progression_rate FLOAT, stall_indicator BOOLEAN, FOREIGN KEY (exercise_id) REFERENCES exercises_new(id) ON DELETE CASCADE)')
        cursor.execute('CREATE TABLE IF NOT EXISTS muscle_group_fatigue_new (id INTEGER PRIMARY KEY, muscle_group_id INTEGER UNIQUE, last_trained_date DATE, volume_7day FLOAT, volume_14day FLOAT, recovery_score FLOAT, updated_at TIMESTAMP, FOREIGN KEY (muscle_group_id) REFERENCES muscle_groups(id) ON DELETE CASCADE)')
        logger.info("Schema creation complete.")

    def _parse_delimited_field(self, field_value: Optional[str]) -> Set[str]:
        """Parses a comma-separated or JSON-array string field into a set of clean strings."""
        # (This helper function is unchanged and correct)
        if not field_value: return set()
        cleaned_set = set()
        try:
            if isinstance(field_value, str) and field_value.strip().startswith('['):
                data = json.loads(field_value)
                if isinstance(data, list): cleaned_set.update(str(item).strip() for item in data if str(item).strip())
            else: cleaned_set.update(item.strip() for item in str(field_value).split(',') if item.strip())
        except (json.JSONDecodeError, TypeError):
            cleaned_set.update(item.strip() for item in str(field_value).replace('[','').replace(']','').replace('"','').split(',') if item.strip())
        return cleaned_set

    def _populate_lookup_tables(self, conn: sqlite3.Connection):
        """Populates lookup tables and the corresponding name-to-ID maps."""
        # (This method is unchanged and correct)
        logger.info("Populating lookup tables (muscles, equipment, goals)...")
        cursor = conn.cursor()
        all_muscles, all_equipment, all_goals = set(), set(), set()
        cursor.execute("SELECT primary_muscles, secondary_muscles, equipment_required, equipment_optional, goals, training_style FROM exercises")
        for row in cursor.fetchall():
            all_muscles.update(self._parse_delimited_field(row['primary_muscles']))
            all_muscles.update(self._parse_delimited_field(row['secondary_muscles']))
            all_equipment.update(self._parse_delimited_field(row['equipment_required']))
            all_equipment.update(self._parse_delimited_field(row['equipment_optional']))
            all_goals.update(self._parse_delimited_field(row['goals']))
            all_goals.update(self._parse_delimited_field(row['training_style']))
        if all_muscles: cursor.executemany("INSERT OR IGNORE INTO muscle_groups (name) VALUES (?)", [(m,) for m in sorted(list(all_muscles))])
        if all_equipment: cursor.executemany("INSERT OR IGNORE INTO equipment (name) VALUES (?)", [(e,) for e in sorted(list(all_equipment))])
        if all_goals: cursor.executemany("INSERT OR IGNORE INTO training_goals (name) VALUES (?)", [(g,) for g in sorted(list(all_goals))])
        cursor.execute("SELECT id, name FROM muscle_groups"); self.maps["muscle_name_to_id"] = {row['name']: row['id'] for row in cursor.fetchall()}
        cursor.execute("SELECT id, name FROM equipment"); self.maps["equipment_name_to_id"] = {row['name']: row['id'] for row in cursor.fetchall()}
        cursor.execute("SELECT id, name FROM training_goals"); self.maps["goal_name_to_id"] = {row['name']: row['id'] for row in cursor.fetchall()}
        logger.info("Lookup tables and maps populated.")

    def _build_id_map_and_migrate_exercises(self, conn: sqlite3.Connection):
        """
        Robustly migrates exercises by inserting them one-by-one to capture
        their new ID, which guarantees the ID map is populated correctly.
        """
        logger.info("Analyzing old exercises and migrating them to build ID map...")
        cursor = conn.cursor()
        cursor.execute("SELECT * FROM exercises")
        old_exercises = cursor.fetchall()
        
        if not old_exercises:
            logger.warning("No exercises found in the old table. Nothing to migrate.")
            return

        self.maps["old_name_to_old_id"] = {ex['name']: ex['id'] for ex in old_exercises if 'name' in ex and ex['name']}
        
        # Determine canonical "winner" for each exercise name
        cleaned_name_to_ids = defaultdict(list)
        for ex in old_exercises:
            if 'name' in ex and ex['name']:
                cleaned_name = ex['name'].strip().lower()
                cleaned_name_to_ids[cleaned_name].append(ex['id'])

        canonical_map = {} # Maps every old ID -> winner old ID
        for cleaned_name, old_ids in cleaned_name_to_ids.items():
            winner_id = min(old_ids)
            for old_id in old_ids:
                canonical_map[old_id] = winner_id

        # --- NEW LOGIC: INSERT 1-BY-1 AND MAP ---
        old_winner_id_to_new_id = {}
        winner_ids = sorted(list(set(canonical_map.values())))

        logger.info(f"Found {len(winner_ids)} canonical exercises to migrate.")

        for winner_id in winner_ids:
            # Find the full row data for the winner
            winner_row = next((ex for ex in old_exercises if ex['id'] == winner_id), None)
            if not winner_row: continue

            # Prepare tuple for insertion
            data_tuple = (
                winner_row['name'],
                winner_row['description'] if 'description' in winner_row else None,
                winner_row['instructions'] if 'instructions' in winner_row else None,
                winner_row['unilateral'] if 'unilateral' in winner_row and winner_row['unilateral'] is not None else 0,
                winner_row['difficulty_rating'] if 'difficulty_rating' in winner_row else None,
                winner_row['rep_range_min'] if 'rep_range_min' in winner_row else None,
                winner_row['rep_range_max'] if 'rep_range_max' in winner_row else None,
                winner_row['tempo'] if 'tempo' in winner_row else None,
                winner_row['compound_vs_isolation'] if 'compound_vs_isolation' in winner_row else None,
                winner_row['image_url'] if 'image_url' in winner_row else None,
                winner_row['video_url'] if 'video_url' in winner_row else None,
                winner_row['popularity_score'] if 'popularity_score' in winner_row else 0
            )

            try:
                # Insert the single row
                cursor.execute('''
                    INSERT INTO exercises_new (name, description, instructions, unilateral, difficulty_rating,
                                             rep_range_min, rep_range_max, tempo, compound_vs_isolation,
                                             image_url, video_url, popularity_score)
                    VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)
                ''', data_tuple)
                
                # Capture the new ID and map it
                new_id = cursor.lastrowid
                old_winner_id_to_new_id[winner_id] = new_id

            except sqlite3.IntegrityError:
                logger.warning(f"Could not insert exercise '{winner_row['name']}' due to UNIQUE constraint. It might be a duplicate under a different old ID. Skipping.")
                # We need to find the new ID of the existing record
                cursor.execute("SELECT id FROM exercises_new WHERE name = ?", (winner_row['name'],))
                existing_row = cursor.fetchone()
                if existing_row:
                    old_winner_id_to_new_id[winner_id] = existing_row['id']

        # Finally, build the master map for ALL old exercises
        for old_ex in old_exercises:
            old_id = old_ex['id']
            winner_id = canonical_map.get(old_id)
            if winner_id:
                new_id = old_winner_id_to_new_id.get(winner_id)
                if new_id:
                    self.maps["old_id_to_new_id"][old_id] = new_id

        logger.info(f"Successfully built ID map for {len(self.maps['old_id_to_new_id'])} old exercises.")


    def _migrate_links_and_relationships(self, conn: sqlite3.Connection):
        """Migrates all many-to-many links and relationships using the maps."""
        # (This method is unchanged and correct)
        logger.info("Migrating exercise links (muscles, equipment, goals, relationships)...")
        cursor = conn.cursor()
        muscle_links, equipment_links, goal_links, relationship_links = [], [], [], []
        cursor.execute("SELECT * FROM exercises")
        for old_ex in cursor.fetchall():
            new_id = self.maps["old_id_to_new_id"].get(old_ex['id'])
            if not new_id: continue
            for muscle in self._parse_delimited_field(old_ex['primary_muscles']):
                if self.maps["muscle_name_to_id"].get(muscle): muscle_links.append((new_id, self.maps["muscle_name_to_id"][muscle], 1))
            for muscle in self._parse_delimited_field(old_ex['secondary_muscles']):
                if self.maps["muscle_name_to_id"].get(muscle): muscle_links.append((new_id, self.maps["muscle_name_to_id"][muscle], 0))
            for item in self._parse_delimited_field(old_ex['equipment_required']):
                if self.maps["equipment_name_to_id"].get(item): equipment_links.append((new_id, self.maps["equipment_name_to_id"][item], 1))
            for goal in self._parse_delimited_field(old_ex['goals']):
                if self.maps["goal_name_to_id"].get(goal): goal_links.append((new_id, self.maps["goal_name_to_id"][goal]))
            rel_map = {'progressions': 'progression', 'regressions': 'regression', 'alternatives': 'alternative', 'supersets_well_with': 'superset'}
            for field, rel_type in rel_map.items():
                if field in old_ex and old_ex[field]:
                    for related_name in self._parse_delimited_field(old_ex[field]):
                        related_old_id = self.maps["old_name_to_old_id"].get(related_name)
                        related_new_id = self.maps["old_id_to_new_id"].get(related_old_id) if related_old_id else None
                        if related_new_id: relationship_links.append((new_id, related_new_id, rel_type))
        if muscle_links: cursor.executemany("INSERT OR IGNORE INTO exercise_muscle_groups VALUES (?, ?, ?)", muscle_links)
        if equipment_links: cursor.executemany("INSERT OR IGNORE INTO exercise_equipment VALUES (?, ?, ?)", equipment_links)
        if goal_links: cursor.executemany("INSERT OR IGNORE INTO exercise_goals VALUES (?, ?)", goal_links)
        if relationship_links: cursor.executemany("INSERT OR IGNORE INTO exercise_relationships (from_exercise_id, to_exercise_id, relationship_type) VALUES (?, ?, ?)", relationship_links)
        logger.info("Finished migrating links and relationships.")

    def _migrate_dependent_data(self, conn: sqlite3.Connection):
        """Migrates data from dependent tables, correctly mapping foreign keys."""
        # (This method is unchanged and correct)
        logger.info("Migrating dependent tables using the ID map...")
        cursor = conn.cursor()
        cursor.execute("INSERT INTO workout_routines_new (id, name, date_created) SELECT id, name, date_created FROM workout_routines");
        logger.info("Migrated workout_routines.")
        cursor.execute("SELECT * FROM routine_exercises")
        new_routine_exercises = []
        for row in cursor.fetchall():
            new_ex_id = self.maps["old_id_to_new_id"].get(row['exercise_id'])
            if new_ex_id: new_routine_exercises.append((row['id'], row['routine_id'], new_ex_id, row['sets'], row['reps'], row['load_lbs'], row['order_index'], row['notes']))
        cursor.executemany("INSERT INTO routine_exercises_new VALUES (?, ?, ?, ?, ?, ?, ?, ?)", new_routine_exercises)
        logger.info(f"Migrated {len(new_routine_exercises)} records for routine_exercises.")
        cursor.execute("SELECT * FROM workout_performance")
        new_performance_data = []
        for row in cursor.fetchall():
            new_ex_id = self.maps["old_id_to_new_id"].get(row['exercise_id'])
            if new_ex_id: new_performance_data.append((row['id'], row['routine_id'], new_ex_id, row['workout_date'], row['planned_sets'], row['actual_sets'], row['planned_reps'], row['actual_reps'], row['planned_load_lbs'], row['actual_load_lbs'], row['notes'], row['completion_status'], row['created_at']))
        cursor.executemany("INSERT INTO workout_performance_new VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?, ?)", new_performance_data)
        logger.info(f"Migrated {len(new_performance_data)} records for workout_performance.")
        cursor.execute("SELECT * FROM exercise_progression")
        new_progression_data = []
        for row in cursor.fetchall():
            new_ex_id = self.maps["old_id_to_new_id"].get(row['exercise_id'])
            if new_ex_id: new_progression_data.append((row['id'], new_ex_id, row['current_1rm_estimate'], row['volume_trend_30day'], row['last_pr_date'], row['progression_rate'], row['stall_indicator']))
        cursor.executemany("INSERT INTO exercise_progression_new VALUES (?, ?, ?, ?, ?, ?, ?)", new_progression_data)
        logger.info(f"Migrated {len(new_progression_data)} records for exercise_progression.")
        cursor.execute("SELECT * FROM muscle_group_fatigue")
        new_fatigue_data = []
        for row in cursor.fetchall():
            muscle_id = self.maps["muscle_name_to_id"].get(row['muscle_group'])
            if muscle_id: new_fatigue_data.append((row['id'], muscle_id, row['last_trained_date'], row['volume_7day'], row['volume_14day'], row['recovery_score'], row['updated_at']))
        cursor.executemany("INSERT OR IGNORE INTO muscle_group_fatigue_new VALUES (?, ?, ?, ?, ?, ?, ?)", new_fatigue_data)
        logger.info(f"Migrated {len(new_fatigue_data)} records for muscle_group_fatigue.")

    def _finalize_schema(self, conn: sqlite3.Connection):
        """Drops all old tables and renames new tables to finalize the migration."""
        # (This method is unchanged and correct)
        logger.info("Finalizing schema: Dropping old tables and renaming new ones...")
        cursor = conn.cursor()
        old_tables = ['exercise_progression', 'muscle_group_fatigue', 'weekly_training_summary', 'workout_performance', 'routine_exercises', 'workout_routines', 'exercises']
        for table in old_tables: cursor.execute(f"DROP TABLE IF EXISTS {table}")
        new_to_final_map = {'exercises_new': 'exercises', 'workout_routines_new': 'workout_routines', 'routine_exercises_new': 'routine_exercises', 'workout_performance_new': 'workout_performance', 'exercise_progression_new': 'exercise_progression', 'muscle_group_fatigue_new': 'muscle_group_fatigue'}
        for new_name, final_name in new_to_final_map.items(): cursor.execute(f"ALTER TABLE {new_name} RENAME TO {final_name}")
        logger.info("Schema finalized successfully.")

    def run_migration(self) -> bool:
        """Executes the full, transactional migration process."""
        # (This method is unchanged and correct)
        logger.info("--- Starting Database Migration Process ---")
        if not backup_database(self.db_path):
            logger.error("Migration halted due to backup failure.")
            return False
        try:
            with db_connection(self.db_path) as conn:
                try:
                    conn.execute("BEGIN")
                    self._create_normalized_schema(conn)
                    self._populate_lookup_tables(conn)
                    self._build_id_map_and_migrate_exercises(conn)
                    self._migrate_links_and_relationships(conn)
                    self._migrate_dependent_data(conn)
                    self._finalize_schema(conn)
                    conn.commit()
                    logger.info("Migration transaction committed successfully!")
                    return True
                except Exception as e:
                    logger.error(f"Migration failed during transaction: {e}", exc_info=True)
                    conn.rollback()
                    logger.critical("--- TRANSACTION ROLLED BACK. DATABASE UNCHANGED. ---")
                    return False
        except Exception as e:
            logger.error(f"Failed to connect to the database: {e}", exc_info=True)
            return False

if __name__ == '__main__':
    DB_PATH = 'strava_data.db' 
    
    logger.info(f"--- Running Migration Script for: {DB_PATH} ---")
    
    migration_handler = RunStrongMigration(DB_PATH)
    success = migration_handler.run_migration()
    
    if success:
        logger.info("--- MIGRATION SUCCEEDED ---")
    else:
        logger.error("--- MIGRATION FAILED. Please check logs and restore from the created backup if necessary. ---")

[{'name': 'activities'}, {'name': 'streams'}, {'name': 'gear'}, {'name': 'daily_training_metrics'}, {'name': 'conversations'}, {'name': 'muscle_groups'}, {'name': 'equipment'}, {'name': 'training_goals'}, {'name': 'exercises'}, {'name': 'exercise_muscle_groups'}, {'name': 'exercise_equipment'}, {'name': 'exercise_goals'}, {'name': 'exercise_relationships'}, {'name': 'workout_routines'}, {'name': 'routine_exercises'}, {'name': 'workout_performance'}, {'name': 'exercise_progression'}, {'name': 'muscle_group_fatigue'}]
[]
