# In [None]:
import pandas as pd
import os
import glob
from fitparse import FitFile
from sqlalchemy import create_engine, text
from sqlalchemy.exc import SQLAlchemyError

# Prerequisite: the PostgreSQL database must already exist. Create it from a terminal:
# > psql postgres
# > CREATE DATABASE database_name;


# --- CONFIGURATION ---

# PostgreSQL connection settings. No password is included in the URL built
# below, so the server must accept this user without one
# (NOTE(review): presumably local trust/peer auth — confirm).
DB_USER = 'bac'
DB_NAME = 'starter_project'
DB_HOST = 'localhost'
DB_PORT = '5432'

# Input locations: the activities CSV read by load_activities() and the folder
# scanned for '*.fit' files by insert_fit_logs().
# NOTE(review): FIT_FILES_FOLDER looks like a placeholder — set it before running.
ACTIVITIES_FILE = 'activities.csv'
FIT_FILES_FOLDER = 'path/to/your/fit_files/'

# Names of the FIT 'record' fields that parse_fit_file() keeps; all other
# fields (except 'timestamp') are ignored.
IMPORTANT_FIELDS = ['heart_rate', 'cadence', 'speed']

# Shared SQLAlchemy engine (psycopg2 driver), created at import time and used
# by every database helper below.
# NOTE(review): "no warnings" presumably refers to pandas warning when given a
# raw DBAPI connection instead of an engine — confirm.
# SQLAlchemy engine (no warnings)
engine = create_engine(f'postgresql+psycopg2://{DB_USER}@{DB_HOST}:{DB_PORT}/{DB_NAME}')

# --- HELPER FUNCTIONS ---

def create_tables():
    """Create the ``activities`` and ``fit_logs`` tables if they do not exist.

    ``activities`` holds one row per activity and enforces a UNIQUE
    ``fit_filename``; ``fit_logs`` holds one row per (timestamp, field)
    measurement and references ``activities.activity_id``.

    Both statements run in a single transaction that is committed when the
    ``with`` block exits normally and rolled back if either statement raises.
    """
    # engine.begin() commits on successful exit. A plain engine.connect()
    # does not autocommit under SQLAlchemy 2.x, so the DDL would be rolled
    # back when the connection is closed and the tables would never appear.
    with engine.begin() as conn:
        conn.execute(text("""
            CREATE TABLE IF NOT EXISTS activities (
                activity_id SERIAL PRIMARY KEY,
                name TEXT,
                date TIMESTAMP,
                distance FLOAT,
                duration FLOAT,
                fit_filename TEXT UNIQUE
            )
        """))

        conn.execute(text("""
            CREATE TABLE IF NOT EXISTS fit_logs (
                log_id BIGSERIAL PRIMARY KEY,
                activity_id INTEGER REFERENCES activities(activity_id),
                timestamp TIMESTAMP,
                field_name TEXT,
                field_value FLOAT
            )
        """))
    print("✅ Tables ready.")

def load_activities():
    """Append every row of the activities CSV to the ``activities`` table.

    Reads ``ACTIVITIES_FILE`` with pandas and writes all of its columns to the
    database in append mode, so the CSV column names must match the table's
    column names.

    Raises:
        ValueError: if the CSV has no ``fit_filename`` column, which is the
            key later used to link .fit files to activities.
    """
    activities = pd.read_csv(ACTIVITIES_FILE)

    # Fail early: without this column no .fit file could be matched later.
    has_filename_column = 'fit_filename' in activities.columns
    if not has_filename_column:
        raise ValueError("activities.csv must have a 'fit_filename' column.")

    row_count = len(activities)
    activities.to_sql(
        'activities',
        engine,
        if_exists='append',
        index=False,
        method='multi',
    )
    print(f"✅ Inserted {row_count} activities.")

def parse_fit_file(filepath):
    """Parse a .fit file into a flat list of log entries.

    Each 'record' message is expanded into one entry per field listed in
    ``IMPORTANT_FIELDS`` that has a non-None value. Records without a
    timestamp are skipped.

    Args:
        filepath: path of the .fit file to read.

    Returns:
        A list of dicts with keys ``'timestamp'``, ``'field_name'`` and
        ``'field_value'``. An empty list is returned (and a warning printed)
        if the file cannot be opened or decoded, so that a partially decoded
        file never yields a partial set of entries.
    """
    entries = []
    # Deliberately broad, per-file best-effort boundary: one unreadable file
    # must not abort the whole import. The loop is inside the try because
    # fitparse decodes messages lazily while get_messages() is iterated, so
    # corruption errors surface here rather than in the FitFile constructor.
    try:
        fitfile = FitFile(filepath)

        for record in fitfile.get_messages('record'):
            timestamp = None
            metrics = []

            for field in record:
                if field.name == 'timestamp':
                    timestamp = field.value
                elif field.name in IMPORTANT_FIELDS and field.value is not None:
                    metrics.append((field.name, field.value))

            # A measurement without a timestamp cannot be placed on the timeline.
            if timestamp is None:
                continue

            entries.extend(
                {
                    'timestamp': timestamp,
                    'field_name': field_name,
                    'field_value': field_value,
                }
                for field_name, field_value in metrics
            )
    except Exception as e:
        print(f"⚠️ Failed to parse {filepath}: {e}")
        return []

    return entries

def find_activity_id(filename, activities_df):
    """Look up the activity_id whose fit_filename equals ``filename``.

    Args:
        filename: base name of a .fit file (no directory part).
        activities_df: DataFrame with ``activity_id`` and ``fit_filename``
            columns.

    Returns:
        The ``activity_id`` of the first matching row, or ``None`` (after
        printing a warning) when no row matches.
    """
    is_match = activities_df['fit_filename'] == filename
    candidates = activities_df.loc[is_match]

    # Guard clause: nothing to return when the file is unknown.
    if candidates.empty:
        print(f"⚠️ No matching activity found for {filename}")
        return None

    first_row = candidates.iloc[0]
    return first_row['activity_id']

def insert_fit_logs(chunksize=5000):
    """Parse every .fit file in ``FIT_FILES_FOLDER`` and append its logs to ``fit_logs``.

    For each ``*.fit`` file the matching activity is looked up by file name;
    files with no matching activity, or that yield no log entries, are
    skipped. A database error on one file is reported and the loop moves on
    to the next file.

    Rows are always appended and no check is made for logs that already
    exist, so calling this twice for the same files stores their logs twice.

    Args:
        chunksize: maximum number of rows sent per multi-row INSERT. Without a
            bound, a long activity is written as one very large statement.
            Pass ``None`` to write all rows of a file at once.
    """
    activities_df = pd.read_sql("SELECT activity_id, fit_filename FROM activities", engine)

    # glob order depends on the filesystem; sort so runs are reproducible.
    fit_files = sorted(glob.glob(os.path.join(FIT_FILES_FOLDER, '*.fit')))

    total_logs_inserted = 0

    for filepath in fit_files:
        filename = os.path.basename(filepath)
        activity_id = find_activity_id(filename, activities_df)

        if activity_id is None:
            continue

        logs = parse_fit_file(filepath)

        if not logs:
            continue

        logs_df = pd.DataFrame(logs)
        # Every row of this file belongs to the same activity.
        logs_df['activity_id'] = activity_id

        # Insert in bulk, bounded to `chunksize` rows per statement
        try:
            logs_df.to_sql(
                'fit_logs',
                engine,
                if_exists='append',
                index=False,
                method='multi',
                chunksize=chunksize,
            )
        except SQLAlchemyError as e:
            print(f"⚠️ Database error inserting {filename}: {e}")
        else:
            total_logs_inserted += len(logs_df)
            print(f"✅ Inserted {len(logs_df)} logs from {filename}")

    print(f"\n✅ Total logs inserted: {total_logs_inserted}")

# --- MAIN FLOW ---

if __name__ == "__main__":
    # Stages run strictly in this order: schema first, then the activities
    # the logs refer to, then the per-record logs themselves.
    for stage in (create_tables, load_activities, insert_fit_logs):
        stage()
    print("\n🏁 All done!")
