# FTMS Speed Analysis Notebook

This notebook analyzes the relationship between instantaneous speed values and reported average speed in FTMS data. We'll calculate rolling averages of instantaneous speed and compare them to the device-reported average speed to identify any scaling issues.

In [None]:
# Import required libraries
import os
import sys
import sqlite3
import json
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from datetime import datetime
import seaborn as sns

# Fix the path to properly import modules
# Get the absolute path to the project root directory
# NOTE(review): os.path.abspath('') resolves to the current working directory,
# so `notebook_dir` is the PARENT of the working directory and `project_root`
# is one level above that - confirm this matches where the kernel is started.
notebook_dir = os.path.dirname(os.path.abspath(''))
project_root = os.path.abspath(os.path.join(notebook_dir, '..'))
print(f"Project root: {project_root}")

# Add project root to the front of the import path (skipped if already present,
# so re-running the cell does not add duplicates)
if project_root not in sys.path:
    sys.path.insert(0, project_root)

# Import our database module - with direct path, not using src.data
# <project_root>/src/data is appended to sys.path so that `database` resolves as
# a top-level module. Unlike the insert above, this append is not guarded and
# adds another entry each time the cell is re-run.
sys.path.append(os.path.join(project_root, 'src', 'data'))
from database import Database

ModuleNotFoundError: No module named 'src'

In [None]:
# Locate the SQLite file under <project_root>/src/data and report whether it exists
db_path = os.path.join(project_root, 'src', 'data', 'rogue_garmin.db')
print(f"Database path: {db_path}")
print(f"Database exists: {os.path.exists(db_path)}")

# Open the database through the project's Database wrapper
db = Database(db_path)

# Fetch up to 10 workouts to analyze
workouts = db.get_workouts(limit=10)
print(f"Found {len(workouts)} workouts to analyze")


def _workout_row(w):
    """Flatten one workout record into the columns shown in the overview table."""
    summary = w['summary']
    return {
        'id': w['id'],
        'type': w['workout_type'],
        'duration': w['duration'],
        'start_time': w['start_time'],
        'device': w['device_name'],
        # 'N/A' marks workouts whose summary has no avg_speed entry
        'avg_speed': summary.get('avg_speed', 'N/A'),
    }


# One row per workout; the bare expression below renders the table
workout_df = pd.DataFrame([_workout_row(w) for w in workouts])

workout_df

## Speed Analysis Functions

Let's define some functions to analyze the speed data from workouts:

In [None]:
def _elapsed_seconds(timestamps):
    """Return seconds elapsed since the first entry of a timestamp Series."""
    if pd.api.types.is_numeric_dtype(timestamps):
        # NOTE(review): numeric timestamps are treated as epoch seconds - confirm
        # against the database schema (they could be milliseconds).
        return (timestamps - timestamps.iloc[0]).astype(float)
    if not pd.api.types.is_datetime64_any_dtype(timestamps):
        # Timestamps read back as text: parse element-wise so ISO strings with
        # and without fractional seconds can be mixed in the same column.
        timestamps = pd.to_datetime(timestamps.map(pd.Timestamp))
    return (timestamps - timestamps.iloc[0]).dt.total_seconds()


def extract_speed_data(workout_id):
    """Extract speed data for a specific workout

    Reads the workout's data points from the module-level ``db`` and keeps only
    those that carry an instantaneous speed (looked up under
    'instantaneous_speed', 'instant_speed' or 'speed', in that order).

    Args:
        workout_id: Identifier passed to ``db.get_workout_data``.

    Returns:
        DataFrame with timestamp, instantaneous_speed, average_speed columns.
        When at least one point has speed data it also contains
        elapsed_seconds, rolling_avg_5s / 10s / 30s and cumulative_avg.
        With no usable points the frame is empty but still has the three
        base columns.

    Note:
        The rolling windows are counted in samples (5, 10, 30 rows), so the
        "s" suffix is only accurate if the device reports about once per
        second - TODO confirm the sampling rate.
    """
    speed_fields = ('instantaneous_speed', 'instant_speed', 'speed')
    base_columns = ['timestamp', 'instantaneous_speed', 'average_speed']

    # Get the raw workout data and collect one record per point with a speed
    records = []
    for point in db.get_workout_data(workout_id):
        point_data = point['data']

        # Extract instantaneous speed from various possible field names
        inst_speed = None
        for field in speed_fields:
            if field in point_data and point_data[field] is not None:
                inst_speed = float(point_data[field])
                break

        if inst_speed is None:  # Only include points with speed data
            continue

        # Extract average speed if available
        avg_speed = None
        if 'average_speed' in point_data and point_data['average_speed'] is not None:
            avg_speed = float(point_data['average_speed'])

        records.append({
            'timestamp': point['timestamp'],
            'instantaneous_speed': inst_speed,
            'average_speed': avg_speed,
        })

    # Passing the columns explicitly keeps the schema stable for empty results
    df = pd.DataFrame(records, columns=base_columns)
    if df.empty:
        return df

    # Force numeric dtypes so missing average speeds are NaN rather than None
    df['instantaneous_speed'] = df['instantaneous_speed'].astype(float)
    df['average_speed'] = df['average_speed'].astype(float)

    # Calculate time elapsed in seconds (the timestamp column itself is left as stored)
    df['elapsed_seconds'] = _elapsed_seconds(df['timestamp'])

    # Calculate rolling averages of different window sizes
    inst = df['instantaneous_speed']
    for window in (5, 10, 30):
        df[f'rolling_avg_{window}s'] = inst.rolling(window=window, min_periods=1).mean()
    df['cumulative_avg'] = inst.expanding().mean()

    return df

def analyze_speed_ratios(df):
    """Analyze the ratio between instantaneous and average speeds

    Args:
        df: DataFrame with 'instantaneous_speed', 'average_speed' and
            'cumulative_avg' columns (as produced by extract_speed_data).

    Returns:
        dict with mean_inst_speed, mean_avg_speed, median_ratio, mean_ratio,
        std_ratio and cumulative_ratio, or None when the frame is empty or has
        no rows with a device-reported average speed.

        Rows where the device-reported average is exactly 0 yield NaN ratios
        and are therefore ignored by the ratio statistics; cumulative_ratio is
        NaN if the last row is such a row.
    """
    if df.empty or 'average_speed' not in df.columns:
        return None

    # Keep only rows where the device actually reported an average speed
    avg_df = df[df['average_speed'].notna()]
    if avg_df.empty:
        return None

    device_avg = pd.to_numeric(avg_df['average_speed'])

    # A zero denominator would produce +/-inf and poison mean/std, so mask
    # zeros to NaN before dividing (pandas skips NaN in its reductions).
    safe_avg = device_avg.where(device_avg != 0)

    inst_to_avg_ratio = avg_df['instantaneous_speed'] / safe_avg
    cumulative_to_avg_ratio = avg_df['cumulative_avg'] / safe_avg

    return {
        'mean_inst_speed': avg_df['instantaneous_speed'].mean(),
        'mean_avg_speed': device_avg.mean(),
        'median_ratio': inst_to_avg_ratio.median(),
        'mean_ratio': inst_to_avg_ratio.mean(),
        'std_ratio': inst_to_avg_ratio.std(),
        # Ratio at the final data point with a device-reported average
        'cumulative_ratio': cumulative_to_avg_ratio.iloc[-1],
    }

## Analyze Recent Workouts

Let's analyze a recent workout to see the relationship between instantaneous and average speed values:

In [None]:
# Defaults for the names the later cells read. Without them a fresh kernel with
# no bike workouts would hit NameError further down instead of the cells'
# own "not enough data" branches.
workout_id = None
speed_df = pd.DataFrame()

# Find a recent bike workout with speed data
bike_workouts = [w for w in workouts if w['workout_type'] == 'bike']

if bike_workouts:
    # Analyze the first bike workout in the list
    # NOTE(review): assumes db.get_workouts returns newest first - confirm.
    workout = bike_workouts[0]
    workout_id = workout['id']

    print(f"Analyzing workout {workout_id} from {workout['start_time']}")
    print(f"Summary avg_speed: {workout['summary'].get('avg_speed', 'N/A')}")

    # Extract speed data
    speed_df = extract_speed_data(workout_id)

    if not speed_df.empty:
        print(f"Found {len(speed_df)} data points with speed information")

        # Display basic statistics
        speed_stats = speed_df[['instantaneous_speed', 'average_speed']].describe()
        print("\nSpeed Statistics:")
        display(speed_stats)

        # Show the first and last few rows to see how the averages evolve
        preview_columns = ['elapsed_seconds', 'instantaneous_speed', 'average_speed',
                           'rolling_avg_10s', 'cumulative_avg']

        print("\nFirst 5 data points:")
        display(speed_df[preview_columns].head())

        print("\nLast 5 data points:")
        display(speed_df[preview_columns].tail())
    else:
        print("No speed data found in this workout")
else:
    print("No bike workouts found in the recent workouts")

## Visualize Speed Comparison

Let's visualize the instantaneous speed, calculated averages, and reported average speed:

In [None]:
# Only plot when at least one data point carries a device-reported average speed
if not speed_df.empty and 'average_speed' in speed_df.columns and speed_df['average_speed'].notna().any():
    fig, ax = plt.subplots(figsize=(14, 8))
    elapsed = speed_df['elapsed_seconds']

    # Plot instantaneous speed
    ax.plot(elapsed, speed_df['instantaneous_speed'],
            label='Instantaneous Speed', color='blue', alpha=0.5)

    # Plot rolling averages
    ax.plot(elapsed, speed_df['rolling_avg_30s'],
            label='30s Rolling Average', color='green', linewidth=2)

    # Plot cumulative average
    ax.plot(elapsed, speed_df['cumulative_avg'],
            label='Cumulative Average', color='purple', linewidth=2)

    # Plot reported average speed (non-empty: guaranteed by the guard above)
    avg_speed_points = speed_df[speed_df['average_speed'].notna()]
    ax.plot(avg_speed_points['elapsed_seconds'], avg_speed_points['average_speed'],
            label='Device-reported Average', color='red', linewidth=2)

    ax.set_title(f'Speed Analysis for Workout {workout_id}')
    ax.set_xlabel('Time (seconds)')
    ax.set_ylabel('Speed (km/h)')
    ax.grid(True, alpha=0.3)
    ax.legend()
    plt.show()

    # If we have average speed data, analyze the ratio
    ratio_stats = analyze_speed_ratios(speed_df)
    if ratio_stats:
        print("\nSpeed Ratio Analysis:")
        print(f"Mean Instantaneous Speed: {ratio_stats['mean_inst_speed']:.2f} km/h")
        print(f"Mean Device-reported Average Speed: {ratio_stats['mean_avg_speed']:.2f} km/h")
        print(f"Ratio (Instantaneous/Average): {ratio_stats['mean_ratio']:.2f}")
        print(f"Cumulative Average / Device Average Ratio: {ratio_stats['cumulative_ratio']:.2f}")

        # Calculate approximate scaling factor, rounded to the nearest multiple of 5.
        # round() raises on inf/NaN, which the mean ratio can be when the device
        # reports an average speed of 0, so only estimate from a finite ratio.
        mean_ratio = ratio_stats['mean_ratio']
        if np.isfinite(mean_ratio):
            scaling_factor = round(mean_ratio / 5) * 5
            if scaling_factor > 1:
                print(f"\nPotential scaling factor needed: ~{scaling_factor}x")
                print(f"This suggests multiplying the device-reported average speed by {scaling_factor} might correct the issue.")
        else:
            print("\nMean ratio is not finite; skipping the scaling factor estimate.")
else:
    print("Not enough data to generate speed comparison chart")

## Test Correction Factor

Let's test different correction factors to see which one brings the average speed closest to the calculated cumulative average:

In [None]:
# Only run when at least one data point carries a device-reported average speed
if not speed_df.empty and 'average_speed' in speed_df.columns and speed_df['average_speed'].notna().any():
    # Filter to points with average speed data (non-empty: guaranteed by the guard above)
    avg_points = speed_df[speed_df['average_speed'].notna()].copy()

    # Test different correction factors against the cumulative average (our target)
    factors = [1, 5, 10, 15, 16, 17, 18, 19, 20, 25, 30]
    errors = []

    for factor in factors:
        corrected_avg = avg_points['average_speed'] * factor
        # Mean absolute error between the corrected device average and the target
        error = (corrected_avg - avg_points['cumulative_avg']).abs().mean()
        errors.append({
            'factor': factor,
            'mean_error': error,
            'corrected_avg': corrected_avg.mean(),
            'target_avg': avg_points['cumulative_avg'].mean()
        })

    error_df = pd.DataFrame(errors).sort_values('mean_error')

    print("Tested correction factors (sorted by smallest error):")
    display(error_df)

    # Pick the factor with the smallest error. Read it from the integer column:
    # error_df.iloc[0]['factor'] would upcast the whole row to float and
    # report e.g. "20.0" instead of "20".
    best_factor = int(error_df['factor'].iloc[0])

    fig, ax = plt.subplots(figsize=(14, 8))

    # Plot cumulative average (our target)
    ax.plot(avg_points['elapsed_seconds'], avg_points['cumulative_avg'],
            label='Cumulative Average (Target)', color='purple', linewidth=2)

    # Plot uncorrected average speed
    ax.plot(avg_points['elapsed_seconds'], avg_points['average_speed'],
            label='Uncorrected Device Average', color='red', linewidth=2)

    # Plot corrected average speed with best factor
    ax.plot(avg_points['elapsed_seconds'], avg_points['average_speed'] * best_factor,
            label=f'Corrected (×{best_factor})', color='green', linewidth=2)

    ax.set_title('Average Speed Correction Test')
    ax.set_xlabel('Time (seconds)')
    ax.set_ylabel('Speed (km/h)')
    ax.grid(True, alpha=0.3)
    ax.legend()
    plt.show()

    print(f"\nRecommended correction: Multiply device-reported average speed by {best_factor}")
else:
    print("Not enough data to test correction factors")

## Fix Implementation

Based on the analysis above, here's the code to fix the average speed issue in `workout_manager.py`:

In [None]:
def fix_average_speed(data_point, correction_factor=20):
    """Return the data point's average speed, scaled up when it looks too low.

    The average is multiplied by ``correction_factor`` only when an
    instantaneous speed is present, exceeds 5.0, and the average is below one
    fifth of it. In every other case the stored average (possibly None) is
    returned unchanged. A message is printed whenever a correction is applied.
    """
    # First non-None instantaneous reading, trying the known key spellings in order
    current_speed = next(
        (
            data_point[name]
            for name in ('instantaneous_speed', 'instant_speed', 'speed')
            if name in data_point and data_point[name] is not None
        ),
        None,
    )
    avg_speed = data_point.get('average_speed')

    # Nothing to compare against: hand back whatever was stored
    if current_speed is None or avg_speed is None:
        return avg_speed

    # Average is plausible relative to the instantaneous reading: leave it alone
    if not (current_speed > 5.0 and avg_speed < (current_speed / 5)):
        return avg_speed

    corrected_avg = avg_speed * correction_factor
    print(f"Correcting average speed: {avg_speed:.2f} → {corrected_avg:.2f} km/h")
    return corrected_avg

# Sample implementation for workout_manager.py
# The snippet is kept as a string so it can be printed and pasted into the
# project. Its instantaneous-speed lookup skips keys that are present but None:
# a plain data.get(key, default) would return that None and make the
# `inst_speed > 5.0` comparison raise TypeError.
code_fix = """
def _update_bike_metrics(self, data: Dict[str, Any]) -> None:
    # ...existing code...
    
    # Use average speed directly from device if available
    if 'average_speed' in data and data['average_speed'] is not None:
        # Log the device-reported average speed
        logger.debug(f"Raw device-reported average speed: {data['average_speed']} km/h")
        
        # Get instantaneous speed for comparison (first key with a non-None value, else 0)
        inst_speed = next(
            (data[key] for key in ('instant_speed', 'instantaneous_speed', 'speed')
             if data.get(key) is not None),
            0
        )
        
        # Check if average speed is unreasonably low compared to instantaneous
        if inst_speed > 5.0 and data['average_speed'] < (inst_speed / 5):
            # Apply a correction factor of 20x (based on analysis)
            correction_factor = 20
            corrected_speed = data['average_speed'] * correction_factor
            logger.info(f"Correcting abnormally low average speed: {data['average_speed']} → {corrected_speed} km/h (factor: {correction_factor}x)")
            self.summary_metrics['avg_speed'] = corrected_speed
        else:
            self.summary_metrics['avg_speed'] = data['average_speed']
    # ...existing code...
"""

print(code_fix)