# FIT Timestamp Conversion Error Analysis

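This notebook analyzes a timestamp conversion failure in the Rogue Garmin Bridge application. The error occurs when workout data is converted to FIT format: FIT stores timestamps as unsigned 32-bit counts of seconds since 1989-12-31 00:00:00 UTC, so a source time earlier than that epoch encodes to a negative value and is rejected.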

Error from logs:
```
Error converting bike workout to FIT: timestamp encoded value -629950691 is not in valid range [0, 4294967295]
```

In [None]:
import os
import sys
import sqlite3
import time
from datetime import datetime, timezone
import json
import pandas as pd
import matplotlib.pyplot as plt

# Add the project root to the path for imports
sys.path.insert(0, os.path.abspath(os.path.dirname(os.getcwd())))

# Import project modules
from src.data.database import Database
from src.data.workout_manager import WorkoutManager

## 1. Connect to the Database and Inspect Workout ID 1

In [None]:
# Open the SQLite database located under the current working directory
db_path = os.path.join(os.getcwd(), 'src/data/rogue_garmin.db')
db = Database(db_path)

# Load the workout under inspection
workout_id = 1
workout = db.get_workout(workout_id)

# Identity and type first, then the start time both raw and as UTC ISO-8601
print(f"Workout ID: {workout['id']}")
print(f"Type: {workout['workout_type']}")
start_unix = workout['start_time']
print(f"Start time (Unix): {start_unix}")
start_utc = datetime.fromtimestamp(start_unix, tz=timezone.utc)
print(f"Start time (ISO): {start_utc.isoformat()}")

# A missing (or falsy) end time means the workout was never closed
if not workout.get('end_time'):
    print("Workout not completed")
else:
    end_unix = workout['end_time']
    end_utc = datetime.fromtimestamp(end_unix, tz=timezone.utc)
    print(f"End time (Unix): {end_unix}")
    print(f"End time (ISO): {end_utc.isoformat()}")
    print(f"Duration: {end_unix - start_unix} seconds")

## 2. Check Workout Data Points

# FIT Conversion Error Log Analysis

This notebook is designed to analyze and process log data for FIT conversion errors in the Rogue Garmin Bridge application. It helps identify patterns, timestamp issues, and common errors to improve the conversion process.

## Import Required Libraries
Import libraries such as re for regular expressions, pandas for data manipulation, and datetime for timestamp processing.

In [None]:
# Import necessary libraries
import re
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import os
import json
from pathlib import Path

# Display settings: show up to 100 rows, every column, and 1000-character-wide output
for option_name, option_value in (
    ('display.max_rows', 100),
    ('display.max_columns', None),
    ('display.width', 1000),
):
    pd.set_option(option_name, option_value)

## Load and Parse Log Data
Load the log data into a structured format, such as a pandas DataFrame, by extracting relevant fields like timestamp, log level, module, and message.

In [None]:
def parse_log_line(line):
    """Parse a single log line into a structured record.

    Expected layout (fields separated by " - "):
        2023-05-20 14:30:45,123 - INFO - module_name - Message content

    Args:
        line: One raw line of log text (a trailing newline is fine).

    Returns:
        A dict with the stripped 'timestamp', 'level', 'module' and
        'message' fields, or None when the line does not follow the layout.
    """
    # The module field is matched lazily up to the first " - " separator
    # instead of with [^-]+, so module names that contain a hyphen
    # (e.g. "fit-converter") are parsed rather than silently dropped.
    pattern = r'(\d{4}-\d{2}-\d{2} \d{2}:\d{2}:\d{2},\d{3}) - (\w+) - (.+?) - (.+)'
    match = re.match(pattern, line)
    if match is None:
        return None

    timestamp, level, module, message = match.groups()
    return {
        'timestamp': timestamp.strip(),
        'level': level.strip(),
        'module': module.strip(),
        'message': message.strip()
    }

def load_log_file(file_path):
    """Read a log file and return its parseable lines as a DataFrame.

    Every line is passed through parse_log_line; lines it rejects are
    skipped. Any exception raised while opening, reading or building the
    frame is reported with print() and an empty DataFrame is returned.
    """
    try:
        with open(file_path, 'r') as handle:
            # Keep only the lines the parser recognised
            parsed_rows = [row for row in map(parse_log_line, handle) if row]
        return pd.DataFrame(parsed_rows)
    except Exception as e:
        print(f"Error loading log file: {e}")
        return pd.DataFrame()

# Location of the bridge log; adjust to your environment
log_file_path = "../logs/rogue_garmin_bridge.log"

if not os.path.exists(log_file_path):
    print(f"Log file not found: {log_file_path}")
    # No log available: fall back to two hand-written rows for demonstration
    sample_rows = [
        {
            'timestamp': '2023-05-20 14:30:45,123',
            'level': 'INFO',
            'module': 'fit_converter',
            'message': 'Processing workout 123456',
        },
        {
            'timestamp': '2023-05-20 14:30:46,456',
            'level': 'ERROR',
            'module': 'fit_converter',
            'message': 'Invalid timestamp range detected: 2023-05-20 14:30:40 - 2023-05-20 14:29:40',
        },
    ]
    df_logs = pd.DataFrame(sample_rows)
    print("Created sample data for demonstration")
else:
    df_logs = load_log_file(log_file_path)
    print(f"Loaded {len(df_logs)} log entries")
    display(df_logs.head())

# Parse the textual timestamp column into real datetimes (skipped when nothing was loaded)
has_rows = not df_logs.empty
if has_rows:
    df_logs['datetime'] = pd.to_datetime(
        df_logs['timestamp'],
        format='%Y-%m-%d %H:%M:%S,%f',
    )

## Extract and Analyze Timestamps
Convert Unix and FIT timestamps to human-readable formats and calculate differences or inconsistencies.

In [None]:
def extract_timestamps(message):
    """Extract Unix and FIT timestamps from a log message.

    Looks for the literal markers "Unix timestamp: <int>" and
    "FIT timestamp: <int>". A leading minus sign is accepted because the
    conversion failures under investigation report negative encoded values.

    Args:
        message: The log message text to search.

    Returns:
        A (unix_ts, fit_ts) tuple of ints; an element is None when its
        marker does not occur in the message.
    """
    # Example patterns to look for
    unix_pattern = r'Unix timestamp: (-?\d+)'
    fit_pattern = r'FIT timestamp: (-?\d+)'

    unix_match = re.search(unix_pattern, message)
    fit_match = re.search(fit_pattern, message)

    unix_ts = int(unix_match.group(1)) if unix_match else None
    fit_ts = int(fit_match.group(1)) if fit_match else None

    return unix_ts, fit_ts

def convert_unix_to_datetime(unix_ts):
    """Convert a Unix timestamp to a naive datetime expressed in UTC.

    The result is in UTC (not local time) so it can be subtracted directly
    from the naive-UTC values produced by convert_fit_to_datetime without
    the local UTC offset leaking into the difference.

    Args:
        unix_ts: Seconds since 1970-01-01 00:00:00 UTC, or None/NaN when
            no timestamp is available.

    Returns:
        A naive datetime in UTC, or None when unix_ts is missing.
    """
    # Test for "missing" explicitly: 0 is a valid timestamp, and NaN (how
    # pandas represents None in a numeric column) is truthy.
    if pd.isna(unix_ts):
        return None
    # Epoch arithmetic rather than datetime.fromtimestamp(), which would
    # apply the machine's local timezone.
    return datetime(1970, 1, 1) + timedelta(seconds=float(unix_ts))

def convert_fit_to_datetime(fit_ts):
    """Convert FIT timestamp to datetime (seconds since UTC 00:00 Dec 31 1989).

    Args:
        fit_ts: Seconds relative to the FIT epoch, or None/NaN when no
            timestamp is available. Negative values are converted too
            (yielding a date before the epoch) so that invalid encodings
            can still be inspected.

    Returns:
        A naive datetime in UTC, or None when fit_ts is missing.
    """
    # Test for "missing" explicitly: 0 is the epoch itself, and NaN (how
    # pandas represents None in a numeric column) is truthy and would make
    # timedelta() raise.
    if pd.isna(fit_ts):
        return None
    fit_epoch = datetime(1989, 12, 31, 0, 0, 0)
    return fit_epoch + timedelta(seconds=float(fit_ts))

# Extract timestamps from messages that contain them
if not df_logs.empty:
    # Store the extracted values in object-dtype columns so that a missing
    # timestamp stays None; a numeric column would hold NaN instead, and the
    # converters would then receive NaN rather than an int or None.
    extracted = [extract_timestamps(message) for message in df_logs['message']]
    df_logs['unix_ts'] = pd.Series(
        [unix_value for unix_value, _ in extracted], index=df_logs.index, dtype=object
    )
    df_logs['fit_ts'] = pd.Series(
        [fit_value for _, fit_value in extracted], index=df_logs.index, dtype=object
    )

    # Convert timestamps to datetime. pd.to_datetime forces a datetime64
    # column even when every row is missing; without it an all-None column
    # is object dtype and the .dt accessor below raises AttributeError.
    df_logs['unix_datetime'] = pd.to_datetime(
        df_logs['unix_ts'].apply(convert_unix_to_datetime), errors='coerce'
    )
    df_logs['fit_datetime'] = pd.to_datetime(
        df_logs['fit_ts'].apply(convert_fit_to_datetime), errors='coerce'
    )

    # Calculate time differences; rows lacking either timestamp yield NaN
    mask = df_logs['unix_datetime'].notna() & df_logs['fit_datetime'].notna()
    df_logs['timestamp_diff_seconds'] = (
        df_logs['unix_datetime'] - df_logs['fit_datetime']
    ).dt.total_seconds()

    # Display timestamp analysis results (only rows where both timestamps exist)
    timestamp_analysis = df_logs.loc[
        mask, ['datetime', 'unix_datetime', 'fit_datetime', 'timestamp_diff_seconds']
    ]
    display(timestamp_analysis.head())

    if not timestamp_analysis.empty:
        # Plot histogram of timestamp differences
        plt.figure(figsize=(10, 6))
        timestamp_analysis['timestamp_diff_seconds'].hist(bins=30)
        plt.title('Distribution of Timestamp Differences (Unix - FIT)')
        plt.xlabel('Difference in seconds')
        plt.ylabel('Frequency')
        plt.grid(True, alpha=0.3)
        plt.show()

## Identify and Handle Errors
Filter log entries for errors and warnings, and analyze the causes of issues such as invalid timestamp ranges.

In [None]:
# Filter for error and warning messages
if not df_logs.empty:
    # df_errors gets a new column below, so take an explicit copy; assigning
    # into a filtered slice of df_logs triggers SettingWithCopyWarning.
    df_errors = df_logs[df_logs['level'] == 'ERROR'].copy()
    df_warnings = df_logs[df_logs['level'] == 'WARNING']

    print(f"Found {len(df_errors)} errors and {len(df_warnings)} warnings")

    # Display the most common errors
    if not df_errors.empty:
        print("\nMost common error messages:")
        common_errors = df_errors['message'].value_counts().head(10)
        display(common_errors)

        # Extract workout IDs from error messages
        workout_id_pattern = r'workout[_\s]?id[:\s]+(\w+)'
        df_errors['workout_id'] = df_errors['message'].str.extract(workout_id_pattern, expand=False)

        # Count errors by workout ID (only when at least one ID was found)
        if df_errors['workout_id'].notna().any():
            print("\nErrors by workout ID:")
            errors_by_workout = df_errors['workout_id'].value_counts()
            display(errors_by_workout.head(10))

    # Analyze invalid timestamp ranges (any log level, not just ERROR)
    timestamp_range_pattern = r'Invalid timestamp range detected: (.+)'
    # Copied for the same reason as df_errors: a column is added below
    timestamp_errors = df_logs[df_logs['message'].str.contains('Invalid timestamp range', na=False)].copy()

    if not timestamp_errors.empty:
        print(f"\nFound {len(timestamp_errors)} invalid timestamp range errors")
        display(timestamp_errors.head())

        # Extract the timestamp ranges for closer analysis
        timestamp_errors['range_info'] = timestamp_errors['message'].str.extract(timestamp_range_pattern, expand=False)
        display(timestamp_errors['range_info'].head())

In [None]:
# Tag every log message with keyword-based categories and chart the totals
if 'message' in df_logs.columns and not df_logs.empty:
    # Category name -> keywords; a message belongs to a category when it
    # contains any of the keywords (case-insensitive)
    error_categories = {
        'timestamp': ['timestamp', 'time range', 'invalid date'],
        'format': ['format', 'malformed', 'parsing error'],
        'missing_data': ['missing', 'not found', 'empty'],
        'connection': ['connection', 'timeout', 'network'],
        'authentication': ['auth', 'token', 'permission'],
        'file_io': ['file', 'io error', 'cannot open', 'cannot write']
    }

    # One 0/1 flag column per category, named "<category>_error"
    flag_columns = []
    for category_name in error_categories:
        flag_column = f'{category_name}_error'
        keyword_regex = '|'.join(error_categories[category_name])
        matches = df_logs['message'].str.contains(keyword_regex, case=False, na=False)
        df_logs[flag_column] = matches.astype(int)
        flag_columns.append(flag_column)

    # Total number of flagged messages per category
    error_counts = df_logs[flag_columns].sum()

    # Bar chart of the per-category totals
    plt.figure(figsize=(10, 6))
    error_counts.plot(kind='bar', color='crimson')
    plt.title('Error Categories')
    plt.xlabel('Category')
    plt.ylabel('Count')
    plt.xticks(rotation=45)
    plt.grid(True, alpha=0.3)
    plt.tight_layout()
    plt.show()

## Generate Summary Report
Summarize the findings, including the number of errors, affected workouts, and any patterns in the data.

In [None]:
# Generate a summary report of the log analysis
def generate_summary_report(df):
    """Generate a Markdown summary report of log analysis findings.

    Args:
        df: DataFrame of parsed log entries. A 'level' column is required;
            'datetime', 'module', 'message' and 'workout_id' are used when
            present.

    Returns:
        The report as a Markdown string, or the plain sentence
        "No valid log data to analyze." when df is empty or has no
        'level' column.
    """
    if df.empty or 'level' not in df.columns:
        return "No valid log data to analyze."

    # Basic statistics
    total_entries = len(df)
    log_timespan = 'Unknown'
    if 'datetime' in df.columns and df['datetime'].notna().any():
        log_timespan = f"{df['datetime'].min()} to {df['datetime'].max()}"

    # Count by log level and by module (top 10 modules)
    level_text = df['level'].value_counts().to_string()
    if 'module' in df.columns:
        module_text = df['module'].value_counts().head(10).to_string()
    else:
        module_text = 'No data'

    # Error analysis
    error_df = df[df['level'] == 'ERROR']
    total_errors = len(error_df)
    has_messages = 'message' in df.columns

    # Affected workouts: use an existing 'workout_id' column when the caller
    # supplies one, otherwise pull the IDs out of the error messages.
    if 'workout_id' in error_df.columns:
        workout_ids = error_df['workout_id']
    elif has_messages and not error_df.empty:
        workout_ids = error_df['message'].str.extract(
            r'workout[_\s]?id[:\s]+(\w+)', expand=False
        )
    else:
        workout_ids = pd.Series(dtype=object)
    affected_workouts = workout_ids.nunique() if workout_ids.notna().any() else 'Unknown'

    # Timestamp-related messages at any log level
    if has_messages:
        timestamp_error_count = int(
            df['message'].str.contains('timestamp|time range', case=False, na=False).sum()
        )
    else:
        timestamp_error_count = 0

    # Most frequent error message; value_counts() skips missing messages,
    # so it can be empty even when error rows exist.
    most_common_error = 'None'
    if has_messages:
        error_message_counts = error_df['message'].value_counts()
        if not error_message_counts.empty:
            most_common_error = error_message_counts.index[0]

    # The report is assembled line by line with no leading indentation:
    # Markdown renders text indented by four spaces as a code block, which
    # would swallow the headings. Tabular output goes inside code fences so
    # its column alignment survives rendering.
    fence = "`" * 3
    report_lines = [
        "# Log Analysis Summary Report",
        "",
        "## Overview",
        f"- Total log entries: {total_entries}",
        f"- Log time span: {log_timespan}",
        f"- Total errors: {total_errors}",
        f"- Affected workouts: {affected_workouts}",
        "",
        "## Log Level Distribution",
        fence,
        level_text,
        fence,
        "",
        "## Module Distribution",
        fence,
        module_text,
        fence,
        "",
        "## Error Analysis",
        f"- Timestamp-related errors: {timestamp_error_count}",
        f"- Most common error: {most_common_error}",
        "",
        "## Recommendations",
        "1. Review timestamp handling in the FIT conversion process",
        "2. Add more validation checks for workout data before conversion",
        "3. Implement better error recovery mechanisms",
        "4. Consider adding more detailed logging for problematic areas",
    ]
    return "\n".join(report_lines) + "\n"

# Generate and display the summary report
if not df_logs.empty:
    summary_report = generate_summary_report(df_logs)
    print(summary_report)

    # Save the report to a markdown file. The encoding is explicit so that
    # non-ASCII log messages are written the same way on every platform.
    report_path = "log_analysis_report.md"
    with open(report_path, "w", encoding="utf-8") as f:
        f.write(summary_report)
    print(f"Report saved to {report_path}")

    # Create visualizations for the report
    if 'level' in df_logs.columns:
        plt.figure(figsize=(10, 6))
        df_logs['level'].value_counts().plot(kind='pie', autopct='%1.1f%%')
        plt.title('Log Entries by Level')
        plt.ylabel('')
        plt.show()

    # Timeline of errors
    if 'datetime' in df_logs.columns and 'level' in df_logs.columns:
        errors_over_time = df_logs[df_logs['level'] == 'ERROR'].set_index('datetime')
        if not errors_over_time.empty:
            plt.figure(figsize=(12, 6))
            # Hourly buckets; lowercase 'h' is used because the uppercase
            # 'H' alias is deprecated in recent pandas releases.
            errors_over_time.resample('1h').size().plot()
            plt.title('Errors Over Time')
            plt.xlabel('Time')
            plt.ylabel('Number of Errors')
            plt.grid(True, alpha=0.3)
            plt.show()

## Conclusion

This notebook provides a comprehensive analysis of FIT conversion error logs. The insights gained from this analysis can help identify:

1. Common error patterns in the FIT conversion process
2. Issues with timestamp handling and validation
3. Specific workouts that consistently cause errors
4. Trends in error occurrence over time

Using these insights, the development team can make targeted improvements to the Rogue Garmin Bridge application to increase reliability and reduce conversion errors.