# Garmin Training Data Analysis & AI Preparation

This notebook prepares Garmin training data for an AI-powered training planner. We'll:
1. Load and clean the training data
2. Analyze training patterns and trends
3. Visualize key metrics
4. Format data for AI chat context

## 1. Import Required Libraries

In [None]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from datetime import datetime, timedelta
import json

# Plot appearance: seaborn-flavoured dark-grid style sheet plus the "husl" palette
plt.style.use('seaborn-v0_8-darkgrid')
sns.set_palette("husl")

# pandas display limits: show every column, cap printed rows at 100
for option_name, option_value in (('display.max_columns', None),
                                  ('display.max_rows', 100)):
    pd.set_option(option_name, option_value)

print("✓ Libraries imported successfully")

## 2. Load Garmin Training Data

In [None]:
# Read the Garmin export, parse the 'Dato' column as datetimes and order the
# rows newest-first with a fresh 0..n-1 index.
df = (
    pd.read_csv('Activities.csv')
    .assign(Dato=lambda frame: pd.to_datetime(frame['Dato']))
    .sort_values('Dato', ascending=False)
    .reset_index(drop=True)
)

# Quick overview of what was loaded
activity_dates = df['Dato']
print(f"✓ Loaded {len(df)} activities")
print(f"  Date range: {activity_dates.min().date()} to {activity_dates.max().date()}")
print(f"\nActivity types: {df['Aktivitetstype'].unique()}")
print(f"\nDataset shape: {df.shape}")
df.head()

## 3. Explore and Clean the Data

In [None]:
# Report only the columns that actually contain missing values
null_counts = df.isnull().sum()
print("Missing values per column:")
print(null_counts[null_counts > 0])

# Work on a copy so the raw frame stays available for re-runs
df_clean = df.copy()

# Columns that arrive as text using a decimal comma (Danish number format)
numeric_cols = ['Distance', 'Kalorier', 'Gennemsnitlig puls', 'Maks. puls',
                'Aerob TE', 'Samlet stigning', 'Skridt']

# Swap the decimal comma for a dot, then convert; anything unparseable becomes NaN.
# NOTE(review): a value that uses '.' as thousands separator (e.g. '1.234')
# would be read as 1.234 here - confirm the export never contains one.
for col in (name for name in numeric_cols if name in df_clean.columns):
    as_text = df_clean[col].astype(str).str.replace(',', '.')
    df_clean[col] = pd.to_numeric(as_text, errors='coerce')

# Parse time duration (format: H:MM or H:MM:SS, seconds may be fractional)
def parse_duration(time_str):
    """Convert a duration string to minutes.

    Supported layouts are ``H:MM`` and ``H:MM:SS``. The seconds field may carry
    a fractional part written with a dot or a decimal comma
    (``0:45:30.5`` / ``0:45:30,5``).

    Args:
        time_str: Duration text, or a missing value (``None`` / ``NaN``).

    Returns:
        Duration in minutes, or ``np.nan`` when the value is missing, is not
        numeric, or does not have two or three ``:``-separated fields.
    """
    try:
        if pd.isna(time_str):
            return np.nan
        parts = str(time_str).split(':')
        if len(parts) == 2:  # H:MM
            return int(parts[0]) * 60 + int(parts[1])
        if len(parts) == 3:  # H:MM:SS(.fff)
            # float() instead of int() so fractional seconds are kept rather
            # than turning the whole duration into NaN
            seconds = float(parts[2].replace(',', '.'))
            return int(parts[0]) * 60 + int(parts[1]) + seconds / 60
    except (TypeError, ValueError):
        # Non-numeric fields or an un-testable input: treat as missing
        pass
    # Any other layout is reported as missing explicitly (not an implicit None)
    return np.nan

# Derive each activity's length in minutes from the raw 'Tid' text
df_clean['Duration_Minutes'] = df_clean['Tid'].map(parse_duration)

# Count the rows that carry the key metrics after cleaning
distance_rows = df_clean['Distance'].notna().sum()
heart_rate_rows = df_clean['Gennemsnitlig puls'].notna().sum()
print(f"\n✓ Data cleaned")
print(f"  Activities with distance: {distance_rows}")
print(f"  Activities with heart rate: {heart_rate_rows}")
df_clean.head()

## 4. Extract Key Training Metrics

Let's visualize the activity distribution and key metrics.

In [None]:
# Per-activity-type totals: number of sessions plus summed distance, duration
# and calories, ordered from most to least frequent activity type.
activity_summary = (
    df_clean.groupby('Aktivitetstype')
    .agg(
        Count=('Dato', 'count'),
        Total_Distance_km=('Distance', 'sum'),
        Total_Duration_min=('Duration_Minutes', 'sum'),
        Total_Calories=('Kalorier', 'sum'),
    )
    .round(2)
    .sort_values('Count', ascending=False)
)

print("Activity Distribution:")
print(activity_summary)

# One bar chart per metric on a 2x2 grid: (column, title, y-label, colour)
summary_panels = [
    ('Count', 'Activity Count by Type', 'Count', 'skyblue'),
    ('Total_Distance_km', 'Total Distance by Activity Type (km)', 'Distance (km)', 'coral'),
    ('Total_Duration_min', 'Total Duration by Activity Type (min)', 'Duration (min)', 'lightgreen'),
    ('Total_Calories', 'Total Calories by Activity Type', 'Calories', 'gold'),
]

fig, axes = plt.subplots(2, 2, figsize=(15, 10))

# axes.flat walks the grid row by row, matching the panel order above
for ax, (column, title, y_label, colour) in zip(axes.flat, summary_panels):
    activity_summary[column].plot(kind='bar', ax=ax, color=colour)
    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.set_xlabel('Activity Type')
    ax.set_ylabel(y_label)
    ax.tick_params(axis='x', rotation=45)

plt.tight_layout()
plt.show()

## 5. Aggregate Training Data by Week

Analyze weekly training patterns and volume over the 12 weeks leading up to the most recent activity.

In [None]:
# Add week information.
# ISO week numbers belong to the ISO year, which differs from the calendar year
# around New Year (e.g. 2024-12-30 is week 1 of ISO year 2025). Taking both
# from the same isocalendar() result keeps (Year, Week) a consistent pair.
iso_calendar = df_clean['Dato'].dt.isocalendar()
df_clean['Week'] = iso_calendar.week
df_clean['Year'] = iso_calendar.year
# Start timestamp of the weekly period each activity falls in (vectorised)
df_clean['Week_Start'] = df_clean['Dato'].dt.to_period('W').dt.start_time

# Weekly aggregation (last 12 weeks, counted back from the newest activity)
# NOTE(review): the cutoff is not aligned to a week boundary, so the oldest
# week in the window may only be partially covered.
latest_date = df_clean['Dato'].max()
cutoff_date = latest_date - timedelta(weeks=12)
df_recent = df_clean[df_clean['Dato'] >= cutoff_date]

weekly_total = df_recent.groupby('Week_Start').agg({
    'Duration_Minutes': 'sum',
    'Distance': 'sum',
    'Aerob TE': 'sum',
    'Kalorier': 'sum',
    'Dato': 'count'
}).round(2)

weekly_total.columns = ['Total_Minutes', 'Total_Distance_km',
                        'Total_Training_Effect', 'Total_Calories', 'Activity_Count']

print("Weekly Training Summary (Last 12 Weeks):")
print(weekly_total)

# Plot weekly trends
fig, axes = plt.subplots(2, 2, figsize=(16, 10))

# Three line charts share everything except column, labels, marker and colour
trend_panels = [
    (axes[0, 0], 'Total_Distance_km', 'Weekly Distance Trend', 'Distance (km)', 'o', 'blue'),
    (axes[0, 1], 'Total_Minutes', 'Weekly Duration Trend', 'Duration (minutes)', 's', 'green'),
    (axes[1, 0], 'Total_Training_Effect', 'Weekly Training Effect', 'Total Training Effect', '^', 'red'),
]
for ax, column, title, y_label, marker, colour in trend_panels:
    ax.plot(weekly_total.index, weekly_total[column],
            marker=marker, linewidth=2, markersize=8, color=colour)
    ax.set_title(title, fontsize=14, fontweight='bold')
    ax.set_xlabel('Week')
    ax.set_ylabel(y_label)
    ax.grid(True, alpha=0.3)

# Weekly activity count.
# On a date axis the bar width is measured in days; the default of 0.8 would
# draw hairline bars for points spaced a week apart, so use 5 days instead.
count_ax = axes[1, 1]
count_ax.bar(weekly_total.index, weekly_total['Activity_Count'],
             width=5, color='purple', alpha=0.7)
count_ax.set_title('Weekly Activity Count', fontsize=14, fontweight='bold')
count_ax.set_xlabel('Week')
count_ax.set_ylabel('Number of Activities')
count_ax.grid(True, alpha=0.3, axis='y')

plt.tight_layout()
plt.show()

## 6. Create Training Summary Statistics

Calculate key performance indicators and personal records.

In [None]:
# Personal Records and Stats
runs = df_clean[df_clean['Aktivitetstype'] == 'Løb']
rides = df_clean[df_clean['Aktivitetstype'].str.contains('Cykling', na=False)]
max_heart_rate = df_clean['Maks. puls'].max()

records = {
    'Total Activities': len(df_clean),
    'Total Distance (km)': round(df_clean['Distance'].sum(), 2),
    'Total Duration (hours)': round(df_clean['Duration_Minutes'].sum() / 60, 2),
    'Total Calories': int(df_clean['Kalorier'].sum()),
    'Longest Run (km)': round(runs['Distance'].max(), 2),
    'Longest Cycling (km)': round(rides['Distance'].max(), 2),
    # int(NaN) raises, so report None when no activity has a max heart rate
    'Max Heart Rate (bpm)': int(max_heart_rate) if pd.notna(max_heart_rate) else None,
    'Avg Heart Rate (bpm)': round(df_clean['Gennemsnitlig puls'].mean(), 0),
}

# Calculate weekly averages.
# When every activity falls on the same day the span is 0 days; count that as
# a single week so both keys always exist (the prompt cell reads them
# unconditionally and would otherwise raise KeyError).
date_range_days = (df_clean['Dato'].max() - df_clean['Dato'].min()).days
weeks = date_range_days / 7 if date_range_days > 0 else 1
records['Avg Weekly Distance (km)'] = round(records['Total Distance (km)'] / weeks, 2)
records['Avg Weekly Duration (hours)'] = round(records['Total Duration (hours)'] / weeks, 2)

print("=" * 60)
print("PERSONAL RECORDS & STATISTICS")
print("=" * 60)
for key, value in records.items():
    print(f"{key:.<40} {value}")
print("=" * 60)

# Activity frequency over time: number of activities per calendar month
activity_freq = df_clean.groupby(df_clean['Dato'].dt.to_period('M')).size()

freq_fig, freq_ax = plt.subplots(figsize=(14, 5))
activity_freq.plot(kind='bar', ax=freq_ax, color='teal', alpha=0.7)
freq_ax.set_title('Monthly Activity Frequency', fontsize=16, fontweight='bold')
freq_ax.set_xlabel('Month')
freq_ax.set_ylabel('Number of Activities')
freq_ax.tick_params(axis='x', rotation=45)
freq_ax.grid(True, alpha=0.3, axis='y')
plt.tight_layout()
plt.show()

## 7. Format Data for AI Chat Context

Prepare the structured data the AI planner will receive: a per-week, per-activity-type summary of the last 4 weeks, plus the 15 most recent activities.

In [None]:
# Window for the AI context: activities from the `weeks_history` weeks that
# precede the newest activity.
weeks_history = 4
latest_date = df_clean['Dato'].max()
cutoff_date = latest_date - timedelta(weeks=weeks_history)
df_ai = df_clean.loc[df_clean['Dato'] >= cutoff_date]

# One row per (week, activity type): summed volume, mean aerobic TE, session count
weekly_by_activity = (
    df_ai.groupby(['Week_Start', 'Aktivitetstype'])
    .agg(
        Total_Distance_km=('Distance', 'sum'),
        Total_Duration_min=('Duration_Minutes', 'sum'),
        Total_Calories=('Kalorier', 'sum'),
        Avg_Aerobic_TE=('Aerob TE', 'mean'),
        Activity_Count=('Dato', 'count'),
    )
    .round(2)
)

print("Weekly Summary by Activity Type (Last 4 Weeks):")
print(weekly_by_activity)

# First 15 rows of df_clean (which inherits the newest-first ordering applied
# when the data was loaded), trimmed to the context columns and with the date
# rendered as a YYYY-MM-DD string.
context_columns = ['Dato', 'Aktivitetstype', 'Distance', 'Duration_Minutes',
                   'Gennemsnitlig puls', 'Aerob TE', 'Kalorier']
recent_activities = (
    df_clean.head(15)
    .loc[:, context_columns]
    .assign(Dato=lambda frame: frame['Dato'].dt.strftime('%Y-%m-%d'))
)

print("\n\nRecent Activities (Last 15):")
print(recent_activities)

## 8. Export Processed Data

Export data in formats suitable for AI training planning.

In [None]:
def _json_safe(value):
    """Return `value` with NaN/infinite floats and NaT replaced by None.

    Dicts, lists and tuples are walked recursively (tuples come back as lists).
    json.dump would otherwise write such floats as bare NaN/Infinity tokens,
    which are not valid JSON and are rejected by strict parsers.
    """
    if isinstance(value, dict):
        return {key: _json_safe(item) for key, item in value.items()}
    if isinstance(value, (list, tuple)):
        return [_json_safe(item) for item in value]
    if value is pd.NaT:
        return None
    if isinstance(value, float) and not np.isfinite(value):
        return None
    return value


# Create comprehensive AI context (missing values become null in the JSON)
ai_context = _json_safe({
    'summary': {
        'date_generated': datetime.now().strftime('%Y-%m-%d'),
        'data_period': f"Last {weeks_history} weeks",
        'total_activities': len(df_ai),
    },
    'personal_records': records,
    'weekly_summary': weekly_by_activity.reset_index().to_dict(orient='records'),
    'recent_activities': recent_activities.to_dict(orient='records'),
    'activity_distribution': activity_summary.to_dict(),
})

# Export to JSON. default=str stringifies anything json cannot encode natively
# (e.g. the Week_Start timestamps); allow_nan=False makes the dump fail loudly
# rather than emit invalid JSON if a non-finite float ever slips through.
with open('training_data_for_ai.json', 'w', encoding='utf-8') as f:
    json.dump(ai_context, f, indent=2, default=str, ensure_ascii=False,
              allow_nan=False)

print("✓ Exported training context to 'training_data_for_ai.json'")

# Generate AI-friendly prompt. Pieces are collected in a list and joined once.
# The weekly averages are read with .get() because they may be absent from
# `records` when the data spans less than a day.
prompt_parts = [f"""# Training Planning Context

## Personal Records & Overall Stats
- Total Activities: {records['Total Activities']}
- Total Distance: {records['Total Distance (km)']} km
- Total Training Time: {records['Total Duration (hours)']} hours
- Longest Run: {records['Longest Run (km)']} km
- Average Weekly Distance: {records.get('Avg Weekly Distance (km)', 'n/a')} km
- Average Weekly Duration: {records.get('Avg Weekly Duration (hours)', 'n/a')} hours

## Recent Training Pattern (Last {weeks_history} weeks)
"""]

# Add weekly summary: one entry per (week, activity type)
for _, row in weekly_by_activity.reset_index().iterrows():
    week_date = pd.to_datetime(row['Week_Start']).strftime('%Y-%m-%d')
    prompt_parts.append(f"\n**Week starting {week_date} - {row['Aktivitetstype']}:**\n")
    prompt_parts.append(f"  - Activities: {int(row['Activity_Count'])}\n")
    prompt_parts.append(f"  - Distance: {row['Total_Distance_km']} km\n")
    prompt_parts.append(f"  - Duration: {row['Total_Duration_min']} min\n")
    if not pd.isna(row['Avg_Aerobic_TE']):
        prompt_parts.append(f"  - Avg Aerobic TE: {row['Avg_Aerobic_TE']}\n")

# Add recent activities; each metric is listed only when it is present, so a
# missing duration no longer shows up as "nan min".
prompt_parts.append("\n## Most Recent Activities\n")
for _, activity in recent_activities.head(10).iterrows():
    details = []
    if not pd.isna(activity['Distance']):
        details.append(f"{activity['Distance']} km")
    if not pd.isna(activity['Duration_Minutes']):
        details.append(f"{activity['Duration_Minutes']:.0f} min")
    if not pd.isna(activity['Aerob TE']):
        details.append(f"TE: {activity['Aerob TE']}")
    prompt_parts.append(
        f"\n- **{activity['Dato']}** - {activity['Aktivitetstype']}: "
        + ", ".join(details)
        + "\n"
    )

prompt_parts.append("""
## Request
Based on my training history above, please:
1. Analyze my current training patterns and identify strengths/weaknesses
2. Suggest a balanced weekly training plan for the next week
3. Include specific workouts with recommended duration, intensity, and type
4. Consider recovery needs based on my recent training load
5. Help me progress toward my goals while avoiding overtraining

## My Goals
[Add your specific training goals here]
""")

prompt = "".join(prompt_parts)

# Save prompt to file
with open('sample_ai_prompt.txt', 'w', encoding='utf-8') as f:
    f.write(prompt)

print("✓ Generated AI prompt and saved to 'sample_ai_prompt.txt'")
print("\n" + "=" * 60)
print("SAMPLE AI PROMPT:")
print("=" * 60)
# Only the first 1000 characters are echoed; the file holds the full prompt
print(prompt[:1000] + "...\n[truncated]")

## Next Steps

Now you can use the generated files with an AI chat:

1. **`training_data_for_ai.json`**: Contains structured data about your training
2. **`sample_ai_prompt.txt`**: Ready-to-use prompt for AI chat

### How to Use with AI Chat:
1. Copy the content from `sample_ai_prompt.txt`
2. Replace the `[Add your specific training goals here]` placeholder under **My Goals** with your specific training goals
3. Paste into your AI chat (ChatGPT, Claude, etc.)
4. The AI will analyze your data and create a personalized weekly training plan

### Alternative: Use the Python script
Run `python garmin_data_processor.py` to process data and generate prompts programmatically.