# Daily Fitness Metrics Analysis
## Data Loading and Preprocessing

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path
from src.data_loader import load_data

%matplotlib inline
# matplotlib 3.6 renamed its bundled seaborn styles to 'seaborn-v0_8-*' and
# later releases dropped the old names, so use whichever spelling this
# installation provides (falling back to the current name so that a missing
# style still fails loudly).
_whitegrid_styles = ('seaborn-v0_8-whitegrid', 'seaborn-whitegrid')
plt.style.use(next(
    (name for name in _whitegrid_styles if name in plt.style.available),
    _whitegrid_styles[0],
))

In [3]:
def load_daily_metrics(data_dir: str) -> pd.DataFrame:
    """Load and concatenate all daily metric CSV files.

    Args:
        data_dir: Directory that contains the ``Fit`` export folder.

    Returns:
        A single DataFrame holding the rows of every ``*.csv`` file found in
        ``<data_dir>/Fit/Métriques d_activité quotidiennes``, concatenated in
        file-name order with a fresh 0..n-1 index. The columns
        ``'Heure de début'`` and ``'Heure de fin'`` are parsed as dates
        (day first); empty and single-space cells are read as missing.

    Raises:
        ValueError: If no CSV file is found in the metrics folder.
    """
    metrics_path = Path(data_dir) / 'Fit' / 'Métriques d_activité quotidiennes'

    # glob() yields files in arbitrary filesystem order; sorting makes the
    # row order of the concatenated frame reproducible across machines.
    csv_files = sorted(metrics_path.glob('*.csv'))
    if not csv_files:
        # pd.concat([]) would raise a ValueError with an unhelpful message;
        # keep the exception type but say which folder was empty.
        raise ValueError(f'No CSV files found in {metrics_path}')

    frames = [
        pd.read_csv(
            csv_file,
            parse_dates=['Heure de début', 'Heure de fin'],
            dayfirst=True,
            na_values=['', ' '],
        )
        for csv_file in csv_files
    ]
    return pd.concat(frames, ignore_index=True)

# Load and preprocess data
# '..' is the directory expected to contain the 'Fit' export folder.
daily_df = load_daily_metrics('..')

# Clean column names: map the French export headers to short English names
daily_df = daily_df.rename(columns={
    'Nombre de minutes actives': 'active_minutes',
    'Calories (kcal)': 'calories',
    'Distance (m)': 'distance_m',
    'Points cardio': 'cardio_points',
    'Nombre de pas': 'steps'
})

# Coerce the distance before dividing so a stray non-numeric cell becomes NaN
# instead of breaking the metres -> kilometres conversion.
daily_df['distance_m'] = pd.to_numeric(daily_df['distance_m'], errors='coerce')

daily_df = daily_df.assign(
    # Calendar day of the interval start, stored as a datetime64 column
    date=pd.to_datetime(daily_df['Heure de début'].dt.date),
    distance_km=daily_df['distance_m'] / 1000
)

# Handle missing data
# Coerce first and fill afterwards: values that cannot be parsed become NaN
# during coercion, so filling before coercing would leave those NaNs behind.
numeric_cols = ['active_minutes', 'calories', 'distance_km', 'cardio_points', 'steps']
daily_df[numeric_cols] = (
    daily_df[numeric_cols].apply(pd.to_numeric, errors='coerce').fillna(0)
)

NameError: name 'pd' is not defined

## Temporal Aggregation and Analysis

In [None]:
# Create daily aggregates: one row per distinct 'date', every metric summed
_summed_metrics = ['active_minutes', 'calories', 'distance_km', 'cardio_points', 'steps']
_sum_spec = dict.fromkeys(_summed_metrics, 'sum')
daily_agg = daily_df.groupby('date').agg(_sum_spec).reset_index()

# Calculate 7-day rolling averages over a time-based ('7D') window on the date index
_indexed_by_date = daily_agg.set_index('date')
_rolling_mean = _indexed_by_date.rolling('7D').mean()
rolling_window = _rolling_mean.reset_index()

# Tag every averaged column with '_7d_avg'; 'date' keeps its name so the frame
# can be joined back onto the daily aggregates.
rolling_window.columns = [
    'date' if col == 'date' else f'{col}_7d_avg'
    for col in rolling_window.columns
]

## Visualization Suite

In [None]:
def plot_metric_trend(df: pd.DataFrame, metric: str, title: str):
    """Draw one metric and its 7-day average on a shared line chart.

    Args:
        df: Frame with a 'date' column, a ``metric`` column and a matching
            ``'<metric>_7d_avg'`` column.
        metric: Name of the column holding the daily values.
        title: Text placed in front of ' Trend Analysis' in the chart title.
    """
    # (column to plot, legend entry) for each line on the chart
    series_to_draw = (
        (metric, 'Daily'),
        (f'{metric}_7d_avg', '7D Avg'),
    )
    y_axis_label = metric.replace('_', ' ').title()

    plt.figure(figsize=(12, 6))
    for column, legend_label in series_to_draw:
        sns.lineplot(data=df, x='date', y=column, label=legend_label)

    plt.title(f'{title} Trend Analysis')
    plt.xlabel('Date')
    plt.ylabel(y_axis_label)
    plt.xticks(rotation=45)  # slanted tick labels on the date axis
    plt.tight_layout()
    plt.show()

# Generate trend plots
# The merged frame is the same for every metric, so build it once rather than
# repeating the join on each loop iteration.
merged_df = daily_agg.merge(rolling_window, on='date')
for metric in ['active_minutes', 'calories', 'distance_km', 'steps']:
    plot_metric_trend(merged_df, metric, metric.replace('_', ' ').title())

## Statistical Analysis

In [None]:
# Correlation analysis: pairwise correlation between the daily metric totals
_daily_metrics = daily_agg[numeric_cols]
corr_matrix = _daily_metrics.corr()

# Annotated heatmap, two decimals per cell, with a slightly shrunken colour bar
_heatmap_kwargs = {
    'annot': True,
    'cmap': 'coolwarm',
    'fmt': '.2f',
    'annot_kws': {'size': 12},
    'cbar_kws': {'shrink': 0.82},
}
plt.figure(figsize=(10, 8))
sns.heatmap(corr_matrix, **_heatmap_kwargs)
plt.title('Metric Correlation Matrix')
plt.tight_layout()
plt.show()

# Weekly aggregation
# NOTE(review): 'W-MON' is the weekly frequency anchored on Monday — confirm
# that this is the intended week boundary for the report.
_weekly_spec = {'active_minutes': ['sum', 'mean', 'max']}
for _column in ('calories', 'distance_km', 'steps'):
    _weekly_spec[_column] = ['sum', 'mean']

_daily_by_date = daily_agg.set_index('date')
weekly_agg = _daily_by_date.resample('W-MON').agg(_weekly_spec)

# Preview the first weeks, shaded by value (rendered as the cell output)
weekly_agg.head().style.background_gradient(cmap='Blues')

## Key Insights Documentation

In [None]:
# Summary statistics
# Restrict the summary to numeric columns: recent pandas versions also describe
# the datetime 'date' column by default, and the '{:.2f}' float format is not
# meaningful for timestamp values.
daily_agg.select_dtypes(include='number').describe().T.style.format('{:.2f}')