# In [None]:
# F1 Pit Stop Strategy - Exploratory Analysis
# This notebook demonstrates the key functionality of the system

# ## 1. Setup and Imports

import sys
sys.path.append('..')

import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from src.data_collection import F1DataCollector
from src.data_preprocessing import F1DataPreprocessor
from src.model_training import F1ModelTrainer
from src.optimization_engine import F1StrategyOptimizer
from src.visualization import F1Visualizer

import warnings
# Silence every warning category so the notebook output stays readable.
# NOTE(review): this also hides deprecation warnings from pandas/fastf1/etc.
warnings.filterwarnings('ignore')

# Set visualization style.
# Matplotlib 3.6 renamed its bundled seaborn style sheets to "seaborn-v0_8-*";
# older releases only ship the un-prefixed name. Pick whichever this
# installation provides so a purely cosmetic setting cannot abort the run.
# If neither is available the Matplotlib default style is kept.
for _style_name in ('seaborn-v0_8-darkgrid', 'seaborn-darkgrid'):
    if _style_name in plt.style.available:
        plt.style.use(_style_name)
        break
sns.set_palette("husl")

# ## 2. Data Collection Example

# Build the collector, handing it the cache directory to use
collector = F1DataCollector(cache_dir='../data/raw/cache')

# Request races 1-3 of the 2023 season
print("Collecting F1 data...")
data = collector.collect_all_data(years=[2023], races=[1, 2, 3])

# The result is indexed by table name; pull out the two tables by key
laps_df, ergast_df = data['laps'], data['ergast']

# Report how many laps, and how many distinct races, were collected
lap_count = len(laps_df)
race_count = laps_df['RaceNumber'].nunique()
print(f"Collected {lap_count} laps from {race_count} races")

# ## 3. Data Exploration

# Display basic statistics.
# DataFrame.info() writes its summary to stdout itself and returns None, so it
# is called directly; wrapping it in print() would emit a stray "None" line.
print("\n=== Lap Data Overview ===")
laps_df.info()
print("\n=== Sample Data ===")
print(laps_df.head(10))

# Check compound distribution (number of laps recorded per tire compound)
print("\n=== Tire Compound Distribution ===")
print(laps_df['Compound'].value_counts())

# ## 4. Data Preprocessing

preprocessor = F1DataPreprocessor()

# Stage 1: clean the raw lap table
print("\n=== Cleaning Data ===")
cleaned_df = preprocessor.clean_lap_data(laps_df)

# Stage 2: engineer features, then build the pit-stop features on top of them
print("\n=== Engineering Features ===")
featured_df = preprocessor.engineer_features(cleaned_df)
pitstop_df = preprocessor.create_pitstop_features(featured_df)

# Summarise the table that every later section works from
final_shape = pitstop_df.shape
column_names = pitstop_df.columns.tolist()
print(f"\nFinal dataset shape: {final_shape}")
print(f"Features: {column_names}")

# ## 5. Exploratory Data Analysis

viz = F1Visualizer(figsize=(14, 8))

# Tire degradation plot
print("\n=== Tire Degradation Analysis ===")
viz.plot_tire_degradation(pitstop_df)

# Lap time evolution for the five drivers with the most recorded laps
print("\n=== Lap Time Evolution ===")
laps_per_driver = pitstop_df.groupby('Driver')['LapNumber'].count()
top_drivers = laps_per_driver.nlargest(5).index.tolist()
viz.plot_lap_time_evolution(pitstop_df, drivers=top_drivers)

# Histogram of stint lengths, coloured by compound
print("\n=== Stint Length Distribution ===")
plt.figure(figsize=(10, 6))
# histplot draws on the current axes and returns them, so the labels can be
# set on the returned Axes object.
stint_ax = sns.histplot(
    data=pitstop_df,
    x='StintLength',
    hue='Compound',
    bins=30,
    alpha=0.6,
)
stint_ax.set_title('Stint Length Distribution by Compound')
stint_ax.set_xlabel('Stint Length (laps)')
stint_ax.set_ylabel('Count')
plt.show()

# ## 6. Model Training

from sklearn.model_selection import train_test_split

# Build the feature matrix and the target from the preprocessed laps
X, y = preprocessor.prepare_training_data(pitstop_df)

print("\n=== Training Data ===")
print(f"Features shape: {X.shape}")
print(f"Target shape: {y.shape}")

# Hold out 20% of the rows for evaluation; the seed keeps the split repeatable
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

# Fit all candidate models in one call
trainer = F1ModelTrainer(random_state=42)
print("\n=== Training Models ===")
results = trainer.train_all_models(X_train, y_train, X_test, y_test)

# Print the evaluation metrics of each trained model
print("\n=== Model Comparison ===")
for model_name, result in results.items():
    # The 'best_model' entry stores the winning model's key, not metrics
    if model_name == 'best_model':
        continue
    metrics = result['metrics']
    print(f"\n{model_name.upper()}")
    print(f"  MAE:  {metrics['mae']:.4f}")
    print(f"  RMSE: {metrics['rmse']:.4f}")
    print(f"  R²:   {metrics['r2']:.4f}")

# Feature importance of the winning model
best_model_key = results['best_model']
best_model = results[best_model_key]['model']
model_feature_names = X.columns.tolist()
importance_df = trainer.get_feature_importance(best_model, model_feature_names)

viz.plot_feature_importance(importance_df, top_n=15)

# ## 7. Strategy Optimization

# Hand the trained model to the optimizer, with the pit-loss setting
optimizer = F1StrategyOptimizer(best_model, pit_loss_seconds=25.0)

# Example race configuration (58 lap race)
race_params = dict(
    total_laps=58,
    base_lap_time=82.0,
    fuel_effect=0.05,
    max_stops=2,
    min_stint=10,
)

# Run the optimizer for the configured race
print("\n=== Optimizing Strategy ===")
strategies = optimizer.optimize_strategy(race_params, n_strategies=100)

# Tabulate the ten leading strategies and show the table
comparison_df = optimizer.compare_strategies(strategies, top_n=10)
print("\n=== Top 10 Strategies ===", comparison_df, sep="\n")

# Plot the same comparison table
viz.plot_strategy_comparison(comparison_df)

# ## 8. Best Strategy Analysis

# The first entry is reported as the optimum
# (presumably the optimizer returns strategies sorted best-first; verify).
best_strategy = strategies[0]

section_rule = "=" * 70
print("\n" + section_rule)
print("OPTIMAL STRATEGY")
print(section_rule)
print(f"Total Race Time: {best_strategy.total_time:.2f} seconds")
print(f"Pit Stops: {len(best_strategy.pit_laps)}")
print(f"Pit Laps: {best_strategy.pit_laps}")
print(f"Tire Sequence: {' → '.join(best_strategy.tire_sequence)}")

# One line per stint: lap window, length and compound
print("\nStint Breakdown:")
for stint in best_strategy.stints:
    print(
        f"  Stint {stint['stint_number']}: Laps {stint['start_lap']}-{stint['end_lap']} "
        f"({stint['length']} laps) on {stint['compound']} tires"
    )

# ## 9. Interactive Dashboard

print("\n=== Creating Interactive Dashboard ===")
viz.create_interactive_strategy_dashboard(pitstop_df, comparison_df)

# ## 10. Generate Report

viz.generate_strategy_report(best_strategy, comparison_df)

# ## Summary

print("\n" + "="*70)
print("ANALYSIS COMPLETE")
print("="*70)
print(f"✓ Processed {len(pitstop_df)} laps")
# `results` holds one entry per trained model plus a 'best_model' key that
# only names the winner, so that key is left out of the model count.
n_models = sum(1 for key in results if key != 'best_model')
print(f"✓ Trained {n_models} models")
print(f"✓ Evaluated {len(strategies)} strategies")
print(f"✓ Optimal strategy: {len(best_strategy.pit_laps)}-stop with "
      f"{' → '.join(best_strategy.tire_sequence)} tires")
# The runner-up is read from the second row of the comparison table. With
# fewer than two rows there is no runner-up and `.iloc[1]` would raise
# IndexError, so report "n/a" instead of crashing at the very last line.
# NOTE(review): assumes 'time_delta' is the gap to the best strategy — confirm.
if len(comparison_df) > 1:
    print(f"✓ Estimated time saving: {comparison_df.iloc[1]['time_delta']:.2f} seconds "
          f"vs 2nd best strategy")
else:
    print("✓ Estimated time saving: n/a (fewer than two strategies to compare)")