# Interactive Visualization Demo

This notebook demonstrates the interactive Plotly visualizations for the F1 Lap Time Prediction project.

**Features:**
- Interactive scatter plots with hover data
- Feature importance visualization
- Error distribution histograms
- Export to standalone HTML for web integration

---

In [None]:
# Install Plotly if not available
try:
    import plotly
    print(f"Plotly version: {plotly.__version__}")
except ImportError:
    print("Installing Plotly...")
    !pip install plotly -q
    import plotly
    print(f"Plotly installed: {plotly.__version__}")

In [None]:
import sys
# Prepend the parent directory to the module search path so the `src.*`
# imports below can be resolved.
# NOTE(review): this presumes the kernel's working directory sits one level
# below the project root — confirm.
sys.path.insert(0, '..')

import warnings
# Silence every warning category for the rest of the session. This also hides
# deprecation notices raised by the libraries used below.
warnings.filterwarnings('ignore')

import numpy as np
import pandas as pd
from pathlib import Path

# Project imports
from src.data_loader import load_laps_for_seasons, clean_laps, enable_cache
from src.features import build_feature_table
from src.models import make_comparison_models, set_global_seed
from src.evaluation import time_based_split, train_and_score

# Visualization imports
from src.visualization import (
    plot_interactive_actual_vs_pred,
    plot_feature_importance,
    plot_error_distribution,
    plot_error_by_category,
    save_interactive_plot,
    create_dashboard,
)

# --- Notebook configuration ---
# One seed, used for set_global_seed() here and passed to the model factory later.
RANDOM_STATE = 42

# Destination folder for the exported HTML figures; created up front so the
# export cell can write into it.
# NOTE(review): the path is relative to the kernel's working directory, not to
# the '..' directory added to sys.path — confirm that is the intended location.
WEB_DIR = Path('reports') / 'web'
WEB_DIR.mkdir(parents=True, exist_ok=True)

set_global_seed(RANDOM_STATE)
print("Imports successful!")

## 1. Load Data & Train Model

In [None]:
# --- Data preparation: raw laps -> cleaned laps -> feature table ---
# Turn on the data loader's cache before requesting any laps.
enable_cache()
raw_laps = load_laps_for_seasons([2022, 2023])
clean_df = clean_laps(raw_laps, verbose=False)

# build_feature_table also reports which feature columns are numeric and which
# are categorical; both lists are reused when the models are built.
feature_df, numeric_cols, categorical_cols = build_feature_table(
    clean_df,
    verbose=False,
)

# Split with 2023 as the test season (presumably the other loaded season
# becomes the training set — verify against time_based_split).
train_df, test_df = time_based_split(feature_df, test_season=2023)
print(f"Train: {len(train_df):,} | Test: {len(test_df):,}")

In [None]:
# --- Model training and selection ---
# Feature columns handed to the models: numeric columns first, then categorical.
feature_cols = numeric_cols + categorical_cols
models = make_comparison_models(numeric_cols, categorical_cols, RANDOM_STATE)

metrics_df, predictions, fitted_models = train_and_score(
    models,
    train_df,
    test_df,
    feature_cols,
    verbose=True,
)

# NOTE(review): the first row of metrics_df is treated as the winner, which
# presumes train_and_score returns the metrics sorted best-first — confirm.
best_model_name = metrics_df['model'].iloc[0]
y_pred = predictions[best_model_name]
y_test = test_df['LapTimeSeconds'].values

print(f"\nBest model: {best_model_name}")

---
## 2. Interactive Actual vs Predicted Plot

Hover over points to see Driver, LapNumber, Compound, and more.

In [None]:
# Predicted-vs-actual scatter for the selected model. The columns named below
# are passed as hover data for each point.
hover_columns = ['Driver', 'LapNumber', 'Compound', 'Circuit', 'TyreLife', 'EstimatedGrip']
scatter_title = f"Predicted vs Actual Lap Time ({best_model_name})"

fig_scatter = plot_interactive_actual_vs_pred(
    y_test,
    y_pred,
    test_df,
    hover_cols=hover_columns,
    title=scatter_title,
    sample_size=5000,  # plot a sample rather than every lap, for performance
)
fig_scatter.show()

In [None]:
# Same predicted-vs-actual scatter, this time coloured by the 'Compound' column.
compound_plot_options = dict(
    color_by='Compound',
    title="Predictions by Tire Compound",
    sample_size=5000,
)
fig_by_compound = plot_interactive_actual_vs_pred(
    y_test, y_pred, test_df, **compound_plot_options
)
fig_by_compound.show()

---
## 3. Error Distribution

In [None]:
# Distribution of prediction errors for the selected model, drawn with 60 bins
# and summary statistics switched on.
error_title = f"Prediction Error Distribution ({best_model_name})"
fig_error = plot_error_distribution(
    y_test,
    y_pred,
    title=error_title,
    bins=60,
    show_stats=True,
)
fig_error.show()

---
## 4. Feature Importance (XGBoost)

In [None]:
# Feature importance is always taken from the 'XGBoost' entry of fitted_models,
# independent of which model was selected as best.
xgb_model = fitted_models.get('XGBoost')

# Compare against None explicitly: .get() signals a missing key with None, and
# the truthiness of an estimator object is not a reliable "was it found" test
# (objects that define __len__ or __bool__ can evaluate as falsy).
if xgb_model is None:
    print("XGBoost model not found in fitted_models.")
else:
    fig_importance = plot_feature_importance(
        xgb_model,
        top_n=15,
        title="XGBoost Feature Importance (Top 15)",
        interactive=True,
    )
    fig_importance.show()

---
## 5. Error by Category

In [None]:
# Prediction error grouped by the 'Circuit' column (top_n=15).
circuit_error_options = {
    'category_col': 'Circuit',
    'title': 'Mean Absolute Error by Circuit',
    'top_n': 15,
}
fig_circuit = plot_error_by_category(y_test, y_pred, test_df, **circuit_error_options)
fig_circuit.show()

In [None]:
# Prediction error grouped by the 'Driver' column (top_n=20).
driver_error_title = 'Mean Absolute Error by Driver'
fig_driver = plot_error_by_category(
    y_test,
    y_pred,
    test_df,
    category_col='Driver',
    title=driver_error_title,
    top_n=20,
)
fig_driver.show()

---
## 6. Dashboard View

In [None]:
# Combined dashboard figure built from the test predictions.
# NOTE(review): the title names the best model, while fitted_model is always the
# XGBoost entry (None when that entry is absent) — confirm the mix is intended.
dashboard_title = f"F1 Lap Time Prediction Dashboard - {best_model_name}"
fig_dashboard = create_dashboard(
    y_test,
    y_pred,
    test_df,
    fitted_model=xgb_model,
    title=dashboard_title,
)
fig_dashboard.show()

---
## 7. Export to HTML for Web

Save interactive plots as standalone HTML files for the website.

In [None]:
# Export the main figures to WEB_DIR as HTML files.
# Each entry pairs a figure with its output file name, listed in export order.
# NOTE(review): fig_by_compound and fig_driver are not exported — confirm this
# is intentional.
export_queue = [
    (fig_scatter, 'actual_vs_predicted.html'),
    (fig_error, 'error_distribution.html'),
]
# fig_importance is only created when the XGBoost model was found, so it is
# queued under the same condition.
if xgb_model:
    export_queue.append((fig_importance, 'feature_importance.html'))
export_queue.extend([
    (fig_circuit, 'error_by_circuit.html'),
    (fig_dashboard, 'dashboard.html'),
])

# One loop replaces the repeated save/append/print stanzas; the printed output
# and the contents of saved_files are the same as before.
saved_files = []
for figure, file_name in export_queue:
    path = save_interactive_plot(figure, WEB_DIR / file_name)
    saved_files.append(path)
    print(f"Saved: {path}")

print(f"\n{len(saved_files)} HTML files saved to {WEB_DIR}/")

In [None]:
# Report every HTML file currently present in WEB_DIR (including any left over
# from earlier runs), sorted by path, with its size in kilobytes.
print("\nGenerated HTML files for web:")
html_files = sorted(WEB_DIR.glob('*.html'))
for html_file in html_files:
    kilobytes = html_file.stat().st_size / 1024
    print(f"  {html_file.name:<30} ({kilobytes:.1f} KB)")

---
## Usage Summary

### Quick Reference

```python
from src.visualization import (
    plot_interactive_actual_vs_pred,
    plot_feature_importance,
    plot_error_distribution,
    plot_error_by_category,
    save_interactive_plot,
    create_dashboard,
)

# 1. Actual vs Predicted with hover
fig = plot_interactive_actual_vs_pred(
    y_test, y_pred, test_df,
    hover_cols=['Driver', 'LapNumber', 'Compound'],
    color_by='Compound',  # Optional
)

# 2. Feature importance from XGBoost
fig = plot_feature_importance(fitted_model, top_n=10)

# 3. Error distribution histogram
fig = plot_error_distribution(y_test, y_pred, show_stats=True)

# 4. Error by category
fig = plot_error_by_category(y_test, y_pred, test_df, 'Circuit')

# 5. Save to HTML
save_interactive_plot(fig, 'reports/web/my_plot.html')

# 6. Complete dashboard
fig = create_dashboard(y_test, y_pred, test_df, fitted_model)
```

### HTML Integration

The saved HTML files are standalone and can be embedded in any website:

```html
<iframe src="reports/web/dashboard.html" width="100%" height="800"></iframe>
```