# Tennis Ball Trajectory Prediction with GBRT

- **Data Preparation**: Transform preprocessed tennis data to match GBRT requirements
- **Feature Engineering**: Create physics-based features for ball trajectory prediction
- **Model Training**: Run the optimized GBRT model with Bayesian hyperparameter optimization
- **Performance Evaluation**: Assess prediction accuracy and model performance

### GBRT Model Features:
- **Multi-output regression** for (x, y) ball position prediction
- **Time series cross-validation** for temporal data
- **Bayesian optimization** for hyperparameter tuning
- **Physics-informed features** including velocity, acceleration, and court-relative positions

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.ensemble import GradientBoostingRegressor
from sklearn.model_selection import TimeSeriesSplit, train_test_split
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
from sklearn.impute import SimpleImputer
from sklearn.multioutput import MultiOutputRegressor
from sklearn.pipeline import Pipeline
from sklearn.preprocessing import StandardScaler
from skopt import BayesSearchCV
from skopt.space import Real, Integer
from scipy.signal import savgol_filter
import warnings
warnings.filterwarnings('ignore')

import data_analysis_help.gbrt_training_helper as gbrt_helper

# Plot appearance. Order matters: the matplotlib style is applied first and the
# seaborn palette second, so the palette is not overwritten by the style reset.
plt.style.use('default')
sns.set_palette("husl")

# Status banner confirming the cell ran to completion.
print(
    "Libraries imported successfully!",
    "Ready for GBRT tennis ball trajectory prediction",
    sep="\n",
)

## Data Loading and Preparation

In [None]:

# Path to the preprocessed dataset, relative to the notebook's working directory.
dataset_file = 'dataset/complete_tennis_comprehensive_preprocessed_ml4qs.csv'

# Only the read itself is guarded, so the "not found" / "error loading" messages
# can no longer be triggered by a failure in the summary code further down.
try:
    print(f"Loading: {dataset_file}")
    df_raw = pd.read_csv(dataset_file)
except FileNotFoundError as err:
    from pathlib import Path

    dataset_path = Path(dataset_file)
    print(f"File not found: {dataset_file}")
    print("Available CSV files:")
    # Look in the folder the dataset is expected in first, then in the working
    # directory; dict.fromkeys de-duplicates when both are the same folder.
    for directory in dict.fromkeys([dataset_path.parent, Path('.')]):
        if not directory.is_dir():
            continue
        for csv_path in sorted(directory.glob('*.csv')):
            print(f"  - {csv_path}")
    raise FileNotFoundError(
        f"Please ensure {dataset_file} exists relative to the working directory ({Path.cwd()})"
    ) from err
except Exception as e:
    print(f"Error loading dataset: {e}")
    raise

print("Dataset loaded successfully!")
print(f"Shape: {df_raw.shape[0]:,} rows × {df_raw.shape[1]} columns")

# Columns whose name contains both "video" and "source" (case-insensitive).
# video_cols is reused by the summary and export cells at the end of the notebook.
video_cols = [col for col in df_raw.columns if 'video' in col.lower() and 'source' in col.lower()]
if video_cols:
    video_col = video_cols[0]
    unique_videos = df_raw[video_col].nunique()
    print(f"Videos: {unique_videos} unique videos")

# The remaining statistics are optional: each is printed only if its column exists.
if 'time_seconds' in df_raw.columns:
    print(f"Duration: {df_raw['time_seconds'].max():.1f} seconds")

if 'ball_detected' in df_raw.columns:
    detection_rate = df_raw['ball_detected'].mean()
    print(f"Ball detection rate: {detection_rate:.1%}")

print("\nKey columns for GBRT:")
key_cols = ['timestamp', 'ball_center_x', 'ball_center_y', 'ball_x1', 'ball_y1', 'ball_x2', 'ball_y2',
            'time_seconds', 'court_width_pixels', 'court_height_pixels', 'player_1_center_x', 'player_1_center_y']

# Report the share of missing values for each key column present in the file.
available_key_cols = [col for col in key_cols if col in df_raw.columns]
for col in available_key_cols:
    missing_pct = df_raw[col].isnull().mean() * 100
    print(f"   {col}: {missing_pct:.1f}% missing")

In [None]:
# Hand the helper a copy of the raw frame rather than df_raw itself; df_raw is
# used again by the summary and export cells at the end of the notebook.
df_prepared = gbrt_helper.load_and_prepare_data(df_raw.copy())

## Feature Engineering for GBRT

In [None]:
# Derive the model features from the prepared frame. window_size=5 is forwarded
# to the helper unchanged; presumably a window length in frames — confirm in
# gbrt_training_helper.create_features.
df_features = gbrt_helper.create_features(df_prepared, window_size=5)

## Sequence Preparation for Time Series Prediction

In [None]:
# Window sizes are named once so the summary printed below cannot drift from
# the values actually passed to prepare_sequences.
hist_frames = 12  # value passed as hist_frames (history frames per input sequence)
pred_frames = 5   # value passed as pred_frames (frames predicted per sequence)

# The helper returns the input matrix, the target matrix and the column names
# used for each.
X, y, feature_cols, target_cols = gbrt_helper.prepare_sequences(
    df_features, hist_frames=hist_frames, pred_frames=pred_frames
)

print(f"\nFinal training data summary:")
print(f" Input sequences (X): {X.shape}")
print(f"   Target sequences (y): {y.shape}")
print(f"   Features per frame: {len(feature_cols)}")
# The bracketed breakdowns restate how the helper is expected to flatten the
# windows; the leading numbers are read from the arrays themselves.
print(f"  Sequence input dimension: {X.shape[1]} ({hist_frames} frames × {len(feature_cols)} features)")
print(f" Sequence output dimension: {y.shape[1]} ({pred_frames} frames × 2 coordinates)")

## GBRT Model Training with Bayesian Optimization

In [None]:

# Fit the model on the sequences built above. The hyperparameter search named in
# the section heading is not visible here; it presumably lives inside
# train_physics_gbrt — confirm in gbrt_training_helper.
model = gbrt_helper.train_physics_gbrt(X, y)

print("\nModel training completed!")

## Model Evaluation and Performance Analysis

In [None]:
# Retrieve the fitted model's parameters through the helper.
gbrt_params = gbrt_helper.get_model_parameters(model)

# Summary of the multi-output configuration. The horizon and total counts are
# fixed text; only the number of outputs is read from target_cols.
print(
    "\nMulti-output setup:",
    f"   Number of outputs: {len(target_cols)} (x, y coordinates)",
    "   Prediction horizon: 5 frames",
    "   Total predictions per sequence: 10 (5 frames × 2 coordinates)",
    sep="\n",
)

In [None]:
# Section banner: title followed by a 30-character rule.
print("MODEL EVALUATION", "=" * 30, sep="\n")

# NOTE(review): the same X and y that were passed to train_physics_gbrt are
# passed here. Confirm that evaluate_model holds out data internally; otherwise
# these are training-set metrics.
metrics = gbrt_helper.evaluate_model(model, X, y, target_cols)

## Visualization of Results

In [None]:
# Build the performance figure from the metrics returned by evaluate_model and
# render it. fig keeps whatever the helper returns (presumably a matplotlib
# Figure — confirm in gbrt_training_helper).
fig = gbrt_helper.create_performance_plots(metrics)
plt.show()

In [None]:
# Build the trajectory figure from the same metrics object and render it.
# Rebinding fig replaces the handle assigned by the previous plotting cell.
fig = gbrt_helper.create_trajectory_plots(metrics)
plt.show()

## Model Summary and Export

In [None]:
# Produce the run summary from the raw data, engineered features, input matrix,
# feature names, detected video-source columns and evaluation metrics.
# video_cols is defined in the data-loading cell, so that cell must have run.
performance_level = gbrt_helper.generate_model_summary(df_raw, df_features, X, feature_cols, video_cols, metrics)

In [None]:
# Persist the trained model and results through the helper; it receives the same
# context objects as the summary call plus the model itself. Output locations and
# formats are decided inside save_model_and_results — see gbrt_training_helper.
export_info = gbrt_helper.save_model_and_results(model, df_raw, df_features, X, feature_cols, video_cols, metrics)