# F1 Betting Market Prediction Models

This notebook implements sophisticated probabilistic models for F1 betting markets, including:
- Exact finishing position predictions
- Head-to-head matchup probabilities
- Points scoring probabilities
- DNF risk assessment
- Calibrated probability outputs for odds generation

In [1]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.isotonic import IsotonicRegression
from sklearn.calibration import CalibratedClassifierCV
from sklearn.ensemble import RandomForestClassifier, GradientBoostingClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.multiclass import OneVsRestClassifier
from sklearn.model_selection import train_test_split
from scipy import stats
from scipy.special import softmax
import matplotlib.pyplot as plt
import seaborn as sns
import warnings
warnings.filterwarnings('ignore')

In [2]:
import zipfile
import os

# Extract F1 data if not already extracted
if not os.path.exists('data/results.csv'):
    print("Extracting F1 data from f1db_csv.zip...")
    os.makedirs('data', exist_ok=True)
    with zipfile.ZipFile('f1db_csv.zip', 'r') as zip_ref:
        zip_ref.extractall('data/')
    print("Data extraction complete!")
else:
    print("F1 data already extracted.")

F1 data already extracted.


## 1. Load and Prepare Data for Betting Models

In [3]:
# Load datasets
results = pd.read_csv('data/results.csv')
races = pd.read_csv('data/races.csv')
drivers = pd.read_csv('data/drivers.csv')
constructors = pd.read_csv('data/constructors.csv')
qualifying = pd.read_csv('data/qualifying.csv')
driver_standings = pd.read_csv('data/driver_standings.csv')
constructor_standings = pd.read_csv('data/constructor_standings.csv')
status = pd.read_csv('data/status.csv')
circuits = pd.read_csv('data/circuits.csv')

# Merge core data
df = results.merge(races[['raceId', 'year', 'round', 'circuitId', 'date', 'name']], on='raceId')
df = df.merge(drivers[['driverId', 'driverRef', 'code']], on='driverId')
df = df.merge(constructors[['constructorId', 'constructorRef']], on='constructorId')
df = df.merge(circuits[['circuitId', 'circuitRef']], on='circuitId')
df = df.merge(status[['statusId', 'status']], on='statusId')
df = df.merge(qualifying[['raceId', 'driverId', 'position']], 
              on=['raceId', 'driverId'], 
              how='left', 
              suffixes=('', '_quali'))

# Focus on recent data for betting relevance
df['date'] = pd.to_datetime(df['date'])
df_betting = df[df['year'] >= 2018].copy()

# Create betting-relevant features
df_betting['finished'] = (df_betting['statusId'] == 1).astype(int)
df_betting['points_scored'] = (df_betting['points'] > 0).astype(int)
df_betting['podium'] = (df_betting['positionOrder'] <= 3) & (df_betting['finished'] == 1)
df_betting['top_5'] = (df_betting['positionOrder'] <= 5) & (df_betting['finished'] == 1)
df_betting['top_10'] = (df_betting['positionOrder'] <= 10) & (df_betting['finished'] == 1)

print(f"Betting dataset shape: {df_betting.shape}")
print(f"Years covered: {df_betting['year'].min()} - {df_betting['year'].max()}")
print(f"Total races: {df_betting['raceId'].nunique()}")

Betting dataset shape: (2619, 34)
Years covered: 2018 - 2024
Total races: 131


## 2. Feature Engineering for Betting Models

In [4]:
def create_betting_features(df):
    """
    Create features specifically designed for betting predictions
    """
    df = df.sort_values(['driverId', 'date']).copy()
    
    # Recent form features (last 5 races)
    for window in [3, 5, 10]:
        df[f'avg_position_last_{window}'] = df.groupby('driverId')['positionOrder'].transform(
            lambda x: x.shift(1).rolling(window=window, min_periods=1).mean()
        )
        df[f'dnf_rate_last_{window}'] = df.groupby('driverId')['finished'].transform(
            lambda x: 1 - x.shift(1).rolling(window=window, min_periods=1).mean()
        )
        df[f'points_rate_last_{window}'] = df.groupby('driverId')['points_scored'].transform(
            lambda x: x.shift(1).rolling(window=window, min_periods=1).mean()
        )
    
    # Constructor form
    df['constructor_avg_position'] = df.groupby(['constructorId', 'raceId'])['positionOrder'].transform('mean')
    df['constructor_reliability'] = df.groupby('constructorId')['finished'].transform(
        lambda x: x.shift(1).rolling(window=5, min_periods=1).mean()
    )
    
    # Track-specific features
    df['driver_track_history'] = df.groupby(['driverId', 'circuitId']).cumcount()
    df['driver_track_avg_position'] = df.groupby(['driverId', 'circuitId'])['positionOrder'].transform(
        lambda x: x.shift(1).expanding().mean()
    )
    df['driver_track_dnf_rate'] = df.groupby(['driverId', 'circuitId'])['finished'].transform(
        lambda x: 1 - x.shift(1).expanding().mean()
    )
    
    # Qualifying performance
    df['quali_position'] = df['position_quali'].fillna(20)
    df['quali_to_grid_change'] = df['grid'] - df['quali_position']
    
    # Season progress
    df['season_progress'] = df.groupby('year')['round'].transform(lambda x: x / x.max())
    
    # Championship pressure
    season_standings = df.groupby(['year', 'driverId'])['points'].sum().reset_index()
    season_standings['championship_position'] = season_standings.groupby('year')['points'].rank(ascending=False)
    df = df.merge(season_standings[['year', 'driverId', 'championship_position']], on=['year', 'driverId'], how='left')
    
    return df

df_features = create_betting_features(df_betting)

# Define feature columns for models
feature_cols = [
    'grid', 'quali_position', 'quali_to_grid_change',
    'avg_position_last_3', 'avg_position_last_5', 'avg_position_last_10',
    'dnf_rate_last_3', 'dnf_rate_last_5', 'dnf_rate_last_10',
    'points_rate_last_3', 'points_rate_last_5', 'points_rate_last_10',
    'constructor_avg_position', 'constructor_reliability',
    'driver_track_history', 'driver_track_avg_position', 'driver_track_dnf_rate',
    'season_progress', 'championship_position'
]

# Remove rows with NaN values
df_model = df_features.dropna(subset=feature_cols).copy()
print(f"Model dataset shape after feature engineering: {df_model.shape}")

Model dataset shape after feature engineering: (1683, 52)


## 3. Ordinal Regression for Exact Position Predictions

In [5]:
from sklearn.base import BaseEstimator, ClassifierMixin

class OrdinalRegressionClassifier(BaseEstimator, ClassifierMixin):
    """
    Ordinal regression for predicting race finishing positions
    """
    def __init__(self, base_classifier=None):
        self.base_classifier = base_classifier or LogisticRegression(max_iter=1000)
        self.classifiers = {}
        self.classes_ = None
        
    def fit(self, X, y):
        self.classes_ = np.sort(np.unique(y))
        
        # Train binary classifiers for each threshold
        for i, threshold in enumerate(self.classes_[:-1]):
            # Create binary target: 1 if position <= threshold
            binary_y = (y <= threshold).astype(int)
            
            # Clone and train classifier
            clf = clone(self.base_classifier)
            clf.fit(X, binary_y)
            self.classifiers[threshold] = clf
            
        return self
    
    def predict_proba(self, X):
        n_samples = X.shape[0]
        n_classes = len(self.classes_)
        probas = np.zeros((n_samples, n_classes))
        
        # Get cumulative probabilities
        cum_probas = np.zeros((n_samples, n_classes))
        
        for i, threshold in enumerate(self.classes_[:-1]):
            cum_probas[:, i] = self.classifiers[threshold].predict_proba(X)[:, 1]
        
        # Ensure monotonicity
        for i in range(1, n_classes - 1):
            cum_probas[:, i] = np.maximum(cum_probas[:, i], cum_probas[:, i-1])
        
        # Convert cumulative to individual probabilities
        probas[:, 0] = cum_probas[:, 0]
        for i in range(1, n_classes - 1):
            probas[:, i] = cum_probas[:, i] - cum_probas[:, i-1]
        probas[:, -1] = 1 - cum_probas[:, -2]
        
        # Normalize to ensure sum to 1
        probas = probas / probas.sum(axis=1, keepdims=True)
        
        return probas
    
    def predict(self, X):
        probas = self.predict_proba(X)
        return self.classes_[np.argmax(probas, axis=1)]

from sklearn.base import clone

# Prepare data for position prediction
# Only consider finished races for position prediction
finished_races = df_model[df_model['finished'] == 1].copy()

X = finished_races[feature_cols]
y = finished_races['positionOrder']

# Split data temporally
train_date = '2021-12-31'
train_mask = finished_races['date'] <= train_date
test_mask = finished_races['date'] > train_date

X_train, y_train = X[train_mask], y[train_mask]
X_test, y_test = X[test_mask], y[test_mask]

# Scale features
scaler = StandardScaler()
X_train_scaled = scaler.fit_transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Train ordinal regression model
print("Training ordinal regression model for position predictions...")
ordinal_model = OrdinalRegressionClassifier(
    base_classifier=GradientBoostingClassifier(n_estimators=100, max_depth=5, random_state=42)
)
ordinal_model.fit(X_train_scaled, y_train)

# Make predictions
position_probas = ordinal_model.predict_proba(X_test_scaled)
predicted_positions = ordinal_model.predict(X_test_scaled)

# Evaluate predictions
mae = np.mean(np.abs(predicted_positions - y_test))
print(f"\nPosition Prediction MAE: {mae:.2f}")

# Visualize prediction accuracy
plt.figure(figsize=(10, 6))
plt.scatter(y_test, predicted_positions, alpha=0.5)
plt.plot([1, 20], [1, 20], 'r--', label='Perfect prediction')
plt.xlabel('Actual Position')
plt.ylabel('Predicted Position')
plt.title('Position Prediction Accuracy')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

Training ordinal regression model for position predictions...


AttributeError: 'GradientBoostingClassifier' object has no attribute 'estimators_'

## 4. Head-to-Head Matchup Predictions

In [None]:
def create_head_to_head_dataset(df):
    """
    Create pairwise comparisons for head-to-head predictions
    """
    h2h_data = []
    
    # Group by race
    for race_id, race_data in df.groupby('raceId'):
        drivers = race_data['driverId'].unique()
        
        # Create all pairwise comparisons
        for i in range(len(drivers)):
            for j in range(i+1, len(drivers)):
                driver1_data = race_data[race_data['driverId'] == drivers[i]].iloc[0]
                driver2_data = race_data[race_data['driverId'] == drivers[j]].iloc[0]
                
                # Only include if both drivers finished
                if driver1_data['finished'] == 1 and driver2_data['finished'] == 1:
                    # Create feature differences
                    features_diff = {}
                    for col in feature_cols:
                        features_diff[f'{col}_diff'] = driver1_data[col] - driver2_data[col]
                    
                    # Target: 1 if driver1 finished ahead
                    features_diff['driver1_wins'] = int(driver1_data['positionOrder'] < driver2_data['positionOrder'])
                    features_diff['race_id'] = race_id
                    features_diff['driver1_id'] = drivers[i]
                    features_diff['driver2_id'] = drivers[j]
                    features_diff['driver1_ref'] = driver1_data['driverRef']
                    features_diff['driver2_ref'] = driver2_data['driverRef']
                    features_diff['date'] = driver1_data['date']
                    
                    h2h_data.append(features_diff)
    
    return pd.DataFrame(h2h_data)

# Create head-to-head dataset
print("Creating head-to-head comparison dataset...")
h2h_df = create_head_to_head_dataset(df_model)

# Define feature columns for H2H
h2h_feature_cols = [col for col in h2h_df.columns if col.endswith('_diff')]

# Split data
train_mask = h2h_df['date'] <= train_date
test_mask = h2h_df['date'] > train_date

X_h2h_train = h2h_df[train_mask][h2h_feature_cols]
y_h2h_train = h2h_df[train_mask]['driver1_wins']
X_h2h_test = h2h_df[test_mask][h2h_feature_cols]
y_h2h_test = h2h_df[test_mask]['driver1_wins']

# Scale features
scaler_h2h = StandardScaler()
X_h2h_train_scaled = scaler_h2h.fit_transform(X_h2h_train)
X_h2h_test_scaled = scaler_h2h.transform(X_h2h_test)

# Train calibrated classifier for H2H predictions
print("Training head-to-head prediction model...")
h2h_base_model = GradientBoostingClassifier(n_estimators=100, max_depth=5, random_state=42)
h2h_model = CalibratedClassifierCV(h2h_base_model, method='isotonic', cv=3)
h2h_model.fit(X_h2h_train_scaled, y_h2h_train)

# Make predictions
h2h_probas = h2h_model.predict_proba(X_h2h_test_scaled)[:, 1]
h2h_predictions = (h2h_probas > 0.5).astype(int)

# Evaluate
from sklearn.metrics import accuracy_score, log_loss
h2h_accuracy = accuracy_score(y_h2h_test, h2h_predictions)
h2h_log_loss = log_loss(y_h2h_test, h2h_probas)

print(f"\nHead-to-Head Prediction Accuracy: {h2h_accuracy:.3f}")
print(f"Head-to-Head Log Loss: {h2h_log_loss:.3f}")

# Calibration plot
from sklearn.calibration import calibration_curve

fraction_of_positives, mean_predicted_value = calibration_curve(
    y_h2h_test, h2h_probas, n_bins=10
)

plt.figure(figsize=(8, 6))
plt.plot(mean_predicted_value, fraction_of_positives, 's-', label='H2H Model')
plt.plot([0, 1], [0, 1], 'k:', label='Perfectly calibrated')
plt.xlabel('Mean predicted probability')
plt.ylabel('Fraction of positives')
plt.title('Head-to-Head Probability Calibration')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

## 5. DNF Risk Assessment Model

In [None]:
# Prepare DNF prediction dataset
X_dnf = df_model[feature_cols]
y_dnf = (df_model['finished'] == 0).astype(int)  # 1 if DNF

# Split data
train_mask = df_model['date'] <= train_date
test_mask = df_model['date'] > train_date

X_dnf_train, y_dnf_train = X_dnf[train_mask], y_dnf[train_mask]
X_dnf_test, y_dnf_test = X_dnf[test_mask], y_dnf[test_mask]

# Scale features
X_dnf_train_scaled = scaler.transform(X_dnf_train)
X_dnf_test_scaled = scaler.transform(X_dnf_test)

# Train DNF prediction model with calibration
print("Training DNF risk assessment model...")
dnf_base_model = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
dnf_model = CalibratedClassifierCV(dnf_base_model, method='isotonic', cv=3)
dnf_model.fit(X_dnf_train_scaled, y_dnf_train)

# Make predictions
dnf_probas = dnf_model.predict_proba(X_dnf_test_scaled)[:, 1]

# Analyze DNF rates by different factors
test_data = df_model[test_mask].copy()
test_data['dnf_probability'] = dnf_probas

# DNF probability by constructor
constructor_dnf = test_data.groupby('constructorRef').agg({
    'dnf_probability': 'mean',
    'finished': lambda x: 1 - x.mean()  # Actual DNF rate
}).round(3)
constructor_dnf.columns = ['Predicted DNF Rate', 'Actual DNF Rate']
constructor_dnf = constructor_dnf.sort_values('Predicted DNF Rate', ascending=False)

print("\nDNF Risk by Constructor:")
print(constructor_dnf.head(10))

# Visualize DNF probabilities distribution
plt.figure(figsize=(10, 6))
plt.hist(dnf_probas[y_dnf_test == 0], bins=30, alpha=0.5, label='Finished', density=True)
plt.hist(dnf_probas[y_dnf_test == 1], bins=30, alpha=0.5, label='DNF', density=True)
plt.xlabel('Predicted DNF Probability')
plt.ylabel('Density')
plt.title('DNF Probability Distribution')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

## 6. Points Scoring Probability Model

In [None]:
# Create multi-class target for points ranges
def categorize_points(points):
    if points == 0:
        return 0  # No points
    elif points <= 4:
        return 1  # 1-4 points (P7-P10)
    elif points <= 10:
        return 2  # 6-10 points (P5-P6)
    elif points <= 18:
        return 3  # 12-18 points (P3-P4)
    else:
        return 4  # 25+ points (P1-P2)

df_model['points_category'] = df_model['points'].apply(categorize_points)

# Prepare data
X_points = df_model[feature_cols]
y_points = df_model['points_category']

# Split data
X_points_train, y_points_train = X_points[train_mask], y_points[train_mask]
X_points_test, y_points_test = X_points[test_mask], y_points[test_mask]

# Scale features
X_points_train_scaled = scaler.transform(X_points_train)
X_points_test_scaled = scaler.transform(X_points_test)

# Train points prediction model
print("Training points scoring probability model...")
points_model = RandomForestClassifier(n_estimators=100, max_depth=10, random_state=42)
points_model.fit(X_points_train_scaled, y_points_train)

# Get probability predictions
points_probas = points_model.predict_proba(X_points_test_scaled)

# Calculate expected points
points_mapping = {0: 0, 1: 2, 2: 8, 3: 15, 4: 25}  # Average points per category
expected_points = np.sum(points_probas * np.array([points_mapping[i] for i in range(5)]), axis=1)

# Actual points for comparison
actual_points = df_model[test_mask]['points'].values

# Evaluate expected points accuracy
points_mae = np.mean(np.abs(expected_points - actual_points))
print(f"\nExpected Points MAE: {points_mae:.2f}")

# Visualize expected vs actual points
plt.figure(figsize=(10, 6))
plt.scatter(actual_points, expected_points, alpha=0.5)
plt.plot([0, 25], [0, 25], 'r--', label='Perfect prediction')
plt.xlabel('Actual Points')
plt.ylabel('Expected Points')
plt.title('Expected Points vs Actual Points')
plt.legend()
plt.grid(True, alpha=0.3)
plt.show()

# Show probability distribution for a sample of drivers
test_races = df_model[test_mask].iloc[:20]
fig, ax = plt.subplots(figsize=(12, 6))

# Create stacked bar chart of point probabilities
bottom = np.zeros(20)
colors = ['red', 'orange', 'yellow', 'lightgreen', 'darkgreen']
labels = ['No points', '1-4 pts', '6-10 pts', '12-18 pts', '25+ pts']

for i in range(5):
    ax.bar(range(20), points_probas[:20, i], bottom=bottom, 
           label=labels[i], color=colors[i], alpha=0.8)
    bottom += points_probas[:20, i]

ax.set_xlabel('Driver (sample)')
ax.set_ylabel('Probability')
ax.set_title('Points Scoring Probability Distribution')
ax.legend()
ax.set_xticks(range(20))
ax.set_xticklabels(test_races['driverRef'].values, rotation=45)
plt.tight_layout()
plt.show()

## 7. Integrated Betting Odds Generation

In [None]:
def generate_betting_odds(race_data, models, scalers):
    """
    Generate comprehensive betting odds for a race
    """
    # Prepare features
    X = race_data[feature_cols]
    X_scaled = scalers['main'].transform(X)
    
    odds_data = []
    
    for idx, (_, driver) in enumerate(race_data.iterrows()):
        driver_odds = {
            'driver': driver['driverRef'],
            'constructor': driver['constructorRef'],
            'grid': driver['grid']
        }
        
        # DNF probability
        dnf_prob = models['dnf'].predict_proba(X_scaled[idx:idx+1])[:, 1][0]
        driver_odds['dnf_probability'] = dnf_prob
        driver_odds['finish_probability'] = 1 - dnf_prob
        
        # Position probabilities (if finishes)
        if 'position' in models:
            pos_probas = models['position'].predict_proba(X_scaled[idx:idx+1])[0]
            driver_odds['win_probability'] = pos_probas[0] * (1 - dnf_prob)
            driver_odds['podium_probability'] = pos_probas[:3].sum() * (1 - dnf_prob)
            driver_odds['top5_probability'] = pos_probas[:5].sum() * (1 - dnf_prob)
            driver_odds['top10_probability'] = pos_probas[:10].sum() * (1 - dnf_prob)
        
        # Points probability
        points_probas = models['points'].predict_proba(X_scaled[idx:idx+1])[0]
        driver_odds['points_probability'] = 1 - points_probas[0]  # Probability of scoring any points
        
        # Expected points
        points_mapping = {0: 0, 1: 2, 2: 8, 3: 15, 4: 25}
        driver_odds['expected_points'] = sum(points_probas[i] * points_mapping[i] for i in range(5))
        
        # Convert probabilities to decimal odds
        for key in ['win_probability', 'podium_probability', 'top5_probability', 
                   'top10_probability', 'points_probability']:
            if key in driver_odds and driver_odds[key] > 0:
                driver_odds[f'{key.replace("_probability", "")}_odds'] = 1 / driver_odds[key]
        
        odds_data.append(driver_odds)
    
    return pd.DataFrame(odds_data)

# Example: Generate odds for a recent race
recent_race = df_model[df_model['date'] > '2023-01-01'].groupby('raceId').first().index[0]
race_data = df_model[df_model['raceId'] == recent_race]

# Collect all models
models = {
    'dnf': dnf_model,
    'points': points_model,
    # 'position': ordinal_model  # Commented out due to complexity in example
}

scalers = {
    'main': scaler,
    'h2h': scaler_h2h
}

# Generate odds
race_odds = generate_betting_odds(race_data, models, scalers)
race_odds = race_odds.sort_values('expected_points', ascending=False)

print("\nBetting Odds for Sample Race:")
print("=" * 80)
print(race_odds[['driver', 'constructor', 'grid', 'dnf_probability', 
                'points_probability', 'expected_points']].round(3))

# Visualize odds
fig, (ax1, ax2) = plt.subplots(1, 2, figsize=(15, 6))

# Points probability by driver
ax1.barh(race_odds['driver'][:15], race_odds['points_probability'][:15])
ax1.set_xlabel('Probability of Scoring Points')
ax1.set_title('Points Scoring Probability by Driver')
ax1.grid(True, alpha=0.3)

# Expected points vs grid position
ax2.scatter(race_odds['grid'], race_odds['expected_points'], s=100)
for idx, row in race_odds.iterrows():
    ax2.annotate(row['driver'][:3], (row['grid'], row['expected_points']), fontsize=8)
ax2.set_xlabel('Grid Position')
ax2.set_ylabel('Expected Points')
ax2.set_title('Expected Points vs Grid Position')
ax2.grid(True, alpha=0.3)

plt.tight_layout()
plt.show()

## 8. Model Confidence and Uncertainty Quantification

In [None]:
def calculate_prediction_confidence(models, X_scaled, n_iterations=100):
    """
    Calculate prediction confidence using bootstrapping
    """
    n_samples = X_scaled.shape[0]
    
    # Store predictions from each iteration
    dnf_predictions = []
    points_predictions = []
    
    # Bootstrap predictions
    for _ in range(n_iterations):
        # Sample with replacement
        idx = np.random.choice(n_samples, size=n_samples, replace=True)
        X_boot = X_scaled[idx]
        
        # Get predictions
        dnf_pred = models['dnf'].predict_proba(X_boot)[:, 1]
        points_pred = models['points'].predict_proba(X_boot)
        
        dnf_predictions.append(dnf_pred)
        points_predictions.append(points_pred)
    
    # Calculate confidence intervals
    dnf_predictions = np.array(dnf_predictions)
    dnf_mean = np.mean(dnf_predictions, axis=0)
    dnf_std = np.std(dnf_predictions, axis=0)
    dnf_lower = np.percentile(dnf_predictions, 5, axis=0)
    dnf_upper = np.percentile(dnf_predictions, 95, axis=0)
    
    confidence_data = pd.DataFrame({
        'dnf_mean': dnf_mean,
        'dnf_std': dnf_std,
        'dnf_lower_90': dnf_lower,
        'dnf_upper_90': dnf_upper,
        'dnf_confidence': 1 - (dnf_std / (dnf_mean + 0.001))  # Confidence metric
    })
    
    return confidence_data

# Calculate confidence for test predictions
print("Calculating prediction confidence intervals...")
confidence = calculate_prediction_confidence(models, X_test_scaled[:100])

# Visualize confidence intervals
plt.figure(figsize=(12, 6))
sample_idx = range(20)
plt.errorbar(sample_idx, 
            confidence['dnf_mean'][:20], 
            yerr=[confidence['dnf_mean'][:20] - confidence['dnf_lower_90'][:20],
                  confidence['dnf_upper_90'][:20] - confidence['dnf_mean'][:20]],
            fmt='o', capsize=5)
plt.xlabel('Sample')
plt.ylabel('DNF Probability')
plt.title('DNF Predictions with 90% Confidence Intervals')
plt.grid(True, alpha=0.3)
plt.show()

print("\nPrediction Confidence Summary:")
print(f"Average confidence: {confidence['dnf_confidence'].mean():.3f}")
print(f"Min confidence: {confidence['dnf_confidence'].min():.3f}")
print(f"Max confidence: {confidence['dnf_confidence'].max():.3f}")

## Key Features of the Betting Market Models:

1. **Ordinal Regression**: Properly handles the ordered nature of finishing positions

2. **Head-to-Head Predictions**: Direct driver comparison probabilities for matchup betting

3. **DNF Risk Assessment**: Calibrated probabilities for non-finish outcomes

4. **Points Scoring Model**: Multi-class prediction for different points brackets

5. **Probability Calibration**: Ensures predicted probabilities match actual frequencies

6. **Integrated Odds Generation**: Combines all models to produce comprehensive betting odds

7. **Uncertainty Quantification**: Provides confidence intervals for predictions

These models provide the foundation for a professional betting platform with accurate, calibrated probabilities suitable for odds generation and risk management.