In [2]:
import pandas as pd

# Elo ratings table; predict_match_winner looks players up by its 'player' column
df_elo_ratings = pd.read_csv('../data/processed/player_elo_ratings.csv')
# Match sets scored further down: ATP 2025 is used as the validation set,
# Wimbledon 2025 as the test set
df_validation = pd.read_csv('../data/processed/atp_matches_2025.csv')
df_test = pd.read_csv('../data/processed/wimbledon_2025.csv')

In [3]:
""" Prediction Function """

def predict_match_winner(player1, player2, surface=None, elo_ratings_df=None):
    """
    Predict the winner of a match based on Elo ratings

    Args:
        player1: Name of first player
        player2: Name of second player
        surface: Surface type ('Hard', 'Clay', 'Grass') - if None, or any
            other value, uses overall Elo
        elo_ratings_df: DataFrame with a 'player' column and the Elo column
            selected by `surface` ('elo_hard', 'elo_clay', 'elo_grass' or
            'elo_overall'). Required despite the None default.

    Returns:
        tuple: (predicted_winner, win_probability, player1_elo, player2_elo)
        All four entries are None when either player is not present in
        elo_ratings_df or has a missing (NaN) rating in the selected column.
        On an exact tie (probability 0.5) player2 is returned.

    Raises:
        TypeError: If elo_ratings_df is not provided.
    """
    if elo_ratings_df is None:
        # Fail with a readable message instead of "'NoneType' object is not
        # subscriptable" from the lookup below.
        raise TypeError("predict_match_winner() requires elo_ratings_df")

    # Get player ratings
    player1_data = elo_ratings_df[elo_ratings_df['player'] == player1]
    player2_data = elo_ratings_df[elo_ratings_df['player'] == player2]

    # If players not found, return None
    if len(player1_data) == 0 or len(player2_data) == 0:
        return None, None, None, None

    # Determine which Elo column to use; unknown surfaces fall back to overall
    surface_columns = {
        'Hard': 'elo_hard',
        'Clay': 'elo_clay',
        'Grass': 'elo_grass',
    }
    elo_col = surface_columns.get(surface, 'elo_overall')

    # When a player has several rows, the first one is used
    player1_elo = player1_data[elo_col].values[0]
    player2_elo = player2_data[elo_col].values[0]

    # A missing rating cannot produce a prediction: NaN > 0.5 is False, so
    # without this guard player2 would be "predicted" with a NaN probability.
    if pd.isna(player1_elo) or pd.isna(player2_elo):
        return None, None, None, None

    # Calculate win probability using Elo formula
    expected_score_p1 = 1 / (1 + 10 ** ((player2_elo - player1_elo) / 400))

    # Predict winner; report the probability from the predicted winner's side,
    # so it is always >= 0.5
    if expected_score_p1 > 0.5:
        predicted_winner = player1
        win_probability = expected_score_p1
    else:
        predicted_winner = player2
        win_probability = 1 - expected_score_p1

    return predicted_winner, win_probability, player1_elo, player2_elo


def evaluate_predictions(matches_df, elo_ratings_df, surface=None, dataset_name="Dataset"):
    """
    Score Elo-based predictions against the recorded outcome of each match
    and print a summary report.

    Args:
        matches_df: DataFrame of played matches; 'winner_name' and
            'loser_name' are read from every row
        elo_ratings_df: DataFrame with Elo ratings, forwarded to
            predict_match_winner
        surface: Surface type forwarded to predict_match_winner
            (if None, uses overall Elo)
        dataset_name: Label used in the printed report

    Returns:
        dict with keys 'accuracy', 'total_matches', 'correct_predictions'
        and 'results_df' (one row per predicted match), or None when no
        match could be predicted. Matches for which predict_match_winner
        returns None as the winner are left out of every metric.
    """
    records = []

    for _, match in matches_df.iterrows():
        actual = match['winner_name']
        opponent = match['loser_name']

        # The recorded winner is always passed in the player1 slot
        pick, confidence, elo_w, elo_l = predict_match_winner(
            actual, opponent, surface=surface, elo_ratings_df=elo_ratings_df
        )

        # Only matches that produced a prediction are recorded
        if pick is not None:
            records.append({
                'winner_name': actual,
                'loser_name': opponent,
                'predicted_winner': pick,
                'actual_winner': actual,
                'correct': pick == actual,
                'win_probability': confidence,
                'winner_elo': elo_w,
                'loser_elo': elo_l,
                'elo_diff': abs(elo_w - elo_l),
            })

    results_df = pd.DataFrame(records)

    if results_df.empty:
        print(f"No predictions could be made for {dataset_name}")
        return None

    # Headline metrics
    hits = results_df['correct']
    accuracy = hits.mean()
    total_matches = len(results_df)
    correct_predictions = hits.sum()

    banner = '=' * 60
    print(f"\n{banner}")
    print(f"{dataset_name} Results")
    print(banner)
    print(f"Total matches: {total_matches}")
    print(f"Correct predictions: {correct_predictions}")
    print(f"Accuracy: {accuracy:.2%}")
    print(f"Average win probability: {results_df['win_probability'].mean():.2%}")
    print(f"Average Elo difference: {results_df['elo_diff'].mean():.1f}")

    # Accuracy restricted to predictions at or above each confidence cut-off;
    # cut-offs with no qualifying match are not printed
    print("\nAccuracy by confidence level:")
    for cutoff in (0.5, 0.6, 0.7, 0.8, 0.9):
        at_or_above = results_df['win_probability'].ge(cutoff)
        if not at_or_above.any():
            continue
        subset = results_df.loc[at_or_above]
        subset_accuracy = subset['correct'].mean()
        print(f"  Probability >= {cutoff:.0%}: {subset_accuracy:.2%} ({len(subset)} matches)")

    return dict(
        accuracy=accuracy,
        total_matches=total_matches,
        correct_predictions=correct_predictions,
        results_df=results_df,
    )

In [4]:
""" Evaluate on Validation Set (ATP 2025) - Overall Elo """

# surface=None makes predict_match_winner read the 'elo_overall' column for every match
validation_results = evaluate_predictions(
    df_validation,
    df_elo_ratings,
    surface=None,
    dataset_name="Validation Set (ATP 2025) - Overall Elo"
)


Validation Set (ATP 2025) - Overall Elo Results
Total matches: 431
Correct predictions: 308
Accuracy: 71.46%
Average win probability: 70.56%
Average Elo difference: 174.4

Accuracy by confidence level:
  Probability >= 50%: 71.46% (431 matches)
  Probability >= 60%: 75.96% (312 matches)
  Probability >= 70%: 79.72% (212 matches)
  Probability >= 80%: 84.75% (118 matches)
  Probability >= 90%: 94.44% (36 matches)


In [5]:
""" Evaluate on Test Set (Wimbledon 2025) - Overall Elo """

# The same test matches are scored twice: first with overall Elo, then with grass Elo
test_results_overall = evaluate_predictions(
    df_test,
    df_elo_ratings,
    surface=None,
    dataset_name="Test Set (Wimbledon 2025) - Overall Elo"
)
""" Evaluate on Test Set (Wimbledon 2025) - Grass Elo """

# surface='Grass' makes predict_match_winner read the 'elo_grass' column instead
test_results_grass = evaluate_predictions(
    df_test,
    df_elo_ratings,
    surface='Grass',
    dataset_name="Test Set (Wimbledon 2025) - Grass Elo"
)


Test Set (Wimbledon 2025) - Overall Elo Results
Total matches: 30
Correct predictions: 19
Accuracy: 63.33%
Average win probability: 76.72%
Average Elo difference: 252.6

Accuracy by confidence level:
  Probability >= 50%: 63.33% (30 matches)
  Probability >= 60%: 70.83% (24 matches)
  Probability >= 70%: 76.19% (21 matches)
  Probability >= 80%: 73.33% (15 matches)
  Probability >= 90%: 100.00% (6 matches)

Test Set (Wimbledon 2025) - Grass Elo Results
Total matches: 30
Correct predictions: 19
Accuracy: 63.33%
Average win probability: 69.94%
Average Elo difference: 173.0

Accuracy by confidence level:
  Probability >= 50%: 63.33% (30 matches)
  Probability >= 60%: 70.00% (20 matches)
  Probability >= 70%: 80.00% (15 matches)
  Probability >= 80%: 71.43% (7 matches)
  Probability >= 90%: 100.00% (3 matches)


In [6]:
""" Generate Wimbledon Predictions CSV """

def create_predictions_csv(matches_df, elo_ratings_df, surface=None, output_path=None):
    """
    Create a CSV with match predictions

    Args:
        matches_df: DataFrame with match data; 'winner_name', 'loser_name'
            and 'round' are read from every row
        elo_ratings_df: DataFrame with Elo ratings
        surface: Surface type (if None, uses overall Elo)
        output_path: Path to save CSV; nothing is written when None or empty

    Returns:
        DataFrame with predictions, one row per match that could be
        predicted, with columns 'matchup', 'round', 'predicted_winner',
        'predicted_win_probability' (string such as '75.9%') and
        'actual_winner'. The columns are present even when there are no rows.
        Note that 'matchup' always lists the actual winner first.
    """
    # Fixed schema so an empty result still has (and writes) the expected header
    columns = [
        'matchup',
        'round',
        'predicted_winner',
        'predicted_win_probability',
        'actual_winner',
    ]
    predictions = []

    for _, row in matches_df.iterrows():
        winner = row['winner_name']
        loser = row['loser_name']
        round_name = row['round']

        # Predict winner (treating winner as player1)
        predicted_winner, win_prob, _winner_elo, _loser_elo = predict_match_winner(
            winner, loser, surface=surface, elo_ratings_df=elo_ratings_df
        )

        # Skip if prediction couldn't be made
        if predicted_winner is None:
            continue

        predictions.append({
            'matchup': f"{winner} vs {loser}",
            'round': round_name,
            'predicted_winner': predicted_winner,
            'predicted_win_probability': f"{win_prob:.1%}",
            'actual_winner': winner
        })

    predictions_df = pd.DataFrame(predictions, columns=columns)

    if output_path:
        predictions_df.to_csv(output_path, index=False)
        print(f"Predictions saved to: {output_path}")

    return predictions_df

# Generate Wimbledon predictions using Overall Elo and write them to a CSV
# in the processed data folder
wimbledon_predictions = create_predictions_csv(
    df_test,
    df_elo_ratings,
    surface=None,  # Use overall Elo
    output_path='../data/processed/wimbledon_predictions.csv'
)

# Report how many matches received a prediction and preview the first rows
print(f"\nGenerated {len(wimbledon_predictions)} predictions")
print("\nFirst 10 predictions:")
print(wimbledon_predictions.head(10))

Predictions saved to: ../data/processed/wimbledon_predictions.csv

Generated 30 predictions

First 10 predictions:
                                      matchup round     predicted_winner  \
0      Arthur Rinderknech vs Alexander Zverev  R128     Alexander Zverev   
1                 Ethan Quinn vs Henry Searle  R128         Henry Searle   
2             Carlos Alcaraz vs Fabio Fognini  R128       Carlos Alcaraz   
3  Taylor Fritz vs Giovanni Mpetshi Perricard  R128         Taylor Fritz   
4        Kamil Majchrzak vs Matteo Berrettini  R128    Matteo Berrettini   
5                Nicolas Jarry vs Holger Rune  R128          Holger Rune   
6          Nuno Borges vs Francisco Cerundolo  R128  Francisco Cerundolo   
7               Arthur Fery vs Alexei Popyrin  R128       Alexei Popyrin   
8          Novak Djokovic vs Alexandre Muller  R128       Novak Djokovic   
9          Pedro Martinez vs George Loffhagen  R128       Pedro Martinez   

  predicted_win_probability       actual_winner 