In [7]:
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt

# MatchStats Class
class MatchStats:
    def __init__(self, raw_data, match_to_examine):
        self.match = raw_data[raw_data['match_id'] == match_to_examine]
        self.player1_name = self.match['player1'].values[0]
        self.player2_name = self.match['player2'].values[0]
        self.player1_surname = self.match['p1_lastname'].values[0]
        self.player2_surname = self.match['p2_lastname'].values[0]

        self.names = [self.player1_name, self.player2_name]
        self.surnames = [self.player1_surname, self.player2_surname]

        # Identify set change points
        self.set_change_points = np.where(np.diff(self.match['set_no']) > 0)[0]
        self.set_change_points = np.append(self.set_change_points, len(self.match) - 1)

        # Match statistics
        self.match_winner = self.match['set_victor'].iloc[-1]
        self.set_victors = self.match['set_victor'][self.match['set_victor'] != 0]
        self.point_victors = self.match['point_victor'][self.match['point_victor'] != 0]
        self.game_victors = self.match['game_victor'].values
        self.unf_err = 2 * self.match['p2_unf_err'].values + self.match['p1_unf_err'].values

        self.winner_id = self.match_winner
        self.winner_name = self.names[self.winner_id - 1]

# Models
class SetWinnerModel(MatchStats):
    def __init__(self, raw_data, match_to_examine):
        super().__init__(raw_data, match_to_examine)

    def fit(self):
        pass  # No fitting required

    def prediction(self):
        pred = self.set_victors.values.astype(float)
        pred = np.insert(pred[:-1], 0, np.nan)
        return pred

class CumulativeSetWinnerModel(MatchStats):
    def __init__(self, raw_data, match_to_examine):
        super().__init__(raw_data, match_to_examine)

    def fit(self):
        self.p1_cumulative = np.cumsum(self.set_victors.values == 1)
        self.p2_cumulative = np.cumsum(self.set_victors.values == 2)

    def prediction(self):
        # Match cumulative statistics to set change points
        predictions = []
        for idx in range(len(self.set_victors)):
            p1_sets = self.p1_cumulative[idx]
            p2_sets = self.p2_cumulative[idx]

            if p2_sets > p1_sets:
                predictions.append(2)
            elif p1_sets > p2_sets:
                predictions.append(1)
            else:
                predictions.append(predictions[-1] if predictions else np.nan)
        return np.array(predictions)

class CumulativePointWinnerModel(MatchStats):
    def __init__(self, raw_data, match_to_examine):
        super().__init__(raw_data, match_to_examine)

    def fit(self):
        # Calculate cumulative points won by each player
        self.p1_point_cumulative = np.cumsum(self.point_victors == 1)
        self.p2_point_cumulative = np.cumsum(self.point_victors == 2)

        # Ensure set change points align with cumulative array
        self.set_change_points = np.where(np.diff(self.match['set_no'].values) > 0)[0]
        self.set_change_points = self.set_change_points[self.set_change_points < len(self.p1_point_cumulative)]
        self.set_change_points = np.append(self.set_change_points, len(self.p1_point_cumulative) - 1)

    def prediction(self):
        predictions = []
        for idx in self.set_change_points:
            p1_points = self.p1_point_cumulative[idx]
            p2_points = self.p2_point_cumulative[idx]

            if p2_points > p1_points:
                predictions.append(2)
            elif p1_points > p2_points:
                predictions.append(1)
            else:
                predictions.append(predictions[-1] if predictions else np.nan)
        return np.array(predictions)

class CumulativeGameWinnerModel(MatchStats):
    def __init__(self, raw_data, match_to_examine):
        super().__init__(raw_data, match_to_examine)

    def fit(self):
        self.p1_game_cumulative = np.cumsum(self.game_victors == 1)
        self.p2_game_cumulative = np.cumsum(self.game_victors == 2)

    def prediction(self):
        predictions = []
        for idx in self.set_change_points:
            p1_games = self.p1_game_cumulative[idx]
            p2_games = self.p2_game_cumulative[idx]

            if p2_games > p1_games:
                predictions.append(2)
            elif p1_games > p2_games:
                predictions.append(1)
            else:
                predictions.append(predictions[-1] if predictions else np.nan)
        return np.array(predictions)

class CumulativeUnfErrModel(MatchStats):
    def __init__(self, raw_data, match_to_examine):
        super().__init__(raw_data, match_to_examine)

    def fit(self):
        self.p1_error_cumulative = np.cumsum(self.unf_err == 1)
        self.p2_error_cumulative = np.cumsum(self.unf_err == 2)

    def prediction(self):
        predictions = []
        for idx in self.set_change_points:
            p1_errors = self.p1_error_cumulative[idx]
            p2_errors = self.p2_error_cumulative[idx]

            if p2_errors < p1_errors:
                predictions.append(2)
            elif p1_errors < p2_errors:
                predictions.append(1)
            else:
                predictions.append(predictions[-1] if predictions else np.nan)
        return np.array(predictions)

# Demo
if __name__ == "__main__":
    import tennis_data

    # Load data
    df_raw = tennis_data.load_2021()
    matches = df_raw['match_id'].unique()
    my_match = matches[1]

    stats = MatchStats(df_raw, my_match)

    # Models
    models = [
        SetWinnerModel(df_raw, my_match),
        CumulativeSetWinnerModel(df_raw, my_match),
        CumulativePointWinnerModel(df_raw, my_match),
        CumulativeGameWinnerModel(df_raw, my_match),
        CumulativeUnfErrModel(df_raw, my_match),
    ]

    all_predictions = {}
    for model in models:
        model.fit()
        all_predictions[model.__class__.__name__] = model.prediction()

    # Convert predictions to DataFrame
    pred_df = pd.DataFrame(all_predictions)
    pred_df.index = [f"Set {i+1}" for i in range(len(pred_df))]

    print(pred_df)

    # Plot predictions
    plot_data = pred_df.reset_index().melt(id_vars='index', var_name='Model', value_name='Prediction')
    plot_data.rename(columns={'index': 'Set'}, inplace=True)

    plt.figure(figsize=(12, 6))
    sns.lineplot(data=plot_data, x='Set', y='Prediction', hue='Model', marker='o')
    plt.title('Model Predictions by Set')
    plt.ylabel('Prediction (Winner)')
    plt.xlabel('Set')
    plt.legend(title='Model')
    plt.grid()
    plt.show()

KeyError: 73