In [10]:
import pandas as pd

df = pd.read_csv(r"preprocessed_2019_data.csv")

print(df)

      Match Date           Sport  Event Type  \
0     2019-03-31  doubles_squash  Club Match   
1     2019-03-29  doubles_squash  Club Match   
2     2019-03-28  doubles_squash      League   
3     2019-03-28  doubles_squash      League   
4     2019-03-28  doubles_squash      League   
...          ...             ...         ...   
1228  2018-04-04  doubles_squash  Tournament   
1229  2018-04-04  doubles_squash  Tournament   
1230  2018-04-04  doubles_squash  Tournament   
1231  2018-04-03  doubles_squash  Club Match   
1232  2018-04-01  doubles_squash  Club Match   

                             Event Name Match Status Game Scores  \
0               Vancouver Racquets Club       Normal     ['3-0']   
1               Vancouver Racquets Club       Normal     ['3-0']   
2       Vancouver Ladies Doubles League       Normal     ['1-3']   
3       Vancouver Ladies Doubles League       Normal     ['3-1']   
4       Vancouver Ladies Doubles League       Normal     ['3-0']   
...            

In [11]:
print(df.columns)

Index(['Match Date', 'Sport', 'Event Type', 'Event Name', 'Match Status',
       'Game Scores', 'Player A1', 'Player A1 Squash BC Number',
       'Player A1 Gender', 'Player A2', 'Player A2 Squash BC Number',
       'Player A2 Gender', 'Player B1', 'Player B1 Squash BC Number',
       'Player B1 Gender', 'Player B2', 'Player B2 Squash BC Number',
       'Player B2 Gender'],
      dtype='object')


In [12]:
df = df[
    [
        "Match Date",
        "Game Scores",
        "Player A1",
        "Player A2",
        "Player B1",
        "Player B2",
    ]
]

In [13]:
# Function to calculate expected win probability based on Elo ratings
def expected_win_probability(player_elo, opponent_elo):
    return 1 / (1 + 10 ** ((opponent_elo - player_elo) / 400))

In [14]:
def elo_sys_with_scorefactor_and_dynamic_k(elo_ratings):
    # Initialize parameters for the Elo system
    starting_elo = 1200  # Starting Elo rating for new players
    
    # Function to calculate K-factor based on Elo rating
    def calculate_k_factor(elo_rating):
        if elo_rating < 2000:
            return 30
        elif elo_rating < 2400:
            return 20
        else:
            return 10

    # Iterate through each row (match) in the DataFrame
    for index, row in df.iterrows():
        # Extract match details
        game_score = row["Game Scores"]
        scores = game_score.split("-")
        score_a = int(scores[0][-1])
        score_b = int(scores[1][0])

        # Extract player names
        player_a1 = row["Player A1"]
        player_a2 = row["Player A2"]
        player_b1 = row["Player B1"]
        player_b2 = row["Player B2"]

        # Initialize Elo ratings for new players
        for player in [player_a1, player_a2, player_b1, player_b2]:
            if player not in elo_ratings:
                elo_ratings[player] = starting_elo

        # Calculate expected win probability for each team
        team_a_elo = (elo_ratings[player_a1] + elo_ratings[player_a2]) / 2
        team_b_elo = (elo_ratings[player_b1] + elo_ratings[player_b2]) / 2
        expected_win_a = expected_win_probability(team_a_elo, team_b_elo)
        expected_win_b = 1 - expected_win_a

        # Update Elo ratings based on actual outcome
        k_factor_a = calculate_k_factor(team_a_elo)
        k_factor_b = calculate_k_factor(team_b_elo)

        # Determine the score difference factor
        score_difference = abs(score_a - score_b)
        score_factor = 1 + (score_difference / 3)  # Adjust this factor as needed

        if score_a > score_b:
            # Team A won
            elo_ratings[player_a1] += k_factor_a * score_factor * (1 - expected_win_a)
            elo_ratings[player_a2] += k_factor_a * score_factor * (1 - expected_win_a)
            elo_ratings[player_b1] += k_factor_b * score_factor * (0 - expected_win_b)
            elo_ratings[player_b2] += k_factor_b * score_factor * (0 - expected_win_b)
        elif score_a < score_b:
            # Team B won
            elo_ratings[player_a1] += k_factor_a * score_factor * (0 - expected_win_a)
            elo_ratings[player_a2] += k_factor_a * score_factor * (0 - expected_win_a)
            elo_ratings[player_b1] += k_factor_b * score_factor * (1 - expected_win_b)
            elo_ratings[player_b2] += k_factor_b * score_factor * (1 - expected_win_b)
        else:
            # Draw
            pass
    return elo_ratings

In [15]:
def elo_sys_baseline(elo_ratings):
# Initialize parameters for the Elo system
    starting_elo = 1200  # Starting Elo rating for new players

    # Iterate through each row (match) in the DataFrame
    for index, row in df.iterrows():
        # Extract match details
        game_score = row["Game Scores"]
        scores = game_score.split("-")
        score_a = int(scores[0][-1])
        score_b = int(scores[1][0])

        # Extract player names
        player_a1 = row["Player A1"]
        player_a2 = row["Player A2"]
        player_b1 = row["Player B1"]
        player_b2 = row["Player B2"]

        # Initialize Elo ratings for new players
        for player in [player_a1, player_a2, player_b1, player_b2]:
            if player not in elo_ratings:
                elo_ratings[player] = starting_elo

        # Calculate expected win probability for each team
        team_a_elo = (elo_ratings[player_a1] + elo_ratings[player_a2]) / 2
        team_b_elo = (elo_ratings[player_b1] + elo_ratings[player_b2]) / 2
        expected_win_a = expected_win_probability(team_a_elo, team_b_elo)
        expected_win_b = 1 - expected_win_a

        # Update Elo ratings based on actual outcome
        k_factor_a = 32
        k_factor_b = 32

        if score_a > score_b:
            # Team A won
            elo_ratings[player_a1] += k_factor_a * (1 - expected_win_a)
            elo_ratings[player_a2] += k_factor_a * (1 - expected_win_a)
            elo_ratings[player_b1] += k_factor_b  * (0 - expected_win_b)
            elo_ratings[player_b2] += k_factor_b  * (0 - expected_win_b)
        elif score_a < score_b:
            # Team B won
            elo_ratings[player_a1] += k_factor_a * (0 - expected_win_a)
            elo_ratings[player_a2] += k_factor_a * (0 - expected_win_a)
            elo_ratings[player_b1] += k_factor_b * (1 - expected_win_b)
            elo_ratings[player_b2] += k_factor_b * (1 - expected_win_b)
        else:
            # Draw
            pass
    return elo_ratings

In [57]:
# Function to calculate expected win probability based on Elo ratings
def expected_win_probability_norm(player_elo, opponent_elo):
    return 1 / (1 + 10 ** ((opponent_elo - player_elo) / 1.2))

In [59]:
def elo_sys_with_scorefactor_and_dynamic_k_normalized(elo_ratings):
    # Initialize parameters for the Elo system
    starting_elo = 1.5  # Starting Elo rating for new players

    # Function to calculate K-factor based on Elo rating
    def calculate_k_factor(elo_rating):
        if elo_rating < 2000:
            return 0.5
        elif elo_rating < 2400:
            return 0.2
        else:
            return 0.1

    # Iterate through each row (match) in the DataFrame
    for index, row in df.iterrows():
        # Extract match details
        game_score = row["Game Scores"]
        scores = game_score.split("-")
        score_a = int(scores[0][-1])
        score_b = int(scores[1][0])

        # Extract player names
        player_a1 = row["Player A1"]
        player_a2 = row["Player A2"]
        player_b1 = row["Player B1"]
        player_b2 = row["Player B2"]

        # Initialize Elo ratings for new players
        for player in [player_a1, player_a2, player_b1, player_b2]:
            if player not in elo_ratings:
                elo_ratings[player] = starting_elo

        # Calculate expected win probability for each team
        team_a_elo = (elo_ratings[player_a1] + elo_ratings[player_a2]) / 2
        team_b_elo = (elo_ratings[player_b1] + elo_ratings[player_b2]) / 2
        expected_win_a = expected_win_probability_norm(team_a_elo, team_b_elo)
        expected_win_b = 1 - expected_win_a

        # Update Elo ratings based on actual outcome
        k_factor_a = calculate_k_factor(team_a_elo)
        k_factor_b = calculate_k_factor(team_b_elo)

        # Determine the score difference factor
        score_difference = abs(score_a - score_b)
        score_factor = 1 + (score_difference / 3)  # Adjust this factor as needed

        if score_a > score_b:
            # Team A won
            elo_ratings[player_a1] += k_factor_a * score_factor * (1 - expected_win_a)
            elo_ratings[player_a2] += k_factor_a * score_factor * (1 - expected_win_a)
            elo_ratings[player_b1] += k_factor_b * score_factor * (0 - expected_win_b)
            elo_ratings[player_b2] += k_factor_b * score_factor * (0 - expected_win_b)
        elif score_a < score_b:
            # Team B won
            elo_ratings[player_a1] += k_factor_a * score_factor * (0 - expected_win_a)
            elo_ratings[player_a2] += k_factor_a * score_factor * (0 - expected_win_a)
            elo_ratings[player_b1] += k_factor_b * score_factor * (1 - expected_win_b)
            elo_ratings[player_b2] += k_factor_b * score_factor * (1 - expected_win_b)
        else:
            # Draw
            pass
    return elo_ratings

In [60]:
def check_res(name):
    # Initialize win and loss counters
    wins = 0
    losses = 0

    # Iterate through each row in the DataFrame
    for index, row in df.iterrows():
        # Extract match details
        game_score = row["Game Scores"]
        scores = game_score.split("-")
        score_a = int(scores[0][-1])
        score_b = int(scores[1][0])

        # Extract player names (assuming the capitalization of 'p' in 'Player')
        player_a1 = row["Player A1"]
        player_a2 = row["Player A2"]
        player_b1 = row["Player B1"]
        player_b2 = row["Player B2"]

        # Check if player is part of the winning team
        if name in [player_a1, player_a2] and score_a > score_b:
            wins += 1
        elif name in [player_b1, player_b2] and score_b > score_a:
            wins += 1
        elif name in [player_a1, player_a2] or name in [player_b1, player_b2]:
            losses += 1

    return wins, losses

In [61]:
# Initialize Elo ratings for each player
elo_ratings = {}
elo_ratings = elo_sys_with_scorefactor_and_dynamic_k(elo_ratings)
elo_ratings_base = {}
elo_ratings_base = elo_sys_baseline(elo_ratings_base)
elo_ratings_norm = {}
elo_ratings_norm = elo_sys_with_scorefactor_and_dynamic_k_normalized(elo_ratings_norm)

# Display or save the updated Elo ratings for each player
sorted_elo_ratings = sorted(elo_ratings.items(), key=lambda x: x[1], reverse=True)
sorted_elo_ratings_base = sorted(
    elo_ratings_base.items(), key=lambda x: x[1], reverse=True
)
sorted_elo_ratings_norm = sorted(
    elo_ratings_norm.items(), key=lambda x: x[1], reverse=True
)

In [62]:
# Display the top 5 players for each system
print("Top 5 Players - System: elo_sys_with_scorefactor_and_dynamic_k")
for player, elo in sorted_elo_ratings[:5]:
    print(f"Player: {player}, Elo: {elo}")

print("\nTop 5 Players - System: elo_sys_baseline")
for player, elo in sorted_elo_ratings_base[:5]:
    print(f"Player: {player}, Elo: {elo}")

print("\nTop 5 Players - System: elo_sys_with_scorefactor_and_dynamic_k_normalized")
for player, elo in sorted_elo_ratings_norm[:5]:
    print(f"Player: {player}, Elo: {elo}")

Top 5 Players - System: elo_sys_with_scorefactor_and_dynamic_k
Player: Andrew Smart, Elo: 1511.2650313152883
Player: Cathy Covernton, Elo: 1504.343833424215
Player: Jeff Boag, Elo: 1495.2608425369356
Player: Brian Covernton, Elo: 1491.4781423595089
Player: Rob Fought, Elo: 1412.396405549641

Top 5 Players - System: elo_sys_baseline
Player: Brian Covernton, Elo: 1426.470519480972
Player: Cathy Covernton, Elo: 1413.563321878689
Player: Andrew Smart, Elo: 1407.9827511315116
Player: Jeff Boag, Elo: 1382.4757766730072
Player: Carl McCreath, Elo: 1347.5657720322013

Top 5 Players - System: elo_sys_with_scorefactor_and_dynamic_k_normalized
Player: Justin Todd, Elo: 4.620509446889822
Player: Andrew Smart, Elo: 4.362965124294854
Player: Jeff Boag, Elo: 4.344357040718843
Player: Rob Fought, Elo: 4.2367488379884515
Player: David Goldsmith, Elo: 4.118937361869153


In [63]:
from tabulate import tabulate

# Combine Elo ratings for all systems
combined_ratings = {
    "System: elo_sys_with_scorefactor_and_dynamic_k": sorted(
        elo_ratings.items(), key=lambda x: x[1], reverse=True
    ),
    "System: elo_sys_baseline": sorted(
        elo_ratings_base.items(), key=lambda x: x[1], reverse=True
    ),
    "System: elo_sys_with_scorefactor_and_dynamic_k_normalized": sorted(
        elo_ratings_norm.items(), key=lambda x: x[1], reverse=True
    ),
}

# Create a table for the top 10 players for each system
table_data = []
for system, ratings in combined_ratings.items():
    top_10_players = [(player, elo) for player, elo in ratings[:10]]
    table_data.append([system] + top_10_players)

# Transpose the table
transposed_table_data = list(zip(*table_data))

# Print the transposed table
print(tabulate(transposed_table_data, tablefmt="grid"))

+------------------------------------------------+----------------------------------------+-----------------------------------------------------------+
| System: elo_sys_with_scorefactor_and_dynamic_k | System: elo_sys_baseline               | System: elo_sys_with_scorefactor_and_dynamic_k_normalized |
+------------------------------------------------+----------------------------------------+-----------------------------------------------------------+
| ('Andrew Smart', 1511.2650313152883)           | ('Brian Covernton', 1426.470519480972) | ('Justin Todd', 4.620509446889822)                        |
+------------------------------------------------+----------------------------------------+-----------------------------------------------------------+
| ('Cathy Covernton', 1504.343833424215)         | ('Cathy Covernton', 1413.563321878689) | ('Andrew Smart', 4.362965124294854)                       |
+------------------------------------------------+--------------------------------------

In [65]:
# Combine Elo ratings for all systems
combined_ratings = {
    "System: elo_sys_with_scorefactor_and_dynamic_k": sorted(
        elo_ratings.items(), key=lambda x: x[1], reverse=True
    ),
    "System: elo_sys_baseline": sorted(
        elo_ratings_base.items(), key=lambda x: x[1], reverse=True
    ),
    "System: elo_sys_with_scorefactor_and_dynamic_k_normalized": sorted(
        elo_ratings_norm.items(), key=lambda x: x[1], reverse=True
    ),
}

# Create a table for the top 10 and worst 10 players for each system
table_data = []
for system, ratings in combined_ratings.items():
    worst_10_players = [(player, elo) for player, elo in ratings[-10:]]
    table_data.append([system] + worst_10_players)

# Transpose the table
transposed_table_data = list(zip(*table_data))

# Print the transposed table
print(tabulate(transposed_table_data, tablefmt="grid"))

+------------------------------------------------+-----------------------------------------+-----------------------------------------------------------+
| System: elo_sys_with_scorefactor_and_dynamic_k | System: elo_sys_baseline                | System: elo_sys_with_scorefactor_and_dynamic_k_normalized |
+------------------------------------------------+-----------------------------------------+-----------------------------------------------------------+
| ('Maurice Goulet', 1013.6040654887746)         | ('Angel Wong', 1082.9000387428946)      | ('Boyd Muir', -0.27882644138261703)                       |
+------------------------------------------------+-----------------------------------------+-----------------------------------------------------------+
| ('Tom Locke', 1010.7219267327181)              | ('Connie Engh', 1082.6776092635769)     | ('Miranda Lam', -0.30735644027693565)                     |
+------------------------------------------------+--------------------------------

In [67]:
names = [
    "Andrew Smart",
    "Brian Covernton",
    "Justin Todd",
    "Annette Johanson",
    "Jeff Ward",
]
for name in names:
    wins,losses = check_res(name)
    print(f"{name}'s Win-Loss Record: {wins}-{losses}")

Andrew Smart's Win-Loss Record: 19-4
Brian Covernton's Win-Loss Record: 54-29
Justin Todd's Win-Loss Record: 9-4
Annette Johanson's Win-Loss Record: 7-21
Jeff Ward's Win-Loss Record: 16-41
