# HodgeRank in Python

A Python implementation of HodgeRank on sports data as described in [Sizemore](https://wakespace.lib.wfu.edu/bitstream/handle/10339/38577/Sizemore_wfu_0248M_10444.pdf). 

Dataset: [TeamStatistics](https://www.kaggle.com/datasets/eoinamoore/historical-nba-data-and-player-box-scores?select=TeamStatistics.csv) in NBA Dataset - Box Scores & Stats, 1947 - Today (Kaggle)

In [None]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
# import scipy.sparse as sp

# Show floats with 3 decimals and no scientific notation so the printed
# comparison matrices below stay readable.
np.set_printoptions(precision=3, suppress=True)

In [None]:
# NOTE(review): this value is passed as `nrows` to pd.read_csv in the loading
# cell, so it is a count of CSV *rows*. The df preview shows each game as two
# rows (one per team), so this presumably covers about NUM_GAMES / 2 distinct
# games - confirm against the full file.
NUM_GAMES = 1000  # Number of games to read

In [None]:
# Load the dataset
# https://www.kaggle.com/datasets/eoinamoore/historical-nba-data-and-player-box-scores?select=TeamStatistics.csv

# Columns kept from the raw export, mapped to the snake_case names used in
# the rest of the notebook. The dict order is also the resulting column order.
renamed_columns = {
    'teamName': 'team_name',
    'opponentTeamName': 'opponent_name',
    'win': 'team_won',
    'teamScore': 'team_score',
    'opponentScore': 'opponent_score',
}

# Read only the first NUM_GAMES rows, keep the needed columns, then rename.
df = (
    pd.read_csv('TeamStatistics.csv', header=0, index_col=None, nrows=NUM_GAMES)
    [list(renamed_columns)]
    .rename(columns=renamed_columns)
)

df.head()

Unnamed: 0,team_name,opponent_name,team_won,team_score,opponent_score
0,Pacers,Thunder,0,91,103
1,Thunder,Pacers,1,103,91
2,Pacers,Thunder,1,108,91
3,Thunder,Pacers,0,91,108
4,Pacers,Thunder,0,109,120


In [21]:
# Create a mapping of team names to unique IDs *starting from 0*
# Note: the matrices will also be zero-indexed
# Names are collected from both the team and opponent columns and sorted, so
# the name -> ID assignment does not depend on row order.
all_team_names = df[['team_name', 'opponent_name']].to_numpy().ravel()
unique_teams = np.sort(pd.unique(all_team_names))

# team name -> row/column index used by every matrix below
id_dict: dict[str, int] = {name: idx for idx, name in enumerate(unique_teams)}

num_teams = len(unique_teams)
print(f"Number of teams: {num_teams}")

# Map team names to IDs
df['team_id'] = df['team_name'].map(id_dict)
df['opponent_id'] = df['opponent_name'].map(id_dict)

df.head()

Number of teams: 30


Unnamed: 0,team_name,opponent_name,team_won,team_score,opponent_score,team_id,opponent_id
0,Pacers,Thunder,0,91,103,18,25
1,Thunder,Pacers,1,103,91,25,18
2,Pacers,Thunder,1,108,91,18,25
3,Thunder,Pacers,0,91,108,25,18
4,Pacers,Thunder,0,109,120,18,25


In [None]:
# 4.2.4 Weight, pairwise comparison, and binary comparison matrices
# These are the aggregated matrices, NOT the per-game [W^alpha_ij] or [Y^alpha_ij].
#
# NOTE(review): the df preview shows every game as two rows, one from each
# team's perspective. Each row below contributes to both (i, j) and (j, i), so
# W presumably holds twice the number of games per pair. The averages Y and B
# are unaffected because numerator and denominator are doubled together.

team_ids = df['team_id'].to_numpy()
opponent_ids = df['opponent_id'].to_numpy()

# Score margin of each row from the row team's point of view.
margins = (df['team_score'] - df['opponent_score']).to_numpy(dtype=np.float64)
# +1 when the row team won, -1 otherwise (there is no tie value in this encoding).
outcomes = np.where(df['team_won'].to_numpy().astype(bool), 1.0, -1.0)

# weight matrix, stores number of pairwise comparisons made, symmetric
W = np.zeros((num_teams, num_teams), dtype=np.float64)

# pairwise comparison matrix, stores rowTeamScore-colTeamScore, antisymmetric
Y = np.zeros((num_teams, num_teams), dtype=np.float64)

# binary comparison matrix, sum of ordinal comparisons (+1 / -1) of whether team i beat team j
B = np.zeros((num_teams, num_teams), dtype=np.float64)

# np.add.at performs unbuffered accumulation, so repeated (i, j) pairs are all
# summed (plain fancy-index `+=` would only apply the last one).
# sum up W^alpha_ij s
np.add.at(W, (team_ids, opponent_ids), 1.0)
np.add.at(W, (opponent_ids, team_ids), 1.0)

# sum up Y^alpha_ij s; the mirrored entry gets the negated margin of the SAME
# row (not the negated running total), which keeps Y antisymmetric.
np.add.at(Y, (team_ids, opponent_ids), margins)
np.add.at(Y, (opponent_ids, team_ids), -margins)

# sum up the win/loss indicators, mirrored with opposite sign
np.add.at(B, (team_ids, opponent_ids), outcomes)
np.add.at(B, (opponent_ids, team_ids), -outcomes)

# element-wise division to get the average comparison; pairs that never met
# (W == 0) are explicitly left at 0 via `out=` instead of uninitialised memory.
played = W != 0
Y = np.divide(Y, W, out=np.zeros_like(Y), where=played)
B = np.divide(B, W, out=np.zeros_like(B), where=played)

# Structural invariants the later steps rely on.
assert np.array_equal(W, W.T), "W must be symmetric"
assert np.allclose(Y, -Y.T), "Y must be antisymmetric"
assert np.allclose(B, -B.T), "B must be antisymmetric"

print(f"Weight matrix W:\n{W}")
print(f"Pairwise comparison matrix Y:\n{Y}")
print(f"Binary comparison matrix B:\n{B}")

Weight matrix W: [[ 0.  2.  4.  0.  2.  0.  0.  6.  4.  0.  2.  0.  4.  0.  0.  2.  2.  0.
   2.  2.  0.  4.  2.  2.  0.  2.  4.  2.  2.  4.]
 [ 2.  0.  0.  2.  0.  2.  0.  4.  4.  0.  0.  2.  2.  4.  2.  4.  0.  4.
  14.  2.  4.  0.  2.  0.  4.  2.  2.  0.  2.  2.]
 [ 4.  0.  0.  4.  0.  2.  0.  0.  6.  2.  2.  2.  2.  4.  2.  2.  2.  2.
   4.  0.  0.  4.  2.  0.  4.  2.  0.  2.  0.  2.]
 [ 0.  2.  4.  0.  2.  4.  4.  0. 10.  2.  2.  4.  6.  0.  4.  0.  4.  0.
  14.  0.  2.  0.  0.  4.  2.  0.  0.  4.  0.  0.]
 [ 2.  0.  0.  2.  0.  0.  2.  0.  4.  4.  4.  2. 16.  2. 12.  0.  4.  2.
   0.  0.  2.  2.  0.  2.  4.  2.  0.  4.  0.  2.]
 [ 0.  2.  2.  4.  0.  0.  2.  2.  2.  0.  0.  4.  4.  4.  2.  4.  2. 14.
   2.  4.  4.  0.  2.  2.  2.  2.  0.  0.  2.  0.]
 [ 0.  0.  0.  4.  2.  2.  0.  2.  4.  2.  4.  2.  2.  2.  2.  6.  0.  2.
   0.  2.  2.  0.  0.  2.  4. 12.  2.  2.  4.  0.]
 [ 6.  4.  0.  0.  0.  2.  2.  0.  8.  4.  2.  0.  2.  0.  8.  2.  4.  0.
   4.  0.  2.  0.  2.  0.  0.  2. 

In [None]:
# 4.2.5 Massey's method to verify HodgeRank
# X is a m × n matrix with X_ki = 1 and X_kj = −1 if team i beats team j in the kth game
# (m = number of rows in df, n = num_teams).

num_rows = len(df)  # size X from the data actually loaded so X and y always agree
team_ids = df['team_id'].to_numpy()
opponent_ids = df['opponent_id'].to_numpy()

# +1 when the row team won, -1 when it lost.
outcomes = np.where(df['team_won'].to_numpy().astype(bool), 1, -1).astype(np.int64)

# int64 (not int8): M = X^T X counts appearances per team and would silently
# wrap around in int8 once a team appears in 128 or more rows.
X = np.zeros((num_rows, num_teams), dtype=np.int64)
row_positions = np.arange(num_rows)  # positional, independent of the df index labels
X[row_positions, team_ids] = outcomes
X[row_positions, opponent_ids] = -outcomes

# vector y stores the margin of victory for each of the m games.
# X is oriented winner-minus-loser, so y must be winner score minus loser
# score; flipping the row-team margin by the outcome gives exactly that.
# (Using the raw signed margin would contradict X on every row the team lost.)
row_margins = (df['team_score'] - df['opponent_score']).to_numpy(dtype=np.int64)
y = outcomes * row_margins

print(f"X matrix:\n{X}")
print(f"y vector:\n{y}")

# M & p matrices
M = np.matmul(X.T, X)  # num_teams x num_teams matrix
p = np.matmul(X.T, y)  # num_teams x 1 vector

# Make M non-singular and r = M^(-1)p sum to zero
M[-1] = 1  # Set the last row to 1 to make it non-singular
p[-1] = 0  # Set the last element to 0 to make the sum zero

# Both are final
print(f"M matrix:\n{M}")
print(f"p vector:\n{p}")