In [1]:
import pandas as pd
import numpy as np
import choix # https://github.com/lucasmaystre/choix
# get data through terminal: wget https://raw.githubusercontent.com/kpelechrinis/BT-model/master/nba1617.csv

In [95]:
# An example of Bradley-Terry model with win-loss relationships in the NBA
# Adopted from: https://github.com/kpelechrinis/BT-model
# See also https://squared2020.com/2017/11/09/bradley-terry-rankings-introduction-to-logistic-regression/
# and https://www.anishathalye.com/2015/03/07/designing-a-better-judging-system/
# and https://en.wikipedia.org/wiki/Pairwise_comparison#Probabilistic_models

df = pd.read_csv("nba1617.csv") # : a file with the game results from the 2016-17 NBA season
teams = list(set(df.Home.unique()))
t = list(np.sort(teams))

data = []

for i in range(len(df.Visitor)):
    if df.PTSV[i] > df.PTSH[i]: # if visitor win
        el = (t.index(df.Visitor[i]),t.index(df.Home[i])) # set vistor first in tuple (in choix, the "winning item always comes first in the tuple")
    else:
        el = (t.index(df.Home[i]),t.index(df.Visitor[i])) # else, set home first in tuple
    data.append(el)

skills = choix.ilsr_pairwise(30, data) # pairwise comparison
# skills = choix.mm_pairwise(30, data) # pairwise comparison, MAP - same results
# ------------------------------

prob_mat = np.zeros([30]) # create first empty array

for i in range(30):
    win_probs = []
    for j in range(30):
        win_probs.append(choix.probabilities([i, j], skills)[0])
        
    if prob_mat.sum() == 0:
        prob_mat = prob_mat + win_probs #first iteration, replace zero-array
        
    else:
        prob_mat = np.vstack((prob_mat,np.array(win_probs))) #concurrent iteration, append array as row-vector, creating the propability matrix 

prob_df = pd.DataFrame(prob_mat, columns=t, index=t)
# Out puts a probability matrix (and Dataframe) that captures the win probability of the row team winning the column team

# OLD:
# for i in range(30):
#     win_probs = []
#     for j in range(30):
#         win_probs.append(choix.probabilities([i, j], skills)[0])
#     print(win_probs) # a probability matrix that captures the win probability of the row team winning the column team

In [96]:
prob_df.head()

Unnamed: 0,Atlanta Hawks,Boston Celtics,Brooklyn Nets,Charlotte Hornets,Chicago Bulls,Cleveland Cavaliers,Dallas Mavericks,Denver Nuggets,Detroit Pistons,Golden State Warriors,...,Oklahoma City Thunder,Orlando Magic,Philadelphia 76ers,Phoenix Suns,Portland Trail Blazers,Sacramento Kings,San Antonio Spurs,Toronto Raptors,Utah Jazz,Washington Wizards
Atlanta Hawks,0.5,0.36973,0.773103,0.580707,0.521359,0.391213,0.606777,0.521968,0.567886,0.183792,...,0.426307,0.668825,0.682184,0.718258,0.507073,0.616447,0.255857,0.397675,0.380482,0.422176
Boston Celtics,0.63027,0.5,0.853121,0.702462,0.64996,0.522774,0.724553,0.650515,0.691386,0.277381,...,0.558837,0.774911,0.785364,0.812937,0.636838,0.732603,0.369529,0.529519,0.511466,0.554663
Brooklyn Nets,0.226897,0.146879,0.5,0.289001,0.242242,0.158673,0.311711,0.24269,0.278345,0.06199,...,0.179042,0.372142,0.386491,0.427984,0.231898,0.320512,0.09166,0.162319,0.152721,0.17657
Charlotte Hornets,0.419293,0.297538,0.710999,0.5,0.44024,0.316935,0.527001,0.440842,0.486892,0.139849,...,0.349188,0.593198,0.607818,0.647977,0.426197,0.537136,0.198883,0.322821,0.307213,0.345355
Chicago Bulls,0.478641,0.35004,0.757758,0.55976,0.5,0.371052,0.586204,0.50061,0.546798,0.171312,...,0.405543,0.649624,0.663368,0.70064,0.485705,0.596043,0.239923,0.377388,0.360547,0.401472


In [97]:
skills_norm = (skills - skills.min())/(skills.max()-skills.min())# fit between o and 1
ranking = pd.DataFrame(list(zip(t,list(skills_norm))), columns = ['team','score']).sort_values('score', ascending=False).reset_index(drop = True)
ranking
# seem reasnoble according to https://www.espn.com/nba/standings/_/season/2017
# not sure why I want to nor it.. Seems prudent though

Unnamed: 0,team,score
0,Golden State Warriors,1.0
1,San Antonio Spurs,0.844209
2,Houston Rockets,0.715555
3,Boston Celtics,0.647564
4,Los Angeles Clippers,0.63629
5,Utah Jazz,0.63068
6,Cleveland Cavaliers,0.61401
7,Toronto Raptors,0.604052
8,Washington Wizards,0.56676
9,Oklahoma City Thunder,0.560534


Next up UFC light weight ;)

# A baysian approximation.. This might be more akin to an actual Bradley-terry model

In [103]:
# bayesian:

# An example of Bradley-Terry model with win-loss relationships in the NBA

df = pd.read_csv("nba1617.csv") # : a file with the game results from the 2016-17 NBA season
teams = list(set(df.Home.unique()))
t = list(np.sort(teams))

data = []

for i in range(len(df.Visitor)):
    if df.PTSV[i] > df.PTSH[i]: # if visitor win
        el = (t.index(df.Visitor[i]),t.index(df.Home[i])) # set vistor first in tuple 
    else:
        el = (t.index(df.Home[i]),t.index(df.Visitor[i])) # else, set home first in tuple
    data.append(el)

skills = choix.ep_pairwise(30, data, alpha = 0.1, model = 'logit') # pairwise comparison

# ------------------------------

prob_mat = np.zeros([30]) # create first empty array

for i in range(30):
    win_probs = []
    for j in range(30):
        win_probs.append(choix.probabilities([i, j], skills[0])[0])
        
    if prob_mat.sum() == 0:
        prob_mat = prob_mat + win_probs #first iteration, replace zero-array
        
    else:
        prob_mat = np.vstack((prob_mat,np.array(win_probs))) #concurrent iteration, append array as row-vector, creating the propability matrix 

prob_df = pd.DataFrame(prob_mat, columns=t, index=t)
# Out puts a probability matrix (and Dataframe) that captures the win probability of the row team winning the column team

# OLD:
# for i in range(30):
#     win_probs = []
#     for j in range(30):
#         win_probs.append(choix.probabilities([i, j], skills)[0])
#     print(win_probs) # a probability matrix that captures the win probability of the row team winning the column team

In [114]:
# skills_norm = (skills[0] - skills[0].min())/(skills[0].max()-skills[0].min())# fit between o and 1
# ranking = pd.DataFrame(list(zip(t,list(skills_norm))), columns = ['team','score']).sort_values('score', ascending=False).reset_index(drop = True)

ranking = pd.DataFrame(list(zip(t,list(skills[0]),list(skills[1].std(axis = 1)))), columns = ['team','score','sd']).sort_values('score', ascending=False).reset_index(drop = True)
ranking

# seem reasnoble according to https://www.espn.com/nba/standings/_/season/2017
# if you normalize the score it comes very close to choix.ilsr_pairwise
# The standart diviations - are they correct?

Unnamed: 0,team,score,sd
0,Golden State Warriors,1.566819,0.01503
1,San Antonio Spurs,1.135458,0.012125
2,Houston Rockets,0.778744,0.010671
3,Boston Celtics,0.591608,0.010165
4,Los Angeles Clippers,0.559685,0.010094
5,Utah Jazz,0.544399,0.010007
6,Cleveland Cavaliers,0.498526,0.009864
7,Toronto Raptors,0.471108,0.009926
8,Washington Wizards,0.36802,0.009709
9,Oklahoma City Thunder,0.350153,0.009693


In [125]:
choix.ilsr_pairwise?

[0;31mSignature:[0m
[0mchoix[0m[0;34m.[0m[0milsr_pairwise[0m[0;34m([0m[0;34m[0m
[0;34m[0m    [0mn_items[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mdata[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0malpha[0m[0;34m=[0m[0;36m0.0[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0minitial_params[0m[0;34m=[0m[0;32mNone[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mmax_iter[0m[0;34m=[0m[0;36m100[0m[0;34m,[0m[0;34m[0m
[0;34m[0m    [0mtol[0m[0;34m=[0m[0;36m1e-08[0m[0;34m,[0m[0;34m[0m
[0;34m[0m[0;34m)[0m[0;34m[0m[0;34m[0m[0m
[0;31mDocstring:[0m
Compute the ML estimate of model parameters using I-LSR.

This function computes the maximum-likelihood (ML) estimate of model
parameters given pairwise-comparison data (see :ref:`data-pairwise`), using
the iterative Luce Spectral Ranking algorithm [MG15]_.

The transition rates of the LSR Markov chain are initialized with
``alpha``. When ``alpha > 0``, this corresponds to a form of regularization
(see 