In [1]:
import pandas as pd
import numpy as np

In [2]:
# TODO: swap in the 2023-24 participants once available; for now this CSV holds the 2022-23 participants.

# Load the contest participants, using the first CSV column as the row index
participants_csv = 'Participants.csv'
df_participants = pd.read_csv(participants_csv, index_col=0)
print(df_participants.shape)
df_participants

(8, 1)


Unnamed: 0,PlayerName
0,Tyrese Maxey
1,Tyrese Haliburton
2,Malik Beasley
3,Jalen Brunson
4,Lauri Markkanen
5,Damian Lillard
6,Donovan Mitchell
7,


In [3]:
# Load the previous stage's output: contest data plus the modelled 3-point make rates per player.
# The first CSV column is used as the row index.
modelled_csv = 'ModelledData.csv'
df_modelled = pd.read_csv(modelled_csv, index_col=0)
df_modelled.head()

Unnamed: 0,Player,GlobalMake,made,att,dewmade,dewatt,ContestMakeActual,dewContestMakeActual,ProjectedMake,DewProjectedMake
0,Bradley Beal,0.347381,15,25,0.0,0.0,0.6,0.0,0.563809,0.31367
1,Buddy Hield,0.393311,91,125,0.0,8.0,0.728,0.0,0.623608,0.348474
2,C.J. McCollum,0.408477,24,50,0.0,0.0,0.48,0.0,0.642646,0.360355
3,Damian Lillard,0.353682,45,75,3.0,4.0,0.6,0.75,0.572167,0.318333
4,Danny Green,0.241922,16,25,0.0,0.0,0.64,0.0,0.422291,0.241567


In [4]:
# Set how many iterations we want to run
sim_run_count = 20000

# Seed NumPy's global generator so a fresh "Restart & Run All" reproduces the same draws.
# pandas' .sample() also falls back to this global state when no random_state is passed,
# so later shuffles become repeatable as well.
SIM_SEED = 2024
np.random.seed(SIM_SEED)


def simulate_round(make_prob, dew_make_prob, suffix, total_col):
    """Simulate one contest round for every row and return its score columns.

    Each row gets four independent binomial draws:
      * 16 regular balls worth 1 point each,
      * 5 money balls on the money-ball rack worth 2 points each,
      * 4 money balls on the other racks worth 2 points each,
      * 2 "dew" balls worth 3 points each (drawn with ``dew_make_prob``).

    Parameters
    ----------
    make_prob : array-like of float
        Per-row probability of making a regular or money ball.
    dew_make_prob : array-like of float
        Per-row probability of making a dew ball.
    suffix : str
        Text appended to each component column name (e.g. '' or 'F').
    total_col : str
        Name of the column holding the round's total score.

    Returns
    -------
    dict
        Maps column name -> integer array of scores, in display order.
    """
    n_rows = len(make_prob)
    # Draw order matches the column order so the random stream is consumed predictably
    regular = np.random.binomial(16, make_prob, n_rows)
    money_rack = np.random.binomial(5, make_prob, n_rows) * 2
    money_non_rack = np.random.binomial(4, make_prob, n_rows) * 2
    dew = np.random.binomial(2, dew_make_prob, n_rows) * 3
    money = money_rack + money_non_rack
    return {
        'RegularBalls' + suffix: regular,
        'MoneyBallsRack' + suffix: money_rack,
        'MoneyBallsNonRack' + suffix: money_non_rack,
        'MoneyBalls' + suffix: money,
        'DewBalls' + suffix: dew,
        total_col: regular + money + dew,
    }


# Only take the rows for the participants. Blank participant names are dropped so a
# missing name can never match a missing 'Player' value in the modelled data.
participant_names = df_participants['PlayerName'].dropna()
df_players = df_modelled[df_modelled['Player'].isin(participant_names)].reset_index(drop=True)
player_count = len(df_players)

# Repeat each player's row sim_run_count times (all runs of player 0, then player 1, ...).
# Selecting by position keeps the original column dtypes; repeating `.values` would turn
# every column into object dtype.
row_positions = np.repeat(np.arange(player_count), sim_run_count)
df_contest = df_players.iloc[row_positions].reset_index(drop=True)

# Number the runs 1..sim_run_count within each player's block. One copy of the run
# numbers per player is all that is needed (not sim_run_count copies).
df_contest['RunCount'] = np.tile(np.arange(1, sim_run_count + 1), player_count)
assert len(df_contest) == player_count * sim_run_count

make_prob = df_contest['ProjectedMake'].to_numpy(dtype=float)
dew_make_prob = df_contest['DewProjectedMake'].to_numpy(dtype=float)

# Qualifying round first, then a hypothetical final for every player in every run
# (whether they actually reach the final is decided later from the qualifying scores).
for suffix, total_col in (('', 'QualifyingTotal'), ('F', 'FinalTotal')):
    for column, scores in simulate_round(make_prob, dew_make_prob, suffix, total_col).items():
        df_contest[column] = scores

df_contest

Unnamed: 0,Player,GlobalMake,made,att,dewmade,dewatt,ContestMakeActual,dewContestMakeActual,ProjectedMake,DewProjectedMake,...,MoneyBallsNonRack,MoneyBalls,DewBalls,QualifyingTotal,RegularBallsF,MoneyBallsRackF,MoneyBallsNonRackF,MoneyBallsF,DewBallsF,FinalTotal
0,Damian Lillard,0.353682,45,75,3.0,4.0,0.6,0.75,0.572167,0.318333,...,2,6,0,14,12,6,6,12,0,24
1,Damian Lillard,0.353682,45,75,3.0,4.0,0.6,0.75,0.572167,0.318333,...,2,8,0,17,7,6,6,12,0,19
2,Damian Lillard,0.353682,45,75,3.0,4.0,0.6,0.75,0.572167,0.318333,...,6,10,0,18,13,6,6,12,0,25
3,Damian Lillard,0.353682,45,75,3.0,4.0,0.6,0.75,0.572167,0.318333,...,4,12,0,26,10,6,2,8,6,24
4,Damian Lillard,0.353682,45,75,3.0,4.0,0.6,0.75,0.572167,0.318333,...,6,12,0,20,11,4,4,8,0,19
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
139995,Tyrese Maxey,0.391078,0,0,0.0,0.0,0.0,0.0,0.620771,0.34674,...,8,10,3,24,8,6,4,10,3,21
139996,Tyrese Maxey,0.391078,0,0,0.0,0.0,0.0,0.0,0.620771,0.34674,...,4,12,0,22,8,6,2,8,3,19
139997,Tyrese Maxey,0.391078,0,0,0.0,0.0,0.0,0.0,0.620771,0.34674,...,8,16,3,30,7,8,2,10,0,17
139998,Tyrese Maxey,0.391078,0,0,0.0,0.0,0.0,0.0,0.620771,0.34674,...,8,18,3,35,8,6,2,8,6,22


In [5]:
# Rank the players within each simulation run by qualifying total (highest total gets rank 1).
# Ties are broken by row order (method='first'). Shuffling all rows with sample(frac=1) first
# makes that order random, so ties are not always won by the players that appear earlier.
# In the real world tied players would shoot extra 30-second rounds to determine a winner;
# here one of them is simply picked at random to go through.

shuffled_rows = df_contest.sample(frac=1)
qualifying_rank = shuffled_rows.groupby('RunCount')['QualifyingTotal'].rank(method='first', ascending=False)
# Put the ranks back in df_contest's row order before storing them
df_contest['QualifyingRank'] = qualifying_rank.reindex(df_contest.index)

# Preview: shuffle the players inside each run and show the first 16 rows
preview_columns = ['Player', 'RunCount', 'QualifyingTotal', 'QualifyingRank']
df_contest.groupby('RunCount').sample(frac=1)[preview_columns].iloc[:16]

Unnamed: 0,Player,RunCount,QualifyingTotal,QualifyingRank
120000,Tyrese Maxey,1,28,2.0
0,Damian Lillard,1,14,7.0
60000,Lauri Markkanen,1,21,5.0
100000,Tyrese Haliburton,1,22,4.0
40000,Jalen Brunson,1,23,3.0
80000,Malik Beasley,1,29,1.0
20000,Donovan Mitchell,1,14,6.0
1,Damian Lillard,2,17,7.0
40001,Jalen Brunson,2,31,1.0
60001,Lauri Markkanen,2,26,4.0


In [6]:
# A player whose qualifying rank is 3 or better is through to the final
advanced = df_contest['QualifyingRank'].le(3)
df_contest['ThroughQualifying'] = advanced

# Finalists keep their simulated final score; everyone else gets 0.
# The cast keeps the column as float64 (values display as 24.0 / 0.0).
df_contest['FinalThroughQualifyingTotal'] = (
    df_contest['FinalTotal'].where(advanced, 0).astype('float64')
)

df_contest.head()

Unnamed: 0,Player,GlobalMake,made,att,dewmade,dewatt,ContestMakeActual,dewContestMakeActual,ProjectedMake,DewProjectedMake,...,QualifyingTotal,RegularBallsF,MoneyBallsRackF,MoneyBallsNonRackF,MoneyBallsF,DewBallsF,FinalTotal,QualifyingRank,ThroughQualifying,FinalThroughQualifyingTotal
0,Damian Lillard,0.353682,45,75,3.0,4.0,0.6,0.75,0.572167,0.318333,...,14,12,6,6,12,0,24,7.0,False,0.0
1,Damian Lillard,0.353682,45,75,3.0,4.0,0.6,0.75,0.572167,0.318333,...,17,7,6,6,12,0,19,7.0,False,0.0
2,Damian Lillard,0.353682,45,75,3.0,4.0,0.6,0.75,0.572167,0.318333,...,18,13,6,6,12,0,25,5.0,False,0.0
3,Damian Lillard,0.353682,45,75,3.0,4.0,0.6,0.75,0.572167,0.318333,...,26,10,6,2,8,6,24,1.0,True,24.0
4,Damian Lillard,0.353682,45,75,3.0,4.0,0.6,0.75,0.572167,0.318333,...,20,11,4,4,8,0,19,6.0,False,0.0


In [7]:
# Rank every player within each run by their final-round total (highest total gets rank 1).
# Rows are shuffled first so that method='first' breaks ties in a random order.
shuffled_rows = df_contest.sample(frac=1)
final_rank = shuffled_rows.groupby('RunCount')['FinalThroughQualifyingTotal'].rank(method='first', ascending=False)
df_contest['FinalRank'] = final_rank.reindex(df_contest.index)

# Keep only each run's winner. Every kept row has FinalRank == 1, so summing FinalRank
# per player counts that player's wins; dividing by the run count gives the win share.
is_winner = df_contest['FinalRank'].eq(1)
df_win = df_contest[is_winner]
df_win.groupby('Player')['FinalRank'].sum() / sim_run_count

Player
Damian Lillard       0.05770
Donovan Mitchell     0.07010
Jalen Brunson        0.19425
Lauri Markkanen      0.14320
Malik Beasley        0.19850
Tyrese Haliburton    0.19370
Tyrese Maxey         0.14255
Name: FinalRank, dtype: float64

In [8]:
# Decimal odds: the reciprocal of each player's simulated win share.
# Players with no wins have no rows in df_win, so they do not appear here.
win_share = df_win.groupby('Player')['FinalRank'].sum() / sim_run_count
1 / win_share

Player
Damian Lillard       17.331023
Donovan Mitchell     14.265335
Jalen Brunson         5.148005
Lauri Markkanen       6.983240
Malik Beasley         5.037783
Tyrese Haliburton     5.162623
Tyrese Maxey          7.015082
Name: FinalRank, dtype: float64