In [7]:
import pandas as pd
import numpy as np

In [3]:
# TODO: swap in the 2023-24 participants once they are available.
# For now this csv still holds the 2022-23 participants.

# Load the participant list; the first csv column becomes the index.
df_participants = pd.read_csv(filepath_or_buffer='Participants.csv', index_col=0)
# Show (rows, columns) first, then the full table as the cell output.
print(df_participants.shape)
df_participants

(8, 1)


Unnamed: 0,PlayerName
0,Tyrese Haliburton
1,Tyler Herro
2,Buddy Hield
3,Kevin Huerter
4,Damian Lillard
5,Lauri Markkanen
6,Julius Randle
7,Jayson Tatum


In [5]:
# Load the output of the previous stage: historical contest data together with the
# modelled 3-point make probabilities per player. The first csv column becomes the index.
df_modelled = pd.read_csv(filepath_or_buffer='ModelledData.csv', index_col=0)

# Preview the first five rows.
df_modelled.head(5)

Unnamed: 0,Player,GlobalMake,made,att,dewmade,dewatt,ContestMakeActual,dewContestMakeActual,ProjectedMake,DewProjectedMake
0,Bradley Beal,0.356924,15,25,0.0,0.0,0.6,0.0,0.562628,0.36504
1,Buddy Hield,0.39741,56,75,0.0,4.0,0.746667,0.0,0.642658,0.367417
2,C.J. McCollum,0.399027,24,50,0.0,0.0,0.48,0.0,0.645726,0.367512
3,Damian Lillard,0.364806,13,25,0.0,0.0,0.52,0.0,0.578611,0.365502
4,Danny Green,0.342776,16,25,0.0,0.0,0.64,0.0,0.533635,0.364211


In [8]:
# Number of simulated contests (Monte Carlo iterations).
sim_run_count = 10000

# Seed NumPy's global RNG so that re-running the notebook from a fresh kernel reproduces
# the same simulated scores.
np.random.seed(2024)

# Shot layout used for one round, as encoded by the simulation below:
# balls per category and the points each made ball is worth.
REGULAR_BALLS = 16         # worth 1 point each
MONEY_BALLS_RACK = 5       # worth 2 points each
MONEY_BALLS_NON_RACK = 4   # worth 2 points each
DEW_BALLS = 2              # worth 3 points each, drawn with DewProjectedMake
MONEY_BALL_POINTS = 2
DEW_BALL_POINTS = 3


def _simulate_round(make_prob, dew_make_prob, suffix, total_col):
    """Simulate one round for every row and return the resulting score columns.

    Parameters
    ----------
    make_prob : 1-D float array with the per-row make probability for regular/money balls.
    dew_make_prob : 1-D float array with the per-row make probability for dew balls.
    suffix : str appended to every component column name ('' for qualifying, 'F' for final).
    total_col : name of the column that holds the round's total points.

    Returns
    -------
    dict mapping column name -> integer array, in the order the columns should be added.
    """
    n_rows = len(make_prob)
    regular = np.random.binomial(REGULAR_BALLS, make_prob, n_rows)
    money_rack = np.random.binomial(MONEY_BALLS_RACK, make_prob, n_rows) * MONEY_BALL_POINTS
    money_non_rack = np.random.binomial(MONEY_BALLS_NON_RACK, make_prob, n_rows) * MONEY_BALL_POINTS
    money = money_rack + money_non_rack
    dew = np.random.binomial(DEW_BALLS, dew_make_prob, n_rows) * DEW_BALL_POINTS
    return {
        'RegularBalls' + suffix: regular,
        'MoneyBallsRack' + suffix: money_rack,
        'MoneyBallsNonRack' + suffix: money_non_rack,
        'MoneyBalls' + suffix: money,
        'DewBalls' + suffix: dew,
        total_col: regular + money + dew,
    }


# Keep only the rows of the modelled data that belong to this year's participants.
df_contest = df_modelled[df_modelled['Player'].isin(df_participants['PlayerName'].values)].reset_index(drop=True)
player_count = len(df_contest)

# Repeat every player row sim_run_count times (player-major order). Selecting rows by position
# keeps the numeric dtypes, whereas repeating `.values` would turn every column into object.
df_contest = df_contest.iloc[np.repeat(np.arange(player_count), sim_run_count)].reset_index(drop=True)

# Number the runs 1..sim_run_count within each player's block of rows. This needs exactly
# player_count * sim_run_count values, not sim_run_count ** 2.
df_contest['RunCount'] = np.tile(np.arange(1, sim_run_count + 1), player_count)

make_prob = df_contest['ProjectedMake'].to_numpy(dtype=float)
dew_make_prob = df_contest['DewProjectedMake'].to_numpy(dtype=float)

# Qualifying round first, then a hypothetical final that is simulated for every player;
# whether a player actually reaches the final is decided later from the qualifying scores.
for suffix, total_col in [('', 'QualifyingTotal'), ('F', 'FinalTotal')]:
    for column, values in _simulate_round(make_prob, dew_make_prob, suffix, total_col).items():
        df_contest[column] = values

df_contest

Unnamed: 0,Player,GlobalMake,made,att,dewmade,dewatt,ContestMakeActual,dewContestMakeActual,ProjectedMake,DewProjectedMake,...,MoneyBallsNonRack,MoneyBalls,DewBalls,QualifyingTotal,RegularBallsF,MoneyBallsRackF,MoneyBallsNonRackF,MoneyBallsF,DewBallsF,FinalTotal
0,Buddy Hield,0.39741,56,75,0.0,4.0,0.746667,0.0,0.642658,0.367417,...,2,8,6,26,9,6,6,12,0,21
1,Buddy Hield,0.39741,56,75,0.0,4.0,0.746667,0.0,0.642658,0.367417,...,4,8,3,22,11,4,6,10,3,24
2,Buddy Hield,0.39741,56,75,0.0,4.0,0.746667,0.0,0.642658,0.367417,...,8,16,0,27,9,2,4,6,0,15
3,Buddy Hield,0.39741,56,75,0.0,4.0,0.746667,0.0,0.642658,0.367417,...,8,12,0,24,8,6,8,14,0,22
4,Buddy Hield,0.39741,56,75,0.0,4.0,0.746667,0.0,0.642658,0.367417,...,4,6,0,16,13,6,8,14,6,33
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
79995,Tyrese Haliburton,0.402166,0,0,0.0,0.0,0.0,0.0,0.651645,0.367696,...,2,8,0,19,11,8,8,16,3,30
79996,Tyrese Haliburton,0.402166,0,0,0.0,0.0,0.0,0.0,0.651645,0.367696,...,6,16,3,28,10,4,6,10,6,26
79997,Tyrese Haliburton,0.402166,0,0,0.0,0.0,0.0,0.0,0.651645,0.367696,...,4,8,0,14,8,8,4,12,6,26
79998,Tyrese Haliburton,0.402166,0,0,0.0,0.0,0.0,0.0,0.651645,0.367696,...,4,8,3,25,10,8,6,14,3,27


In [9]:
# Rank the players inside every simulated run by their qualifying score (rank 1 = best score).
# Ties are resolved with method='first', i.e. the tied row that appears first gets the better rank.
# The rows are shuffled with sample(frac=1) before ranking, so the winner of a tie is random
# instead of always being the player that comes earlier in the data.
# In a real contest tied players would shoot extra 30-second rounds to settle it; here one of
# them is simply picked at random to go through.
shuffled_qualifying_rank = (
    df_contest
    .sample(frac=1)
    .groupby(['RunCount'])['QualifyingTotal']
    .rank(method='first', ascending=False)
)
# Put the ranks back into the original row order before storing them.
df_contest['QualifyingRank'] = shuffled_qualifying_rank.reindex_like(df_contest)

# Preview: the first 16 rows after shuffling the players within each run.
preview_columns = ['Player', 'RunCount', 'QualifyingTotal', 'QualifyingRank']
df_contest.groupby(['RunCount']).sample(frac=1)[preview_columns].head(16)

Unnamed: 0,Player,RunCount,QualifyingTotal,QualifyingRank
20000,Jayson Tatum,1,27,2.0
50000,Lauri Markkanen,1,20,8.0
30000,Julius Randle,1,20,7.0
10000,Damian Lillard,1,21,6.0
60000,Tyler Herro,1,30,1.0
0,Buddy Hield,1,26,4.0
40000,Kevin Huerter,1,21,5.0
70000,Tyrese Haliburton,1,27,3.0
40001,Kevin Huerter,2,23,1.0
20001,Jayson Tatum,2,18,5.0


In [10]:
# The three best-ranked players of each run (rank 1, 2 or 3) advance to the final.
df_contest['ThroughQualifying'] = df_contest['QualifyingRank'].le(3)

# Boolean row masks for the players who advanced and for those who were eliminated.
through_qualifying = df_contest['ThroughQualifying']
not_through_qualifying = ~through_qualifying

# Finalists keep their simulated final score; everyone else is given a final score of 0.
df_contest['FinalThroughQualifyingTotal'] = (
    df_contest['FinalTotal'].where(through_qualifying, 0).astype(float)
)

df_contest.head()

Unnamed: 0,Player,GlobalMake,made,att,dewmade,dewatt,ContestMakeActual,dewContestMakeActual,ProjectedMake,DewProjectedMake,...,QualifyingTotal,RegularBallsF,MoneyBallsRackF,MoneyBallsNonRackF,MoneyBallsF,DewBallsF,FinalTotal,QualifyingRank,ThroughQualifying,FinalThroughQualifyingTotal
0,Buddy Hield,0.39741,56,75,0.0,4.0,0.746667,0.0,0.642658,0.367417,...,26,9,6,6,12,0,21,4.0,False,0.0
1,Buddy Hield,0.39741,56,75,0.0,4.0,0.746667,0.0,0.642658,0.367417,...,22,11,4,6,10,3,24,2.0,True,24.0
2,Buddy Hield,0.39741,56,75,0.0,4.0,0.746667,0.0,0.642658,0.367417,...,27,9,2,4,6,0,15,1.0,True,15.0
3,Buddy Hield,0.39741,56,75,0.0,4.0,0.746667,0.0,0.642658,0.367417,...,24,8,6,8,14,0,22,3.0,True,22.0
4,Buddy Hield,0.39741,56,75,0.0,4.0,0.746667,0.0,0.642658,0.367417,...,16,13,6,8,14,6,33,7.0,False,0.0


In [11]:
# Rank the final-round scores within every run; rank 1 is that run's simulated winner.
#
# Eliminated players carry a final score of 0. Ranking on that value directly would let a
# finalist who also scores 0 tie with them, and the random tie-break could then crown a player
# who never reached the final. Ranking eliminated players on -1 keeps them strictly below
# every finalist while still giving every row a rank.
final_scores = pd.DataFrame({
    'RunCount': df_contest['RunCount'],
    'Score': df_contest['FinalThroughQualifyingTotal'].where(df_contest['ThroughQualifying'], -1),
})

# Shuffle before ranking so that method='first' breaks ties between finalists at random.
# Assigning the result to a column aligns it on the index, which restores the original row order.
df_contest['FinalRank'] = (
    final_scores
    .sample(frac=1)
    .groupby(['RunCount'])['Score']
    .rank(method='first', ascending=False)
)

# Share of runs won by each player: number of rank-1 finishes divided by the number of runs.
df_win = df_contest[df_contest['FinalRank'] == 1]
df_win.groupby(['Player'])['FinalRank'].count() / sim_run_count

Player
Buddy Hield          0.2062
Damian Lillard       0.0776
Jayson Tatum         0.0776
Julius Randle        0.0103
Kevin Huerter        0.1071
Lauri Markkanen      0.1376
Tyler Herro          0.1419
Tyrese Haliburton    0.2417
Name: FinalRank, dtype: float64