In [214]:
import pandas as pd
import numpy as np

from tqdm import tqdm

In [215]:
df = pd.read_parquet('../../data/curated/adjusted_probabilities_2022.parquet')
season_df = pd.read_parquet('../../data/curated/clean_stats_13-22.parquet').query('season == 2022')

In [216]:
df.head()

Unnamed: 0,player_id,match_id,player_first_name,player_last_name,player_team,new_0,new_1,new_2,new_3,exp_votes
0,11904,16117,Tom,Liberatore,Western Bulldogs,0.998848,0.000859,0.000269,2.5e-05,0.001471
1,11945,16117,Steven,May,Melbourne,0.995647,0.003407,0.000831,0.000115,0.005415
2,11972,16117,Max,Gawn,Melbourne,0.946869,0.027525,0.021477,0.004129,0.082865
3,12015,16117,Tom,McDonald,Melbourne,0.998376,0.001224,0.000359,4.2e-05,0.002066
4,12034,16117,Adam,Tomlinson,Melbourne,0.999494,0.000411,8.7e-05,8e-06,0.000608


In [217]:
players = list(df.player_id.unique())

In [218]:
match_ids = list(df.match_id.unique())

In [219]:
num_simulations = 100

votes_3 = []
votes_2 = []
votes_1 = []

# simulate 10,000 seasons 
for _ in tqdm(range(num_simulations), desc="Simulations"):
    
    # simulate every game in the season, choose a 3-2-1 vote
    for match in match_ids:
        sub_df = df.query('match_id == @match')
        match_players = sub_df.player_id.tolist()

        match_prob_3 = sub_df.new_3.tolist()
        match_prob_2 = sub_df.new_2.tolist()
        match_prob_1 = sub_df.new_1.tolist()

        vote_3 = np.random.choice(match_players, 1, p=match_prob_3)[0]
        vote_2 = vote_3  # Initialize with a value that requires re-selection
        
        # ensure same player is not chosen as vote 3 and 2
        while vote_2 == vote_3:
            vote_2 = np.random.choice(match_players, 1, p=match_prob_2)[0]

        vote_1 = vote_3  
        while vote_1 == vote_3 or vote_1 == vote_2:
            vote_1 = np.random.choice(match_players, 1, p=match_prob_1)[0]

        votes_3.append(vote_3)
        votes_2.append(vote_2)
        votes_1.append(vote_1)


Simulations: 100%|██████████| 100/100 [00:14<00:00,  7.01it/s]


In [220]:
player_votes = {}

In [221]:
for player in tqdm(players):
    
    votes = 3*votes_3.count(player) + 2*votes_2.count(player) + 1*votes_2.count(player)
    
    player_votes[player] = votes / num_simulations

  0%|          | 0/677 [00:00<?, ?it/s]

100%|██████████| 677/677 [00:00<00:00, 1606.68it/s]


In [222]:
votes_df = pd.DataFrame(player_votes.items(), columns=['player_id', 'predicted_votes'])

In [223]:
votes_df.sort_values('predicted_votes', ascending=False)[:10]

Unnamed: 0,player_id,predicted_votes
21,12411,29.16
373,12329,26.37
232,12061,23.1
56,12269,21.6
25,12437,20.28
408,12685,19.14
337,12596,19.05
195,12418,19.05
471,12171,17.07
149,12249,15.99


In [224]:
player_info = df[['player_id', 'player_first_name', 'player_last_name', 'player_team']].drop_duplicates('player_id')

In [225]:
total_df = pd.merge(player_info, votes_df, on='player_id').sort_values('predicted_votes', ascending=False)

In [226]:
actual_votes = season_df.groupby(['player_first_name', 'player_last_name'])['brownlow_votes'].sum()

In [227]:
final_df = pd.merge(total_df, actual_votes, on=['player_first_name', 'player_last_name'])

In [228]:
def get_name(row):
    
    return f"{row['player_first_name'][0]}. {row['player_last_name']}"

final_df['player'] = final_df.apply(get_name, axis=1)
final_df['error'] = final_df['predicted_votes'] - final_df['brownlow_votes']

cols = [
    'player', 'player_team', 'predicted_votes', 'brownlow_votes', 'error'
]
final_df = final_df[cols]

In [229]:
final_df[:10]

Unnamed: 0,player,player_team,predicted_votes,brownlow_votes,error
0,C. Oliver,Melbourne,29.16,25.0,4.16
1,T. Miller,Gold Coast,26.37,27.0,-0.63
2,L. Neale,Brisbane Lions,23.1,28.0,-4.9
3,P. Cripps,Carlton,21.6,29.0,-7.4
4,C. Petracca,Melbourne,20.28,24.0,-3.72
5,S. Walsh,Carlton,19.14,14.0,5.14
6,A. Brayshaw,Fremantle,19.05,25.0,-5.95
7,C. Mills,Sydney,19.05,21.0,-1.95
8,R. Laird,Adelaide,17.07,10.0,7.07
9,Z. Merrett,Essendon,15.99,17.0,-1.01


In [230]:
html_table = final_df[:10].to_html(index=False)
print(html_table)

<table border="1" class="dataframe">
  <thead>
    <tr style="text-align: right;">
      <th>player</th>
      <th>player_team</th>
      <th>predicted_votes</th>
      <th>brownlow_votes</th>
      <th>error</th>
    </tr>
  </thead>
  <tbody>
    <tr>
      <td>C. Oliver</td>
      <td>Melbourne</td>
      <td>29.16</td>
      <td>25.0</td>
      <td>4.16</td>
    </tr>
    <tr>
      <td>T. Miller</td>
      <td>Gold Coast</td>
      <td>26.37</td>
      <td>27.0</td>
      <td>-0.63</td>
    </tr>
    <tr>
      <td>L. Neale</td>
      <td>Brisbane Lions</td>
      <td>23.10</td>
      <td>28.0</td>
      <td>-4.90</td>
    </tr>
    <tr>
      <td>P. Cripps</td>
      <td>Carlton</td>
      <td>21.60</td>
      <td>29.0</td>
      <td>-7.40</td>
    </tr>
    <tr>
      <td>C. Petracca</td>
      <td>Melbourne</td>
      <td>20.28</td>
      <td>24.0</td>
      <td>-3.72</td>
    </tr>
    <tr>
      <td>S. Walsh</td>
      <td>Carlton</td>
      <td>19.14</td>
      <td>14.0</td>