In [1]:
import itertools
import random
import pandas as pd
import numpy as np
import json

In [2]:
# Load the full fixture list. Team ids are read as strings so that
# zero-padded identifiers such as "08" are not turned into integers.
matches = pd.read_excel(
    "data/matches.xlsx",
    index_col=0,
    dtype={"home_team_id": str, "away_team_id": str},
)
matches

Unnamed: 0_level_0,round,date,home_team_id,home_goals,home_shootout_goals,away_team_id,away_goals,away_shootout_goals
match_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,1,2023-01-01,08,2.0,,12,4.0,
2,1,2023-01-01,04,2.0,,03,1.0,
3,1,2023-01-01,02,5.0,,06,0.0,
4,1,2023-01-01,05,5.0,,11,3.0,
5,1,2023-01-01,01,1.0,,10,5.0,
...,...,...,...,...,...,...,...,...
62,11,2023-03-19,11,,,09,,
63,11,2023-03-19,07,,,02,,
64,11,2023-03-19,06,,,01,,
65,11,2023-03-19,08,,,04,,


In [3]:
# A match counts as played only when both final scores are present.
played_games = matches[matches[['home_goals', 'away_goals']].notna().all(axis=1)]
played_games

Unnamed: 0_level_0,round,date,home_team_id,home_goals,home_shootout_goals,away_team_id,away_goals,away_shootout_goals
match_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
1,1,2023-01-01,8,2.0,,12,4.0,
2,1,2023-01-01,4,2.0,,3,1.0,
3,1,2023-01-01,2,5.0,,6,0.0,
4,1,2023-01-01,5,5.0,,11,3.0,
5,1,2023-01-01,1,1.0,,10,5.0,
6,1,2023-01-01,9,4.0,,7,2.0,
7,2,2023-01-08,3,3.0,,8,5.0,
8,2,2023-01-08,10,3.0,,2,2.0,
9,2,2023-01-08,12,3.0,,1,2.0,
10,2,2023-01-08,7,5.0,,5,3.0,


In [4]:
# Unplayed games are the exact complement of played_games: a fixture with at
# least one missing score. (Requiring BOTH scores to be missing would leave a
# half-filled row in neither set, silently dropping it from the simulation.)
unplayed_games = matches[matches[['home_goals', 'away_goals']].isna().any(axis=1)]
unplayed_games

Unnamed: 0_level_0,round,date,home_team_id,home_goals,home_shootout_goals,away_team_id,away_goals,away_shootout_goals
match_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
55,10,2023-03-12,1,,,7,,
56,10,2023-03-12,2,,,11,,
57,10,2023-03-12,4,,,12,,
58,10,2023-03-12,9,,,3,,
59,10,2023-03-12,10,,,6,,
60,10,2023-03-12,5,,,8,,
61,11,2023-03-19,12,,,10,,
62,11,2023-03-19,11,,,9,,
63,11,2023-03-19,7,,,2,,
64,11,2023-03-19,6,,,1,,


In [5]:
# Build the current standings from the matches already played.
# classification maps team id -> {'W': wins, 'L': losses, 'GD': goal difference}.
classification = {}

for _, row in played_games.iterrows():
    home_goals = row['home_goals']
    away_goals = row['away_goals']
    # Shootout columns are NaN when no shootout took place; treat that as 0.
    home_shootout_goals = np.nan_to_num(row['home_shootout_goals'])
    away_shootout_goals = np.nan_to_num(row['away_shootout_goals'])

    # Fetch each team's record, creating a zeroed one on first appearance.
    home_record = classification.setdefault(row['home_team_id'], {'W': 0, 'L': 0, 'GD': 0})
    away_record = classification.setdefault(row['away_team_id'], {'W': 0, 'L': 0, 'GD': 0})

    # Regulation goals decide the match; the shootout is only consulted when
    # regulation ended level. (Checking the shootout unconditionally could
    # hand the win to a team that lost in regulation.)
    if home_goals != away_goals:
        home_won = home_goals > away_goals
    else:
        # NOTE: a level match with no recorded shootout winner is credited to
        # the away team, as there is no draw column in the standings.
        home_won = home_shootout_goals > away_shootout_goals

    # Update the wins and losses for both teams
    winner, loser = (home_record, away_record) if home_won else (away_record, home_record)
    winner['W'] += 1
    loser['L'] += 1

    # Goal difference only counts regulation goals, not shootout goals.
    home_record['GD'] += home_goals - away_goals
    away_record['GD'] += away_goals - home_goals

# Rank by most wins, breaking ties by goal difference (both descending).
classification = dict(
    sorted(classification.items(), key=lambda item: (-item[1]['W'], -item[1]['GD']))
)

# Display the standings
classification

{'09': {'W': 7, 'L': 2, 'GD': 8.0},
 '10': {'W': 7, 'L': 2, 'GD': 7.0},
 '07': {'W': 6, 'L': 3, 'GD': 9.0},
 '04': {'W': 6, 'L': 3, 'GD': -1.0},
 '02': {'W': 5, 'L': 4, 'GD': 11.0},
 '05': {'W': 5, 'L': 4, 'GD': -1.0},
 '12': {'W': 4, 'L': 5, 'GD': -2.0},
 '11': {'W': 4, 'L': 5, 'GD': -2.0},
 '01': {'W': 4, 'L': 5, 'GD': -4.0},
 '06': {'W': 3, 'L': 6, 'GD': 0.0},
 '08': {'W': 3, 'L': 6, 'GD': -7.0},
 '03': {'W': 0, 'L': 9, 'GD': -18.0}}

In [6]:
# (home_team_id, away_team_id) pairs for every remaining fixture.
# itertuples(name=None) yields plain tuples and, unlike apply(tuple, axis=1),
# still produces a list (an empty one) when no games are left.
unplayed_games_list = list(
    unplayed_games[['home_team_id', 'away_team_id']].itertuples(index=False, name=None)
)

# Define a function to simulate all possible results
def simulate_results(games=None, standings=None):
    """Yield the final ranking for every win/loss outcome of the remaining games.

    Each remaining game has two outcomes ('W' = first-listed team wins,
    'L' = second-listed team wins), so ``2 ** len(games)`` rankings are yielded.
    Goal difference is not simulated: the 'GD' from ``standings`` is used
    unchanged as the tie-breaker.

    Parameters
    ----------
    games : sequence of (team1, team2) tuples, optional
        Fixtures still to be played. Defaults to the notebook-level
        ``unplayed_games_list``.
    standings : dict, optional
        Maps team id -> record containing at least 'W' and 'GD'. Defaults to
        the notebook-level ``classification``. It is not modified.

    Yields
    ------
    tuple
        Team ids ordered from first to last place for one scenario. Teams
        equal on wins and goal difference keep their order in ``standings``.

    Raises
    ------
    KeyError
        If a team in ``games`` has no entry in ``standings``.
    """
    if games is None:
        games = unplayed_games_list
    if standings is None:
        standings = classification

    teams = list(standings)
    unknown_teams = {team for game in games for team in game} - set(teams)
    if unknown_teams:
        raise KeyError(f"teams missing from standings: {sorted(unknown_teams)}")

    # Only the win count changes between scenarios, so the baseline wins and
    # the (constant) goal differences are extracted once.
    base_wins = {team: standings[team]['W'] for team in teams}
    goal_diff = {team: standings[team]['GD'] for team in teams}

    for outcomes in itertools.product(['W', 'L'], repeat=len(games)):
        wins = dict(base_wins)
        for (team1, team2), outcome in zip(games, outcomes):
            wins[team1 if outcome == 'W' else team2] += 1
        # sorted() is stable, so ties keep the order of ``standings``.
        yield tuple(sorted(teams, key=lambda team: (-wins[team], -goal_diff[team])))

In [7]:
# counts[team][place] = number of simulated scenarios in which `team`
# finishes in `place` (1 = first). Every place starts at zero.
counts = {
    team: dict.fromkeys(range(1, len(classification) + 1), 0)
    for team in classification
}

# Walk through every scenario and tally where each team ends up
for final_order in simulate_results():
    for place, team in enumerate(final_order, start=1):
        counts[team][place] += 1

In [8]:
# Report, per team, how often each final position occurs and its share of
# all 2**n simulated scenarios (n = number of unplayed games).
total_scenarios = 2**len(unplayed_games)
for team, position_counts in counts.items():
    print(f'{team}:')
    for position, count in position_counts.items():
        share = round(count/total_scenarios*100, 2)
        print(f'  Position {position}: {count} times, {share}%')

09:
  Position 1: 2200 times, 53.71%
  Position 2: 992 times, 24.22%
  Position 3: 656 times, 16.02%
  Position 4: 224 times, 5.47%
  Position 5: 24 times, 0.59%
  Position 6: 0 times, 0.0%
  Position 7: 0 times, 0.0%
  Position 8: 0 times, 0.0%
  Position 9: 0 times, 0.0%
  Position 10: 0 times, 0.0%
  Position 11: 0 times, 0.0%
  Position 12: 0 times, 0.0%
10:
  Position 1: 1152 times, 28.12%
  Position 2: 1632 times, 39.84%
  Position 3: 992 times, 24.22%
  Position 4: 288 times, 7.03%
  Position 5: 32 times, 0.78%
  Position 6: 0 times, 0.0%
  Position 7: 0 times, 0.0%
  Position 8: 0 times, 0.0%
  Position 9: 0 times, 0.0%
  Position 10: 0 times, 0.0%
  Position 11: 0 times, 0.0%
  Position 12: 0 times, 0.0%
07:
  Position 1: 648 times, 15.82%
  Position 2: 864 times, 21.09%
  Position 3: 1008 times, 24.61%
  Position 4: 672 times, 16.41%
  Position 5: 712 times, 17.38%
  Position 6: 192 times, 4.69%
  Position 7: 0 times, 0.0%
  Position 8: 0 times, 0.0%
  Position 9: 0 times, 0.

In [9]:
# Persist the position counts. The context manager closes the file even if
# json.dump raises, so the handle is never leaked.
# Note: JSON object keys are strings, so the integer positions are written as "1", "2", ...
with open("results/positions_J9.json", "w") as save_file:
    json.dump(counts, save_file)