In [39]:
import pandas as pd 
import pickle
from scipy.stats import poisson
dictTable = pickle.load(open("dictTable", "rb"))
df_historical_data = pd.read_csv("clean_euro_historical_data.csv")
df_fixture = pd.read_csv("clean_euro_2024_fixtures.csv")

# Delete (H) from Germany
rename_mappings = {
    'Germany (H)': 'Germany'
}
for group, df in dictTable.items():
    dictTable[group]['Team'] = df['Team'].replace(rename_mappings)

df_home = df_historical_data[["HomeTeam", "HomeGoals", "AwayGoals"]]
df_away = df_historical_data[["AwayTeam", "HomeGoals", "AwayGoals"]]

df_home = df_home.rename(columns={"HomeTeam" : "Team", "HomeGoals" : "GoalsScored", "AwayGoals" : "GoalsConceded"})
df_away = df_away.rename(columns={"AwayTeam" : "Team", "HomeGoals" : "GoalsConceded", "AwayGoals" : "GoalsScored"})

df_team_strength = pd.concat([df_home, df_away], ignore_index=True).groupby("Team").mean()

def predict_points(home, away):
    if home in df_team_strength.index and away in df_team_strength.index:
        lamb_home = df_team_strength.at[home, "GoalsScored"] * df_team_strength.at[away, "GoalsConceded"]
        lamb_away = df_team_strength.at[away, "GoalsScored"] * df_team_strength.at[home, "GoalsConceded"]
        prob_home = 0
        prob_away = 0
        prob_draw = 0
        for x in range(0, 11):
            for y in range(0, 11):
                p = poisson.pmf(x, lamb_home) * poisson.pmf(y, lamb_away)
                if x == y:
                    prob_draw += p
                elif x > y:
                    prob_home += p
                else:
                    prob_away += p
        
        points_home = 3 * prob_home + prob_draw
        points_away = 3 * prob_away + prob_draw
        return (points_home, points_away)
    else :
        return (0, 0)




# Divide Fixtures Into Stages

In [38]:
df_fixture_group_stage = df_fixture[:36].copy()
df_fixture_knockout_stage = df_fixture[36:44].copy()
df_fixture_quarter_finals = df_fixture[44:48].copy()
df_fixture_semi_finals = df_fixture[48:50].copy()
df_fixture_final = df_fixture[50:].copy()

# Group Stage Prediction

In [41]:

# Predict Group Stage
for group in dictTable:
    teams_in_group = dictTable[group]["Team"].values
    df_fixture_group_6 = df_fixture_group_stage[df_fixture_group_stage["Home"].isin(teams_in_group)]
    for index, row in df_fixture_group_6.iterrows():
        home, away = row["Home"], row["Away"]
        points_home, points_away = predict_points(home, away)
        dictTable[group].loc[dictTable[group]["Team"] == home, "Pts"] += int(points_home)
        dictTable[group].loc[dictTable[group]["Team"] == away, "Pts"] += int(points_away)

    dictTable[group] = dictTable[group].sort_values("Pts", ascending=False).reset_index()
    dictTable[group] = dictTable[group][["Team", "Pts"]]
    dictTable[group] = dictTable[group].round(0)

# Print results neatly
for group, standings in dictTable.items():
    print(f"{group} Standings:")
    print(standings.to_string(index=False))
    print("\n" + "-"*30 + "\n")

Group A Standings:
       Team  Pts
    Germany 11.0
    Hungary  6.0
Switzerland  6.0
   Scotland  5.0

------------------------------

Group B Standings:
   Team  Pts
  Spain  9.0
  Italy  9.0
Croatia  6.0
Albania  2.0

------------------------------

Group C Standings:
    Team  Pts
 England  6.0
 Denmark  3.0
Slovenia  3.0
  Serbia  0.0

------------------------------

Group D Standings:
       Team  Pts
Netherlands 11.0
     France  9.0
     Poland  4.0
    Austria  3.0

------------------------------

Group E Standings:
    Team  Pts
 Belgium 13.0
 Romania  6.0
 Ukraine  5.0
Slovakia  5.0

------------------------------

Group F Standings:
          Team  Pts
      Portugal  7.0
Czech Republic  5.0
        Turkey  1.0
       Georgia  0.0

------------------------------



## Ranking of Third-placed Teams that Advances through

In [23]:
# Identify the best third-placed teams
third_placed_teams = []
for group, df in dictTable.items():
    third_place_team = df.iloc[2]
    third_placed_teams.append((group, third_place_team['Team'], third_place_team['Pts']))

# Sort by points (and other criteria if necessary) to get the best four
third_placed_teams.sort(key=lambda x: x[2], reverse=True)
best_third_placed = [team[1] for team in third_placed_teams[:4]]

# Define possible placeholders for the third-placed teams in the knockout stage fixtures
knockout_mapping = {
    "3rd Group A/D/E/F": best_third_placed[0] if len(best_third_placed) > 0 else None,
    "3rd Group D/E/F": best_third_placed[1] if len(best_third_placed) > 1 else None,
    "3rd Group A/B/C/D": best_third_placed[2] if len(best_third_placed) > 2 else None,
    "3rd Group A/B/C": best_third_placed[3] if len(best_third_placed) > 3 else None
}

print(best_third_placed)

['Switzerland', 'Croatia', 'Poland', 'Ukraine']


# Helper Functions

In [19]:
def get_winner(df_fixture_updated):
    for index, row in df_fixture_updated.iterrows():
        home, away = row["Home"], row["Away"]
        points_home, points_away = predict_points(home, away)
        if points_home > points_away:
            winner = home
        else:
            winner = away
        df_fixture_updated.loc[index, "Winner"] = winner
    return df_fixture_updated



def update_table(df_fixture_round_1, df_fixture_round_2):
    for index, row in df_fixture_round_1.iterrows():
        winner = df_fixture_round_1.loc[index, "Winner"]
        match = df_fixture_round_1.loc[index, "Score"]
        df_fixture_round_2.replace({f"Winner {match}" : winner}, inplace=True)
    df_fixture_round_2["Winner"] = "?"
    return df_fixture_round_2

# Round of 16

In [21]:
# Update the knockout stage matchups with winners and runners-up
for group in dictTable:
    group_winner = dictTable[group].loc[0, "Team"]
    runners_up = dictTable[group].loc[1, "Team"]
    df_fixture_knockout_stage.replace({f"Winner {group}": group_winner,
                                       f"Runner-up {group}": runners_up}, inplace=True)

# Update the knockout stage matchups with best third-placed teams
df_fixture_knockout_stage.replace(knockout_mapping, inplace=True)

df_fixture_knockout_stage["Winner"] = "?"

get_winner(df_fixture_knockout_stage)

Unnamed: 0,Home,Score,Away,Year,Winner
36,Hungary,Match 38,Italy,2024,Italy
37,Germany,Match 37,Denmark,2024,Germany
38,England,Match 40,Croatia,2024,England
39,Spain,Match 39,Switzerland,2024,Spain
40,France,Match 42,Romania,2024,France
41,Portugal,Match 41,Ukraine,2024,Portugal
42,Belgium,Match 43,Poland,2024,Belgium
43,Netherlands,Match 44,Czech Republic,2024,Netherlands


# Quarter Finals Prediction

In [102]:
update_table(df_fixture_knockout_stage, df_fixture_quarter_finals)
get_winner(df_fixture_quarter_finals)

Unnamed: 0,Home,Score,Away,Year,Winner
44,Spain,Match 45,Germany,2024,Spain
45,Portugal,Match 46,France,2024,Portugal
46,England,Match 48,Italy,2024,Italy
47,Belgium,Match 47,Netherlands,2024,Netherlands


# Semi Finals Prediction

In [103]:
update_table(df_fixture_quarter_finals, df_fixture_semi_finals)
get_winner(df_fixture_semi_finals)

Unnamed: 0,Home,Score,Away,Year,Winner
48,Spain,Match 49,Portugal,2024,Spain
49,Netherlands,Match 50,Italy,2024,Italy


# Final Prediction

In [104]:
update_table(df_fixture_semi_finals, df_fixture_final)
get_winner(df_fixture_final)

Unnamed: 0,Home,Score,Away,Year,Winner
50,Spain,Match 51,Italy,2024,Italy
