In [179]:
import pandas as pd
import pickle 
from scipy.stats import poisson

In [180]:
# Load the pickled group tables (a dict keyed by group name, e.g. 'GroupA').
# A context manager is used so the file handle is closed deterministically.
# NOTE(review): pickle.load can execute arbitrary code from the file; only
# load a 'dict_table' file that comes from a trusted source.
with open('dict_table', 'rb') as dict_table_file:
    dict_table = pickle.load(dict_table_file)

# Historical match results (used for team strength) and the fixture list.
df_historical_data = pd.read_csv('clean_uefa_euros_historical_data.csv')
df_fixture = pd.read_csv('clean_uefa_euros_fixture.csv')

<h1>1 Calculate Team Strength</h1>

In [181]:
# Build one frame per perspective: every historical match as seen from the
# home side and from the away side, each keeping both score columns.
df_home = df_historical_data.loc[:, ['HomeTeam', 'home_score', 'away_score']]
df_away = df_historical_data.loc[:, ['AwayTeam', 'home_score', 'away_score']]

In [182]:
# Give both frames the same schema: Team, GoalsScored, GoalsConceded.
# For the away frame the two score columns swap roles.
df_home = df_home.rename(
    columns={
        'HomeTeam': 'Team',
        'home_score': 'GoalsScored',
        'away_score': 'GoalsConceded',
    }
)
df_away = df_away.rename(
    columns={
        'AwayTeam': 'Team',
        'away_score': 'GoalsScored',
        'home_score': 'GoalsConceded',
    }
)

In [183]:
# Stack both perspectives and average per team, giving the mean goals scored
# and mean goals conceded per match for every team.
df_team_strength = (
    pd.concat([df_home, df_away], ignore_index=True)
    .groupby('Team')
    .mean()
)
df_team_strength

Unnamed: 0_level_0,GoalsScored,GoalsConceded
Team,Unnamed: 1_level_1,Unnamed: 2_level_1
Albania,0.333333,1.0
Austria,0.7,1.2
Belgium,1.409091,1.272727
Bulgaria,0.666667,2.166667
CIS,0.333333,1.333333
Croatia,1.363636,1.272727
Czech Republic,1.241379,1.275862
Czechoslovakia,1.5,1.25
Denmark,1.272727,1.515152
England,1.342105,0.973684


<h1>2 Function predict points</h1>

<h3>
$P(X = x) = \dfrac{e^{-\lambda}\,\lambda^{x}}{x!}$</h3>



In [184]:
def predict_points(home, away, team_strength=None, max_goals=10):
    """Estimate the expected points each side takes from a single match.

    Goals for each side are modelled as independent Poisson variables. The
    home rate is the home team's mean goals scored times the away team's mean
    goals conceded, and vice versa for the away rate. Score lines from 0-0 up
    to ``max_goals``-``max_goals`` are enumerated to obtain the win/draw/loss
    probabilities, which are converted to points (3 for a win, 1 for a draw).

    Parameters
    ----------
    home, away : str
        Team names, looked up in the index of ``team_strength``.
    team_strength : pandas.DataFrame, optional
        Frame indexed by team with 'GoalsScored' and 'GoalsConceded' columns.
        Defaults to the notebook-level ``df_team_strength``.
    max_goals : int, optional
        Largest number of goals per side included in the enumeration
        (default 10, i.e. goal counts 0..10).

    Returns
    -------
    tuple
        ``(points_home, points_away)``. If either team is missing from
        ``team_strength`` the result is ``(0, 0)`` — callers cannot tell this
        apart from a real prediction, so check for missing teams beforehand.
    """
    if team_strength is None:
        # Resolved at call time so the notebook's current table is always used.
        team_strength = df_team_strength

    if home not in team_strength.index or away not in team_strength.index:
        return (0, 0)

    # goals scored * goals conceded
    lamb_home = team_strength.at[home, 'GoalsScored'] * team_strength.at[away, 'GoalsConceded']
    lamb_away = team_strength.at[away, 'GoalsScored'] * team_strength.at[home, 'GoalsConceded']

    # Evaluate each side's goal distribution once instead of once per score line.
    goal_counts = range(0, max_goals + 1)
    pmf_home = [poisson.pmf(goals, lamb_home) for goals in goal_counts]
    pmf_away = [poisson.pmf(goals, lamb_away) for goals in goal_counts]

    prob_draw, prob_home, prob_away = 0, 0, 0
    for x in goal_counts:  # number of goals home team
        for y in goal_counts:  # number of goals away team
            p = pmf_home[x] * pmf_away[y]
            if x == y:
                prob_draw += p
            elif x > y:
                prob_home += p
            else:
                prob_away += p

    points_home = 3 * prob_home + prob_draw
    points_away = 3 * prob_away + prob_draw
    return (points_home, points_away)
                    




<h2>2.1 Testing function</h2>

In [185]:
# Smoke test: expected points for one fixture, home side first.
predict_points(home='France', away='Spain')


(1.2208849862837132, 1.5461254699206322)

<h1>3 Predict Euro 2024</h1>

In [186]:
# Split the fixture list into tournament stages by row position.
# Each slice is copied so later in-place edits never touch df_fixture.
df_fixture_group_36 = df_fixture.iloc[:36].copy()    # group stage (36 matches)
df_fixture_knockout = df_fixture.iloc[36:44].copy()  # first knockout round (8 matches)
df_fixture_quarter = df_fixture.iloc[44:48].copy()   # quarter-finals
df_fixture_semi = df_fixture.iloc[48:50].copy()      # semi-finals
df_fixture_final = df_fixture.iloc[50:].copy()       # final


In [187]:
# Swap the play-off placeholders for the teams assumed to qualify, in the
# 'home' and 'away' columns only.
playoff_qualifiers = {
    "Play-off winner A": "Ukraine",
    "Play-off winner C": "Wales",
    "Play-off winner B": "Poland",
}
df_fixture_group_36.replace(
    {"home": playoff_qualifiers, "away": playoff_qualifiers},
    inplace=True,
)
df_fixture_group_36


Unnamed: 0,home,score,away,year
0,Germany,Match 1,Scotland,2024
1,Hungary,Match 2,Switzerland,2024
2,Germany,Match 14,Hungary,2024
3,Scotland,Match 13,Switzerland,2024
4,Switzerland,Match 25,Germany,2024
5,Scotland,Match 26,Hungary,2024
6,Spain,Match 3,Croatia,2024
7,Italy,Match 4,Albania,2024
8,Croatia,Match 15,Albania,2024
9,Spain,Match 16,Italy,2024


In [188]:
# Strip the ' (H)' suffix so the name matches 'Germany' as used in the fixtures.
dict_table['GroupA'].replace({'Team': 'Germany (H)'}, {'Team': 'Germany'}, inplace=True)


In [189]:
# Check for fixture teams that have no Euros history.
# Both sides of every match are checked, in the fixtures and in the history:
# predict_points returns (0, 0) for any team missing from the historical
# data, and a team may appear only as the away side in either table.
unique_home_teams = df_fixture_group_36['home'].unique()
unique_historic_home_teams = df_historical_data['HomeTeam'].unique()

fixture_teams = pd.concat(
    [df_fixture_group_36['home'], df_fixture_group_36['away']]
).unique()
historic_teams = set(df_historical_data['HomeTeam']) | set(df_historical_data['AwayTeam'])

for i in fixture_teams:
    if i not in historic_teams:
        print(i)

Serbia


<h2>3.1 Group Stage</h2>

In [190]:
# Run every group-stage match and add the expected points to the group tables.
# NOTE(review): 'Pts' is accumulated in place, so re-running this cell without
# reloading dict_table counts every match twice.
# NOTE(review): predict_points returns (0, 0) when a team has no history, so
# such matches add no points to either side.
for group in dict_table:
    teams_in_group = dict_table[group]['Team'].values
    # Matches are selected by the home side only.
    df_fixture_group_6 = df_fixture_group_36[df_fixture_group_36['home'].isin(teams_in_group)]
    for index, row in df_fixture_group_6.iterrows():
        home, away = row['home'], row['away']
        points_home, points_away = predict_points(home, away)
        dict_table[group].loc[dict_table[group]['Team'] == home, 'Pts'] += points_home
        dict_table[group].loc[dict_table[group]['Team'] == away, 'Pts'] += points_away

    # Rank by points and keep only the columns used downstream; the old index
    # is dropped so rows 0, 1, 2 are first, second and third place.
    dict_table[group] = (
        dict_table[group]
        .sort_values('Pts', ascending=False)
        .reset_index(drop=True)[['Team', 'Pts']]
    )

In [191]:
# Inspect one group's table after the simulated group stage.
dict_table['GroupC']

Unnamed: 0,Team,Pts
0,England,3.843532
1,Denmark,2.356964
2,Slovenia,2.18387
3,Serbia,0.0


<h2>3.2 Knockout</h2>

In [192]:
# Inspect the knockout fixtures before the placeholders are replaced by teams.
df_fixture_knockout

Unnamed: 0,home,score,away,year
36,Runner-up Group A,Match 38,Runner-up Group B,2024
37,Winner Group A,Match 37,Runner-up Group C,2024
38,Winner Group C,Match 40,3rd Group D/E/F,2024
39,Winner Group B,Match 39,3rd Group A/D/E/F,2024
40,Runner-up Group D,Match 42,Runner-up Group E,2024
41,Winner Group F,Match 41,3rd Group A/B/C,2024
42,Winner Group E,Match 43,3rd Group A/B/C/D,2024
43,Winner Group D,Match 44,Runner-up Group F,2024


In [193]:
# Fill the knockout fixture with group winners, runners-up and the best
# third-placed teams. The group tables are expected to be ranked by points
# already, so rows 0, 1 and 2 are first, second and third place.
# NOTE(review): the four best third-placed teams are slotted into rows
# 38, 39, 41 and 42 purely in points order; the group combinations named in
# the placeholders ('3rd Group D/E/F', ...) are not taken into account.
third_placed_teams = []

for group in dict_table:
    standings = dict_table[group]

    group_winner = standings.at[0, 'Team']
    group_runner_up = standings.at[1, 'Team']

    # 'GroupA' -> 'Winner Group A' / 'Runner-up Group A', as in the fixture list.
    winner_replace_str = f'Winner {group[:5]} {group[-1:]}'
    runner_up_replace_str = f'Runner-up {group[:5]} {group[-1:]}'
    placeholder_to_team = {
        winner_replace_str: group_winner,
        runner_up_replace_str: group_runner_up,
    }
    df_fixture_knockout.replace(placeholder_to_team, inplace=True)

    # Remember the third-placed side together with its points.
    third_placed_team = standings.at[2, 'Team']
    third_placed_teams.append((third_placed_team, standings.at[2, 'Pts']))

# Rank the third-placed teams by points, best first, and keep the top four.
sorted_third_placed_teams = sorted(third_placed_teams, key=lambda x: x[1], reverse=True)
top_four_teams = [name for name, _pts in sorted_third_placed_teams[:4]]

# These four rows hold the '3rd Group ...' placeholders in the 'away' column.
df_fixture_knockout.loc[[38, 39, 41, 42], 'away'] = top_four_teams

# Check the updated df_fixture_knockout
print(df_fixture_knockout)



           home     score            away  year
36      Hungary  Match 38           Italy  2024
37      Germany  Match 37         Denmark  2024
38      England  Match 40         Croatia  2024
39        Spain  Match 39     Switzerland  2024
40  Netherlands  Match 42         Romania  2024
41     Portugal  Match 41        Slovakia  2024
42      Belgium  Match 43        Slovenia  2024
43       France  Match 44  Czech Republic  2024


In [194]:
#create get_winner function
def get_winner(df_fixture_updated):
    """Pick the predicted winner of every match in a knockout fixture table.

    For each row, ``predict_points(home, away)`` is evaluated and the side
    with more expected points advances. The home side advances only when its
    expected points are strictly higher; on equal points the away side is
    chosen. This includes the ``(0, 0)`` result that ``predict_points``
    returns for teams without historical data.

    Parameters
    ----------
    df_fixture_updated : pandas.DataFrame
        Fixture table with 'home' and 'away' columns holding team names.

    Returns
    -------
    pandas.DataFrame
        The same object, modified in place, with a 'winner' column set.
    """
    # Collect the winners first and assign the column once, rather than
    # writing into the frame row by row while iterating over it.
    winners = []
    for home, away in zip(df_fixture_updated['home'], df_fixture_updated['away']):
        points_home, points_away = predict_points(home, away)
        winners.append(home if points_home > points_away else away)

    df_fixture_updated['winner'] = winners
    return df_fixture_updated

In [195]:
# Predict the winner of each first-round knockout match ('winner' column is set in place).
get_winner(df_fixture_knockout)

Unnamed: 0,home,score,away,year,winner
36,Hungary,Match 38,Italy,2024,Italy
37,Germany,Match 37,Denmark,2024,Germany
38,England,Match 40,Croatia,2024,England
39,Spain,Match 39,Switzerland,2024,Spain
40,Netherlands,Match 42,Romania,2024,Netherlands
41,Portugal,Match 41,Slovakia,2024,Portugal
42,Belgium,Match 43,Slovenia,2024,Belgium
43,France,Match 44,Czech Republic,2024,France


<h2>Quarter final</h2>

In [176]:

# NOTE(review): this cell's execution count (176) is out of order, and it
# reads `group`, `winner_replace_str`, `group_winner` and `group_runner_up`,
# which are only defined as leftovers of the loop in the knockout cell. It
# raises NameError on a fresh kernel unless that cell has run first.
# It also looks like a no-op: the quarter-final placeholders appear to be
# 'Winner Match N' (filled in by update_table), not 'Winner Group X'.
# Consider deleting this cell — TODO confirm.
runner_up_replace_str = f'Runner-up {group[:5]} {group[-1:]}'

# Replace the values in df_fixture_quarter
df_fixture_quarter.replace({winner_replace_str: group_winner,
                              runner_up_replace_str: group_runner_up}, inplace=True)

In [208]:
def update_table(df_fixture_round_1, df_fixture_round_2):
    """Carry the winners of one round into the next round's fixture table.

    Each row of ``df_fixture_round_1`` provides a match label in 'score'
    (e.g. 'Match 37') and a team in 'winner'. Every cell of
    ``df_fixture_round_2`` equal to 'Winner Match <number>' is replaced by
    that team. The 'winner' column of ``df_fixture_round_2`` is then reset
    to '?'. ``df_fixture_round_2`` is modified in place and returned.
    """
    finished_matches = zip(df_fixture_round_1['score'], df_fixture_round_1['winner'])
    for score_label, winner in finished_matches:
        # 'Match 37' -> 37
        match_number = int(score_label.split()[1])
        placeholder = f'Winner Match {match_number}'
        df_fixture_round_2.replace({placeholder: winner}, inplace=True)

    # The next round has not been played yet.
    df_fixture_round_2['winner'] = '?'
    return df_fixture_round_2


In [209]:
# Carry the first-round knockout winners into the quarter-final fixtures.
update_table(df_fixture_knockout, df_fixture_quarter)


Unnamed: 0,home,score,away,year,winner
44,Spain,Match 45,Germany,2024,?
45,Portugal,Match 46,Netherlands,2024,?
46,England,Match 48,Italy,2024,?
47,Belgium,Match 47,France,2024,?


In [210]:
# Predict the quarter-final winners.
get_winner(df_fixture_quarter)


Unnamed: 0,home,score,away,year,winner
44,Spain,Match 45,Germany,2024,Spain
45,Portugal,Match 46,Netherlands,2024,Netherlands
46,England,Match 48,Italy,2024,Italy
47,Belgium,Match 47,France,2024,France


<h2>Semifinal</h2>

In [211]:
# Carry the quarter-final winners into the semi-final fixtures.
update_table(df_fixture_quarter, df_fixture_semi)


Unnamed: 0,home,score,away,year,winner
48,Spain,Match 49,Netherlands,2024,?
49,France,Match 50,Italy,2024,?


In [212]:
# Predict the semi-final winners.
get_winner(df_fixture_semi)


Unnamed: 0,home,score,away,year,winner
48,Spain,Match 49,Netherlands,2024,Spain
49,France,Match 50,Italy,2024,Italy


<h2>Final</h2>

In [213]:
# Carry the semi-final winners into the final fixture.
update_table(df_fixture_semi, df_fixture_final)


Unnamed: 0,home,score,away,year,winner
50,Spain,Match 51,Italy,2024,?


In [214]:
# Predict the winner of the final.
get_winner(df_fixture_final)


Unnamed: 0,home,score,away,year,winner
50,Spain,Match 51,Italy,2024,Italy
