In [1]:
import pandas as pd
import pickle
from scipy.stats import poisson

In [3]:
dict_table = pickle.load(open('dict_table','rb'))
df_historical_data = pd.read_csv('clean_FIFA_World_Cup_Historical_Data.csv')
df_fixture = pd.read_csv('Clean_FIFA_World_Cup_Fixtures_Data.csv')

In [4]:
# Split df into df_home and df_away

df_home = df_historical_data[['HomeTeam', 'HomeGoals', 'AwayGoals']]
df_away = df_historical_data[['AwayTeam', 'HomeGoals', 'AwayGoals']]

In [5]:
# Rename Columns

df_home = df_home.rename(columns = {'HomeTeam' : 'Team', 'HomeGoals' : 'GoalsScored', 'AwayGoals' : 'GoalsConceded'})
df_away = df_away.rename(columns = {'AwayTeam' : 'Team', 'HomeGoals' : 'GoalsConceded', 'AwayGoals' : 'GoalsScored'})

In [6]:
# Concatenate df_home and df_away, group by team and calculate the Mean

df_team_strength = pd.concat([df_home, df_away], ignore_index = True).groupby('Team').mean()
df_team_strength

Unnamed: 0_level_0,GoalsScored,GoalsConceded
Team,Unnamed: 1_level_1,Unnamed: 2_level_1
Algeria,1.000000,1.461538
Angola,0.333333,0.666667
Argentina,1.691358,1.148148
Australia,0.812500,1.937500
Austria,1.482759,1.620690
...,...,...
Uruguay,1.553571,1.321429
Wales,0.800000,0.800000
West Germany,2.112903,1.241935
Yugoslavia,1.666667,1.272727


In [7]:
### Function predict_points

def predict_points(home, away):
    if home in df_team_strength.index and away in df_team_strength.index:
        # Goals_Scored * Goals_Conceded
        lamb_home = df_team_strength.at[home,'GoalsScored'] * df_team_strength.at[away, 'GoalsConceded']
        lamb_away = df_team_strength.at[away,'GoalsScored'] * df_team_strength.at[home, 'GoalsConceded']
        prob_home, prob_away, prob_draw = 0, 0, 0
        for x in range(0,11):    # Number of goals by home team
            for y in range(0,11):   # Number of goals by away team
                p = poisson.pmf(x, lamb_home) * poisson.pmf(y, lamb_away)
                if x == y:
                    prob_draw += p
                elif x > y:
                    prob_home += p
                else:
                    prob_away += p
        points_home = 3 * prob_home + prob_draw
        points_away = 3 * prob_away + prob_draw
        return (points_home, points_away)
    else:
        return (0, 0)

In [8]:
### Testing Function
# Test with matches

predict_points('Argentina', 'Mexico')
predict_points('England', 'United States')
predict_points('Qatar (H)', 'Ecuador')

(0, 0)

In [9]:
# Splitting Fixture into Group, Knockout, Quarter, ...

df_fixture_group_48 = df_fixture[:48].copy()
df_fixture_knockout = df_fixture[48:56].copy()
df_fixture_quarter = df_fixture[56:60].copy()
df_fixture_semi = df_fixture[60:62].copy()
df_fixture_final = df_fixture[62:].copy()

In [11]:
# Run all the matches in the group stage and update group tables

for group in dict_table:
    team_in_group = dict_table[group]['Team'].values
    df_fixture_group_6 = df_fixture_group_48[df_fixture_group_48['HomeTeam'].isin(team_in_group)]
    for index, row in df_fixture_group_6.iterrows():
        HomeTeam, AwayTeam = row['HomeTeam'], row['AwayTeam']
        points_Home, points_Away = predict_points(HomeTeam, AwayTeam)
        dict_table[group].loc[dict_table[group]['Team'] == HomeTeam, 'Pts'] += points_Home
        dict_table[group].loc[dict_table[group]['Team'] == AwayTeam, 'Pts'] += points_Away

    dict_table[group] = dict_table[group].sort_values('Pts', ascending = False).reset_index()
    dict_table[group] = dict_table[group][['Team', 'Pts']]
    dict_table[group] = dict_table[group].round(0)

In [13]:
dict_table['Group B']

Unnamed: 0,Team,Pts
0,England,13.0
1,United States,8.0
2,Wales,6.0
3,Iran,5.0


In [14]:
df_fixture_knockout

Unnamed: 0,HomeTeam,Score,AwayTeam,Year
48,Netherlands,3–1,United States,2022
49,Argentina,2–1,Australia,2022
50,France,3–1,Poland,2022
51,England,3–0,Senegal,2022
52,Japan,1–1 (a.e.t.),Croatia,2022
53,Brazil,4–1,South Korea,2022
54,Morocco,0–0 (a.e.t.),Spain,2022
55,Portugal,6–1,Switzerland,2022


In [15]:
# Update the Knock out fixture with group winner an runners up

for group in dict_table:
    group_winner = dict_table[group].loc[0, 'Team']
    runners_up = dict_table[group].loc[1, 'Team']

    df_fixture_knockout.replace({f'Winners {group}': group_winner, f'Runners_up {group}': runners_up}, inplace = True)

df_fixture_knockout['Winner'] = '?'
df_fixture_knockout   

Unnamed: 0,HomeTeam,Score,AwayTeam,Year,Winner
48,Netherlands,3–1,United States,2022,?
49,Argentina,2–1,Australia,2022,?
50,France,3–1,Poland,2022,?
51,England,3–0,Senegal,2022,?
52,Japan,1–1 (a.e.t.),Croatia,2022,?
53,Brazil,4–1,South Korea,2022,?
54,Morocco,0–0 (a.e.t.),Spain,2022,?
55,Portugal,6–1,Switzerland,2022,?


In [16]:
# Create get_winner function

def get_winner(df_fixture_updated):
    for i in df_fixture_updated.iterrows():
        HomeTeam, AwayTeam = row['HomeTeam'], row['AwayTeam']
        points_Home, points_Away = predict_points(HomeTeam, AwayTeam)
        if points_Home > points_Away:
            winner = HomeTeam
        else:
            winner = AwayTeam
        df_fixture_updated.loc[index, 'Winner'] = winner
    return df_fixture_updated    

In [18]:
get_winner(df_fixture_knockout)

Unnamed: 0,HomeTeam,Score,AwayTeam,Year,Winner
48,Netherlands,3–1,United States,2022.0,?
49,Argentina,2–1,Australia,2022.0,?
50,France,3–1,Poland,2022.0,?
51,England,3–0,Senegal,2022.0,?
52,Japan,1–1 (a.e.t.),Croatia,2022.0,?
53,Brazil,4–1,South Korea,2022.0,?
54,Morocco,0–0 (a.e.t.),Spain,2022.0,?
55,Portugal,6–1,Switzerland,2022.0,?
47,,,,,Portugal


In [28]:
# Create update_table function

def update_table(df_fixture_round_1, df_fixture_round_2):
    for index, row in df_fixture_round_1.loc[index, 'Winner']:
        match = df_fixture_round_1.loc[index, 'Score']
        df_fixture_round_2.replace({f'Winner {match}': Winner}, inplace = True)
    df_fixture_round_2['Winner'] = '?'
    return df_fixture_round_2

In [None]:
# Quarter Final

update_table(df_fixture_knockout, df_fixture_quarter)

In [None]:
get_winner(df_fixture_quarter)

In [None]:
# Semi Final

update_table(df_fixture_quarter, df_fixture_semi)

In [None]:
get_winner(df_fixture_semi)

In [None]:
# Final

update_table(df_fixture_semi, df_fixture_final)

In [None]:
get_winner(df_fixture_final)