In [1]:
import pandas as pd
import pickle
from scipy.stats import poisson

In [18]:
dict_table = pickle.load(open('dict_table','rb'))
df_historical_data = pd.read_csv('cleaned_fifa_worldcup_matches.csv')
df_fixture = pd.read_csv('cleaned_fifa_worldcup_2022_fixtures.csv')
df_historical_data

Unnamed: 0,HomeTeam,AwayTeam,Year,HomeGoals,AwayGoals,TotalGoals
0,France,Mexico,1930,4,1,5
1,Uruguay,Argentina,1930,4,2,6
2,Uruguay,Yugoslavia,1930,6,1,7
3,Argentina,United States,1930,6,1,7
4,Paraguay,Belgium,1930,1,0,1
...,...,...,...,...,...,...
1303,Brazil,Costa Rica,2018,2,0,2
1304,Serbia,Switzerland,2018,1,2,3
1305,Serbia,Brazil,2018,0,2,2
1306,Germany,Mexico,2018,0,1,1


In [None]:
# calculating team strength
df_home = df_historical_data[['HomeTeam', 'HomeGoals',	'AwayGoals']]
df_away = df_historical_data[['AwayTeam', 'HomeGoals',	'AwayGoals']]

#renaming the coloumns appropriately
df_home = df_home.rename(columns={'HomeTeam':'Team', 'HomeGoals':'GoalsScored',	'AwayGoals':'GoalsConceded'})
df_away = df_away.rename(columns={'AwayTeam':'Team', 'HomeGoals':'GoalsConceded',	'AwayGoals':'GoalsScored'})

df_team_strength = pd.concat([df_home,df_away], ignore_index=True).groupby('Team').mean()
df_team_strength

In [16]:
# creating a function to predict the points accumulated

def predict_points(home, away):
    if home in df_team_strength.index and away in df_team_strength.index:
        # goals_scored * goals_conceded
        lamb_home = df_team_strength.at[home,'GoalsScored'] * df_team_strength.at[away,'GoalsConceded']
        lamb_away = df_team_strength.at[away,'GoalsScored'] * df_team_strength.at[home,'GoalsConceded']
        prob_home, prob_away, prob_draw = 0, 0, 0
        for x in range(0,11): #number of goals home team
            for y in range(0, 11): #number of goals away team
                p = poisson.pmf(x, lamb_home) * poisson.pmf(y, lamb_away)
                if x == y:
                    prob_draw += p
                elif x > y:
                    prob_home += p
                else:
                    prob_away += p

        points_home = 3 * prob_home + prob_draw
        points_away = 3 * prob_away + prob_draw
        return (points_home, points_away)
    else:
        return (0, 0)

# testing the function
predict_points('Argentina', 'Germany')
predict_points('Argentina', 'France')
predict_points('Argentina', 'Brazil')

(0.9899427454179797, 1.8072343086019618)

In [None]:
# predicitng the worldcup

#group stage
df_fixture_group_48 = df_fixture[:48].copy()
df_fixture_knockout = df_fixture[48:56].copy()
df_fixture_quarter = df_fixture[56:60].copy()
df_fixture_semi = df_fixture[60:62].copy()
df_fixture_final = df_fixture[62:].copy()

for group in dict_table:
    teams_in_group = dict_table[group]['Team'].values
    df_fixture_group_6 = df_fixture_group_48[df_fixture_group_48['home'].isin(teams_in_group)]
    for index, row in df_fixture_group_6.iterrows():
        home, away = row['home'], row['away']
        points_home, points_away = predict_points(home, away)
        dict_table[group].loc[dict_table[group]['Team'] == home, 'Pts'] += points_home
        dict_table[group].loc[dict_table[group]['Team'] == away, 'Pts'] += points_away

    dict_table[group] = dict_table[group].sort_values('Pts', ascending=False).reset_index()
    dict_table[group] = dict_table[group][['Team', 'Pts']]
    dict_table[group] = dict_table[group].round(0)


In [31]:
dict_table['Group A']

Unnamed: 0,Team,Pts
0,Netherlands,4.0
1,Senegal,2.0
2,Ecuador,2.0
3,Qatar (H),0.0


In [32]:
dict_table['Group B']

Unnamed: 0,Team,Pts
0,England,6.0
1,Wales,5.0
2,United States,3.0
3,Iran,2.0


In [38]:
# knock outs

for group in dict_table:
    group_winner = dict_table[group].loc[0, 'Team']
    runners_up = dict_table[group].loc[1, 'Team']
    df_fixture_knockout.replace({f'Winners {group}':group_winner,
                                 f'Runners-up {group}':runners_up}, inplace=True)

df_fixture_knockout['winner'] = '?'
df_fixture_knockout

Unnamed: 0,home,score,away,winner
48,Netherlands,Match 49,Wales,?
49,Argentina,Match 50,Denmark,?
50,France,Match 52,Poland,?
51,England,Match 51,Senegal,?
52,Germany,Match 53,Belgium,?
53,Brazil,Match 54,Uruguay,?
54,Croatia,Match 55,Spain,?
55,Portugal,Match 56,Switzerland,?


In [39]:
def get_winner(df_fixture_updated):
    for index, row in df_fixture_updated.iterrows():
        home, away = row['home'], row['away']
        points_home, points_away = predict_points(home, away)
        if points_home > points_away:
            winner = home
        else:
            winner = away
        df_fixture_updated.loc[index, 'winner'] = winner
    return df_fixture_updated

get_winner(df_fixture_knockout)

Unnamed: 0,home,score,away,winner
48,Netherlands,Match 49,Wales,Netherlands
49,Argentina,Match 50,Denmark,Argentina
50,France,Match 52,Poland,France
51,England,Match 51,Senegal,England
52,Germany,Match 53,Belgium,Germany
53,Brazil,Match 54,Uruguay,Brazil
54,Croatia,Match 55,Spain,Spain
55,Portugal,Match 56,Switzerland,Portugal


In [37]:
# quarter-finals

def update_table(df_fixture_round_1, df_fixture_round_2):
    for index, row in df_fixture_round_1.iterrows():
        winner = df_fixture_round_1.loc[index, 'winner']
        match = df_fixture_round_1.loc[index, 'score']
        df_fixture_round_2.replace({f'Winners {match}':winner}, inplace=True)
    df_fixture_round_2['winner'] = '?'
    return df_fixture_round_2

update_table(df_fixture_knockout, df_fixture_quarter)

Unnamed: 0,home,score,away,winner
56,Germany,Match 58,Brazil,?
57,Netherlands,Match 57,Argentina,?
58,Spain,Match 60,Portugal,?
59,England,Match 59,France,?


In [40]:
get_winner(df_fixture_quarter)

Unnamed: 0,home,score,away,winner
56,Germany,Match 58,Brazil,Brazil
57,Netherlands,Match 57,Argentina,Netherlands
58,Spain,Match 60,Portugal,Portugal
59,England,Match 59,France,England


In [41]:
# semi-finals

update_table(df_fixture_quarter, df_fixture_semi)

Unnamed: 0,home,score,away,winner
60,Netherlands,Match 61,Brazil,?
61,England,Match 62,Portugal,?


In [42]:
get_winner(df_fixture_semi)

Unnamed: 0,home,score,away,winner
60,Netherlands,Match 61,Brazil,Brazil
61,England,Match 62,Portugal,England


In [43]:
# finals

update_table(df_fixture_semi, df_fixture_final)

Unnamed: 0,home,score,away,winner
62,Losers Match 61,Match 63,Losers Match 62,?
63,Brazil,Match 64,England,?


In [44]:
get_winner(df_fixture_final)

Unnamed: 0,home,score,away,winner
62,Losers Match 61,Match 63,Losers Match 62,Losers Match 62
63,Brazil,Match 64,England,Brazil
