In [1]:
import pandas as pd
import pickle
from scipy.stats import poisson

In [166]:
dict_table = pickle.load(open('dict_table','rb'))
df_fifa = pd.read_csv('FIFA HISTORICAL DATA CLEANED.csv')
df_fixtures_2022 = pd.read_csv("FIFA 2022 FIXTURES CLEANED.csv")

In [160]:
df_fifa

Unnamed: 0,Home Team,Away Teams,Year,Home Goals,Away Goals,Total Goals
0,France,Mexico,1930,4,1,5
1,Uruguay,Argentina,1930,4,2,6
2,Uruguay,Yugoslavia,1930,6,1,7
3,Argentina,United States,1930,6,1,7
4,Paraguay,Belgium,1930,1,0,1
...,...,...,...,...,...,...
895,Serbia,Brazil,2018,0,2,2
896,Serbia,Switzerland,2018,1,2,3
897,Brazil,Costa Rica,2018,2,0,2
898,Costa Rica,Serbia,2018,0,1,1


<h1> CALCULATING TEAM STRENGTH AS MEAN GOALS SCORED AND CONCEDED </h1>

In [12]:
df_home = df_fifa[['Home Team','Home Goals','Away Goals']]  # Splitting the Dataframe into home and away teams
df_away = df_fifa[['Away Teams','Home Goals','Away Goals']]

In [17]:
df_home = df_home.rename(columns = {'Home Team':'Team', 'Home Goals': "Goals Scored", "Away Goals":"Goals Conceded"})
df_away = df_away.rename(columns = {'Home Team':'Team', 'Home Goals': "Goals Conceded", "Away Goals":"Goals Scored"})

In [27]:
df_team_strength = pd.concat([df_home,df_away],ignore_index = True).groupby("Team").mean("Goals Scored","Goals Conceded")

In [161]:
df_team_strength

Unnamed: 0_level_0,Goals Scored,Goals Conceded
Team,Unnamed: 1_level_1,Unnamed: 2_level_1
Algeria,0.833333,1.666667
Angola,0.000000,1.000000
Argentina,1.913793,0.862069
Australia,1.000000,1.857143
Austria,2.285714,1.571429
...,...,...
Uruguay,2.125000,1.125000
Wales,2.000000,1.000000
West Germany,2.500000,0.894737
Yugoslavia,2.733333,0.600000


<h1> Defining a function for prediction </h1>

In [167]:
def predict_points(home, away):
    if home in df_team_strength.index and away in df_team_strength.index:
        # goals_scored * goals_conceded
        lamb_home = df_team_strength.at[home,'Goals Scored'] * df_team_strength.at[away,'Goals Conceded']
        lamb_away = df_team_strength.at[away,'Goals Scored'] * df_team_strength.at[home,'Goals Conceded']
        prob_home, prob_away, prob_draw = 0, 0, 0
        for x in range(0,11): #number of goals home team
            for y in range(0, 11): #number of goals away team
                p = poisson.pmf(x, lamb_home) * poisson.pmf(y, lamb_away)
                if x == y:
                    prob_draw += p
                elif x > y:
                    prob_home += p
                else:
                    prob_away += p
        
        points_home = 3 * prob_home + prob_draw
        points_away = 3 * prob_away + prob_draw
        return (points_home, points_away)
    else:
        return (0, 0)

<h1> Predicting World Cup Matches 2022 </h1>

In [168]:
df_fixture_group_48 = df_fixtures_2022[:48].copy()
df_fixture_knockout = df_fixtures_2022[48:56].copy()
df_fixture_quarter = df_fixtures_2022[56:60].copy()
df_fixture_semi = df_fixtures_2022[60:62].copy()
df_fixture_final = df_fixtures_2022[62:].copy()

<h2> Group Stage Matches</h2>

In [169]:
for group in dict_table:
    teams_in_group = dict_table[group]['Team'].values
    df_fixture_group_6 = df_fixture_group_48[df_fixture_group_48['Home'].isin(teams_in_group)]
    for index, row in df_fixture_group_6.iterrows():
        home, away = row['Home'], row['Away']
        points_home, points_away = predict_points(home, away)
        dict_table[group].loc[dict_table[group]['Team'] == home, 'Pts'] += points_home
        dict_table[group].loc[dict_table[group]['Team'] == away, 'Pts'] += points_away

    dict_table[group] = dict_table[group].sort_values('Pts', ascending=False).reset_index()
    dict_table[group] = dict_table[group][['Team', 'Pts']]
    dict_table[group] = dict_table[group].round(0)

In [170]:
dict_table['Group G']

Unnamed: 0,Team,Pts
0,Brazil,8.0
1,Switzerland,6.0
2,Cameroon,3.0
3,Serbia,1.0


<h2> Round of 16 </h2>

In [171]:
for group in dict_table:
    group_winner = dict_table[group].loc[0, 'Team']
    runners_up = dict_table[group].loc[1, 'Team']
    df_fixture_knockout.replace({f'Winners {group}':group_winner,
                                 f'Runners-up {group}':runners_up}, inplace=True)

df_fixture_knockout['Winner'] = '?'
df_fixture_knockout

Unnamed: 0,Home,Score,Away,Winner
48,Netherlands,Match 49,Wales,?
49,Argentina,Match 50,Denmark,?
50,England,Match 51,Ecuador,?
51,France,Match 52,Mexico,?
52,Germany,Match 53,Croatia,?
53,Brazil,Match 54,Uruguay,?
54,Belgium,Match 55,Spain,?
55,Portugal,Match 56,Switzerland,?


In [172]:
def get_winner(df_fixture_updated):
    for index, row in df_fixture_updated.iterrows():
        home, away = row['Home'], row['Away']
        points_home, points_away = predict_points(home, away)
        if points_home > points_away:
            winner = home
        else:
            winner = away
        df_fixture_updated.loc[index, 'Winner'] = winner
    return df_fixture_updated

In [173]:
get_winner(df_fixture_knockout)

Unnamed: 0,Home,Score,Away,Winner
48,Netherlands,Match 49,Wales,Netherlands
49,Argentina,Match 50,Denmark,Argentina
50,England,Match 51,Ecuador,England
51,France,Match 52,Mexico,France
52,Germany,Match 53,Croatia,Germany
53,Brazil,Match 54,Uruguay,Brazil
54,Belgium,Match 55,Spain,Belgium
55,Portugal,Match 56,Switzerland,Portugal


<h2> Quater Finals </h2>

In [179]:
def update_table(df_fixture_round_1, df_fixture_round_2):
    for index, row in df_fixture_round_1.iterrows():
        winner = df_fixture_round_1.loc[index, 'Winner']
        match = df_fixture_round_1.loc[index, 'Score']
        df_fixture_round_2.replace({f'Winners {match}':winner}, inplace=True)
    df_fixture_round_2['Winner'] = '?'
    return df_fixture_round_2

In [180]:
update_table(df_fixture_knockout, df_fixture_quarter)

Unnamed: 0,Home,Score,Away,Winner
56,Netherlands,Match 57,Argentina,?
57,Germany,Match 58,Brazil,?
58,England,Match 59,France,?
59,Belgium,Match 60,Portugal,?


In [181]:
get_winner(df_fixture_quarter)

Unnamed: 0,Home,Score,Away,Winner
56,Netherlands,Match 57,Argentina,Argentina
57,Germany,Match 58,Brazil,Brazil
58,England,Match 59,France,England
59,Belgium,Match 60,Portugal,Portugal


<h2> Semi Finals</h2>

In [183]:
update_table(df_fixture_quarter, df_fixture_semi)

Unnamed: 0,Home,Score,Away,Winner
60,Argentina,Match 61,Brazil,?
61,England,Match 62,Portugal,?


In [184]:
get_winner(df_fixture_semi)

Unnamed: 0,Home,Score,Away,Winner
60,Argentina,Match 61,Brazil,Brazil
61,England,Match 62,Portugal,England


<h2> Finals </h2>

In [185]:
update_table(df_fixture_semi, df_fixture_final)

Unnamed: 0,Home,Score,Away,Winner
62,Losers Match 61,Match 63,Losers Match 62,?
63,Brazil,Match 64,England,?


In [186]:
get_winner(df_fixture_final)

Unnamed: 0,Home,Score,Away,Winner
62,Losers Match 61,Match 63,Losers Match 62,Losers Match 62
63,Brazil,Match 64,England,England
