# Predicting Results for the FIFA World Cup 2022 using Machine Learning

## Import libraries


In [None]:
import pandas as pd
import pickle
from scipy.stats import poisson

## Import datasets

In [None]:
# Load the pickled group tables, closing the file handle as soon as it is read.
# dict_table is later indexed by group name and each entry is used as a
# DataFrame with 'Team' and 'Pts' columns.
# NOTE: pickle can execute arbitrary code while loading; only open 'dict_table'
# when it comes from a trusted source.
with open('dict_table', 'rb') as table_file:
  dict_table = pickle.load(table_file)

# Historical World Cup results and the 2022 fixture list.
df_historical_data = pd.read_csv('clean_fifa_worldcup_matches.csv')
df_fixtures = pd.read_csv('clean_fifa_worldcup_fixture.csv')

In [None]:
# Display the historical match data loaded from the CSV file.
df_historical_data

Unnamed: 0,HomeTeam,AwayTeam,Year,HomeGoals,AwayGoals,TotalGoals
0,France,Mexico,1930,4,1,5
1,Uruguay,Argentina,1930,4,2,6
2,Uruguay,Yugoslavia,1930,6,1,7
3,Argentina,United States,1930,6,1,7
4,Paraguay,Belgium,1930,1,0,1
...,...,...,...,...,...,...
895,Brazil,Costa Rica,2018,2,0,2
896,Serbia,Switzerland,2018,1,2,3
897,Serbia,Brazil,2018,0,2,2
898,France,Peru,2018,1,0,1


## Calculating team strength

Split `df_historical_data` into `df_home` and `df_away`, so that every match contributes one row for the home team and one row for the away team.

In [None]:
# Both views keep the two goal columns; they differ only in which team column
# (home or away) identifies the row.
goal_columns = ['HomeGoals', 'AwayGoals']
df_home = df_historical_data[['HomeTeam', *goal_columns]]
df_away = df_historical_data[['AwayTeam', *goal_columns]]

In [None]:
# Express both frames from the point of view of 'Team': for the home side the
# home goals are the goals scored, for the away side they are the goals conceded.
home_column_names = {'HomeTeam': 'Team', 'HomeGoals': 'GoalsScored', 'AwayGoals': 'GoalsConceeded'}
away_column_names = {'AwayTeam': 'Team', 'HomeGoals': 'GoalsConceeded', 'AwayGoals': 'GoalsScored'}
df_home = df_home.rename(columns=home_column_names)
df_away = df_away.rename(columns=away_column_names)

In [None]:
# Stack the home and away appearances, then average goals scored and conceded
# per team across all of its matches.
df_all_appearances = pd.concat([df_home, df_away], ignore_index=True)
df_team_strength = df_all_appearances.groupby('Team').mean()
df_team_strength

Unnamed: 0_level_0,GoalsScored,GoalsConceeded
Team,Unnamed: 1_level_1,Unnamed: 2_level_1
Algeria,1.000000,1.461538
Angola,0.333333,0.666667
Argentina,1.691358,1.148148
Australia,0.812500,1.937500
Austria,1.482759,1.620690
...,...,...
Uruguay,1.553571,1.321429
Wales,0.800000,0.800000
West Germany,2.112903,1.241935
Yugoslavia,1.666667,1.272727


## Function predict_points

In [None]:
def predict_points(home, away, max_goals=10):
  """Return the expected points of both sides for a single match.

  Each side's goal count is modelled as an independent Poisson variable. The
  home rate is the home team's average goals scored multiplied by the away
  team's average goals conceded, and the away rate is built the other way
  round; both averages are read from the module-level ``df_team_strength``.
  Win, draw and loss probabilities are summed over every scoreline in which
  neither side scores more than ``max_goals`` goals, and the expected points
  are ``3 * P(win) + P(draw)``.

  Args:
    home: Name of the team listed first in the fixture.
    away: Name of the team listed second in the fixture.
    max_goals: Highest goal count per team included in the scoreline grid.
      The Poisson mass above this cut-off is ignored, so the three outcome
      probabilities sum to slightly less than 1.

  Returns:
    A ``(points_home, points_away)`` tuple, or ``(0, 0)`` when either team is
    missing from ``df_team_strength``.
  """
  known_teams = df_team_strength.index
  if home not in known_teams or away not in known_teams:
    # No strength estimate for at least one side: no points can be predicted.
    return (0, 0)

  lambda_home = df_team_strength.at[home, 'GoalsScored'] * df_team_strength.at[away, 'GoalsConceeded']
  lambda_away = df_team_strength.at[away, 'GoalsScored'] * df_team_strength.at[home, 'GoalsConceeded']

  # Each pmf value depends on a single goal count, so evaluate both
  # distributions once up front instead of once per scoreline.
  goals = list(range(max_goals + 1))
  pmf_home = poisson.pmf(goals, lambda_home)
  pmf_away = poisson.pmf(goals, lambda_away)

  prob_home, prob_away, prob_draw = 0, 0, 0
  for x in goals:
    for y in goals:
      p = pmf_home[x] * pmf_away[y]
      if x == y:
        prob_draw += p
      elif x > y:
        prob_home += p
      else:
        prob_away += p

  points_home = 3 * prob_home + prob_draw
  points_away = 3 * prob_away + prob_draw
  return (points_home, points_away)

In [None]:
# Spot-check the model on three fixtures. Only the result of the last call is
# displayed by the notebook; the first two results are discarded.
predict_points('Argentina', 'Mexico')
predict_points('Qatar', 'Ecuador')
predict_points('England', 'United States')

(2.2356147635326007, 0.5922397535606193)

## Predicting World Cup

### Split fixtures

In [None]:
# Cut the fixture list into tournament stages by row position. Each stage is
# copied so it can be modified without touching df_fixtures.
df_fixture_groups = df_fixtures.iloc[:48].copy()      # rows 0-47
df_fixture_knockout = df_fixtures.iloc[48:56].copy()  # rows 48-55
df_fixture_quarter = df_fixtures.iloc[56:60].copy()   # rows 56-59
df_fixture_semi = df_fixtures.iloc[60:62].copy()      # rows 60-61
df_fixture_final = df_fixtures.iloc[62:].copy()       # row 62 onwards

### Group stage

In [None]:
for group, table in dict_table.items():
  group_teams = table['Team'].values
  # Separate the fixtures by group: keep those whose home side plays in this group.
  group_fixtures = df_fixture_groups[df_fixture_groups['home'].isin(group_teams)]
  for _, fixture in group_fixtures.iterrows():
    home = fixture['home']
    away = fixture['away']
    points_home, points_away = predict_points(home, away)
    # Add each side's expected points to its running total in the group table.
    table.loc[table['Team'] == home, 'Pts'] += points_home
    table.loc[table['Team'] == away, 'Pts'] += points_away

  # Rank by the unrounded totals first, then keep only team and points and
  # round the result to whole numbers.
  ranked = table.sort_values('Pts', ascending=False).reset_index()
  dict_table[group] = ranked[['Team', 'Pts']].round(0)

In [None]:
# Display the predicted final table of Group B.
dict_table['Group B']

Unnamed: 0,Team,Pts
0,England,6.0
1,Wales,5.0
2,United States,3.0
3,Iran,2.0


### Round of 16

In [None]:
# Display the Round-of-16 fixtures, which still hold the group placeholders.
df_fixture_knockout

Unnamed: 0,home,score,away,year
48,Winners Group A,Match 49,Runners-up Group B,2022
49,Winners Group C,Match 50,Runners-up Group D,2022
50,Winners Group D,Match 52,Runners-up Group C,2022
51,Winners Group B,Match 51,Runners-up Group A,2022
52,Winners Group E,Match 53,Runners-up Group F,2022
53,Winners Group G,Match 54,Runners-up Group H,2022
54,Winners Group F,Match 55,Runners-up Group E,2022
55,Winners Group H,Match 56,Runners-up Group G,2022


In [None]:
# Swap the "Winners <group>" / "Runners-up <group>" placeholders for the teams
# in the first and second rows of each sorted group table.
for group, table in dict_table.items():
  qualified = {
    f'Winners {group}': table.loc[0, 'Team'],
    f'Runners-up {group}': table.loc[1, 'Team'],
  }
  df_fixture_knockout.replace(qualified, inplace=True)

# No tie has been decided yet.
df_fixture_knockout['winner'] = '?'
df_fixture_knockout

Unnamed: 0,home,score,away,year,winner
48,Netherlands,Match 49,Wales,2022,?
49,Argentina,Match 50,Denmark,2022,?
50,France,Match 52,Poland,2022,?
51,England,Match 51,Senegal,2022,?
52,Germany,Match 53,Belgium,2022,?
53,Brazil,Match 54,Uruguay,2022,?
54,Croatia,Match 55,Spain,2022,?
55,Portugal,Match 56,Switzerland,2022,?


In [None]:
def get_winner(df_fixture_updated):
  """Fill the 'winner' column of a knockout round and return the same frame.

  For every fixture the expected points of both sides are taken from
  ``predict_points``. The home side advances only when its expected points are
  strictly higher; on equal points the away side is recorded as the winner.

  Args:
    df_fixture_updated: Fixtures with 'home' and 'away' columns. The frame is
      modified in place.

  Returns:
    ``df_fixture_updated`` with 'winner' set for every row.
  """
  for match_id, fixture in df_fixture_updated.iterrows():
    home_team = fixture['home']
    away_team = fixture['away']
    home_points, away_points = predict_points(home_team, away_team)
    advancing = home_team if home_points > away_points else away_team
    df_fixture_updated.loc[match_id, 'winner'] = advancing
  return df_fixture_updated

In [None]:
# Decide every Round-of-16 tie; the 'winner' column is filled in place.
get_winner(df_fixture_knockout)

Unnamed: 0,home,score,away,year,winner
48,Netherlands,Match 49,Wales,2022,Netherlands
49,Argentina,Match 50,Denmark,2022,Argentina
50,France,Match 52,Poland,2022,France
51,England,Match 51,Senegal,2022,England
52,Germany,Match 53,Belgium,2022,Germany
53,Brazil,Match 54,Uruguay,2022,Brazil
54,Croatia,Match 55,Spain,2022,Spain
55,Portugal,Match 56,Switzerland,2022,Portugal


### Quarter Finals

In [None]:
def update_table(df_fixture_round_1, df_fixture_round_2):
  """Carry the results of a finished round into the fixtures of the next one.

  For each match in ``df_fixture_round_1`` the placeholder
  ``"Winners <match>"`` in ``df_fixture_round_2`` is replaced by the match
  winner. The placeholder ``"Losers <match>"`` is replaced by the other side,
  so that a third-place play-off fed by the semi-finals is resolved too.
  Placeholders that do not occur in ``df_fixture_round_2`` are left alone.

  Args:
    df_fixture_round_1: Finished round with 'score' (the match label, e.g.
      "Match 61") and 'winner' columns; 'home' and 'away' are needed to
      identify the loser.
    df_fixture_round_2: Fixtures of the following round. Modified in place.

  Returns:
    ``df_fixture_round_2`` with placeholders substituted and its 'winner'
    column reset to '?'.
  """
  for _, played in df_fixture_round_1.iterrows():
    match = played['score']
    winner = played['winner']
    home, away = played.get('home'), played.get('away')

    placeholders = {f'Winners {match}': winner}
    # The loser is whichever side is not the winner. The lookup yields None
    # when the winner is neither side (e.g. still '?') or when the side
    # columns are missing, in which case the placeholder is kept.
    loser = {home: away, away: home}.get(winner)
    if loser is not None:
      placeholders[f'Losers {match}'] = loser

    df_fixture_round_2.replace(placeholders, inplace=True)

  df_fixture_round_2['winner'] = '?'
  return df_fixture_round_2

In [None]:
# Put the Round-of-16 winners into the quarter-final fixtures and reset 'winner' to '?'.
update_table(df_fixture_knockout, df_fixture_quarter)

Unnamed: 0,home,score,away,year,winner
56,Germany,Match 58,Brazil,2022,?
57,Netherlands,Match 57,Argentina,2022,?
58,Spain,Match 60,Portugal,2022,?
59,England,Match 59,France,2022,?


In [None]:
# Decide every quarter-final; the 'winner' column is filled in place.
get_winner(df_fixture_quarter)

Unnamed: 0,home,score,away,year,winner
56,Germany,Match 58,Brazil,2022,Brazil
57,Netherlands,Match 57,Argentina,2022,Netherlands
58,Spain,Match 60,Portugal,2022,Portugal
59,England,Match 59,France,2022,France


### Semi Finals

In [None]:
# Put the quarter-final winners into the semi-final fixtures and reset 'winner' to '?'.
update_table(df_fixture_quarter, df_fixture_semi)

Unnamed: 0,home,score,away,year,winner
60,Netherlands,Match 61,Brazil,2022,?
61,France,Match 62,Portugal,2022,?


In [None]:
# Decide both semi-finals; the 'winner' column is filled in place.
get_winner(df_fixture_semi)

Unnamed: 0,home,score,away,year,winner
60,Netherlands,Match 61,Brazil,2022,Brazil
61,France,Match 62,Portugal,2022,France


### Finals

In [None]:
# Put the semi-final winners into the last two fixtures and reset 'winner' to '?'.
update_table(df_fixture_semi, df_fixture_final)

Unnamed: 0,home,score,away,year,winner
62,Losers Match 61,Match 63,Losers Match 62,2022,?
63,Brazil,Match 64,France,2022,?


In [None]:
# Decide the remaining fixtures; the 'winner' column is filled in place.
get_winner(df_fixture_final)

Unnamed: 0,home,score,away,year,winner
62,Losers Match 61,Match 63,Losers Match 62,2022,Losers Match 62
63,Brazil,Match 64,France,2022,Brazil
