In [6]:
import pandas as pd
import pickle
from scipy.stats import poisson

In [7]:
dict_table = pickle.load(open('dict_table','rb'))
df_historical_data = pd.read_csv('clean_america_worldcup_matches.csv')
df_fixture = pd.read_csv('clean_america_worldcup_fixture.csv')

In [8]:
df_home = df_historical_data[['Home', 'HomeGoals', 'AwayGoals']]

In [9]:
df_away = df_historical_data[['Away', 'HomeGoals', 'AwayGoals']]

In [10]:


df_home = df_home.rename(columns={'Home':'Team', 'HomeGoals': 'GoalsScored', 'AwayGoals': 'GoalsConceded'})
df_away = df_away.rename(columns={'Away':'Team', 'HomeGoals': 'GoalsConceded', 'AwayGoals': 'GoalsScored'})

df_team_strength = pd.concat([df_home, df_away], ignore_index=True).groupby(['Team']).mean()
df_team_strength

Unnamed: 0_level_0,GoalsScored,GoalsConceded
Team,Unnamed: 1_level_1,Unnamed: 2_level_1
Argentina,1.813953,0.854651
Bolivia,0.754098,1.721311
Brazil,1.989362,0.728723
Chile,1.45679,1.154321
Colombia,1.202247,0.893258
Costa Rica,1.032258,1.870968
Ecuador,1.16129,1.637097
Haiti,0.333333,4.0
Honduras,1.166667,0.833333
Jamaica,0.0,1.666667


In [11]:
def predict_points(home, away):
    if home in df_team_strength.index and away in df_team_strength.index:
        # goals_scored * goals_conceded
        lamb_home = df_team_strength.at[home,'GoalsScored'] * df_team_strength.at[away,'GoalsConceded']
        lamb_away = df_team_strength.at[away,'GoalsScored'] * df_team_strength.at[home,'GoalsConceded']
        prob_home, prob_away, prob_draw = 0, 0, 0
        for x in range(0,11): #number of goals home team
            for y in range(0, 11): #number of goals away team
                p = poisson.pmf(x, lamb_home) * poisson.pmf(y, lamb_away)
                if x == y:
                    prob_draw += p
                elif x > y:
                    prob_home += p
                else:
                    prob_away += p
        
        points_home = 3 * prob_home + prob_draw
        points_away = 3 * prob_away + prob_draw
        return (points_home, points_away)
    else:
        return (0, 0)

In [14]:
print(df_team_strength.index)


Index(['Argentina', 'Bolivia', 'Brazil', 'Chile', 'Colombia', 'Costa Rica',
       'Ecuador', 'Haiti', 'Honduras', 'Jamaica', 'Japan', 'Mexico', 'Panama',
       'Paraguay', 'Peru', 'Qatar', 'United States', 'Uruguay', 'Venezuela'],
      dtype='object', name='Team')


In [18]:
points_home, points_away = predict_points('Costa Rica', 'Colombia')
print((float(points_home), float(points_away)))

(0.6071015644487924, 2.206919976816745)


In [24]:
# Partidos de la fase de grupos (los primeros 24 partidos)
df_fixture_group = df_fixture[:24].copy()

# Partidos de los cuartos de final (partidos 25-28)
df_fixture_quarter_finals = df_fixture[24:28].copy()

# Partidos de las semifinales (partidos 29-30)
df_fixture_semi_finals = df_fixture[28:30].copy()

# Partido por el tercer lugar (partido 31)
df_fixture_third_place = df_fixture[30:31].copy()

# Partido final (partido 32)
df_fixture_final = df_fixture[31:].copy()


Unnamed: 0,0,Team,2,3,4,5,6,7,8,9,10,11,12
0,,Quarter-finals,Quarter-finals,,,Semi-finals,Semi-finals,,,Final,Final,,
1,,,,,,,,,,,,,
2,,"July 4 – Houston, TX","July 4 – Houston, TX",,,,,,,,,,
3,,"July 4 – Houston, TX","July 4 – Houston, TX",,,,,,,,,,
4,,Winner Group A,,,,,,,,,,,
5,,Winner Group A,,,,"July 9 – East Rutherford, NJ","July 9 – East Rutherford, NJ",,,,,,
6,,Runner-up Group B,,,,"July 9 – East Rutherford, NJ","July 9 – East Rutherford, NJ",,,,,,
7,,Runner-up Group B,,,,Winner Match 25,,,,,,,
8,,"July 5 – Arlington, TX","July 5 – Arlington, TX",,,Winner Match 25,,,,,,,
9,,"July 5 – Arlington, TX","July 5 – Arlington, TX",,,Winner Match 26,,,,,,,


In [28]:
for group in dict_table:
    teams_in_group = dict_table[group]['Team'].values
    df_fixture_group_6 = df_fixture_group[df_fixture_group['Home'].isin(teams_in_group)]
    for index, row in df_fixture_group_6.iterrows():
        home, away = row['Home'], row['Away']
        points_home, points_away = predict_points(home, away)
        dict_table[group].loc[dict_table[group]['Team'] == home, 'Pts'] += points_home
        dict_table[group].loc[dict_table[group]['Team'] == away, 'Pts'] += points_away

    dict_table[group] = dict_table[group].sort_values('Pts', ascending=False).reset_index()
    dict_table[group] = dict_table[group][['Team', 'Pts']]
    dict_table[group] = dict_table[group].round(0)

  dict_table[group].loc[dict_table[group]['Team'] == home, 'Pts'] += points_home
  dict_table[group].loc[dict_table[group]['Team'] == home, 'Pts'] += points_home
  dict_table[group].loc[dict_table[group]['Team'] == home, 'Pts'] += points_home


KeyError: 'Pts'

In [35]:
df_fixture_quarter_finals


Unnamed: 0,Home,Away,Score,year
24,Winner Group A,Runner-up Group B,Match 25,2024
25,Winner Group B,Runner-up Group A,Match 26,2024
26,Winner Group C,Runner-up Group D,Match 27,2024
27,Winner Group D,Runner-up Group C,Match 28,2024


In [39]:
for group in dict_table:
    group_winner = dict_table[group].loc[0, 'Team']
    runners_up = dict_table[group].loc[1, 'Team']
    df_fixture_quarter_finals.replace({f'Winners {group}':group_winner,
                                 f'Runners-up {group}':runners_up}, inplace=True)

df_fixture_quarter_finals['winner'] = '?'
df_fixture_quarter_finals

Unnamed: 0,Home,Away,Score,year,winner
24,Winner Group A,Runner-up Group B,Match 25,2024,?
25,Winner Group B,Runner-up Group A,Match 26,2024,?
26,Winner Group C,Runner-up Group D,Match 27,2024,?
27,Winner Group D,Runner-up Group C,Match 28,2024,?


In [37]:
def get_winner(df_fixture_updated):
    for index, row in df_fixture_updated.iterrows():
        home, away = row['Home'], row['Away']
        points_home, points_away = predict_points(home, away)
        if points_home > points_away:
            winner = home
        else:
            winner = away
        df_fixture_updated.loc[index, 'winner'] = winner
    return df_fixture_updated

In [38]:
get_winner(df_fixture_quarter_finals)

Unnamed: 0,Home,Away,Score,year,winner
24,Winner Group A,Runner-up Group B,Match 25,2024,Runner-up Group B
25,Winner Group B,Runner-up Group A,Match 26,2024,Runner-up Group A
26,Winner Group C,Runner-up Group D,Match 27,2024,Runner-up Group D
27,Winner Group D,Runner-up Group C,Match 28,2024,Runner-up Group C
