# Predicción

In [21]:
import pandas as pd
import pickle
from scipy.stats import poisson

In [None]:
dict_table=pickle.load(open("dict_table","rb"))
df_historical_data=pd.read_csv("clean_mundiales.csv")
df_actual_data=pd.read_csv("clean_wordcup_fixtue.csv")

#### Construimos un dataframe con la media de goles anotados y recibidos por equipo en todos los mundiales

In [23]:
df_home=df_historical_data.loc[:,["HomeTeam","homeGoals","awayGoals"]]
df_away=df_historical_data.loc[:,["AwayTeam","homeGoals","awayGoals"]]

In [24]:
df_home.rename(columns={"HomeTeam":"Team","homeGoals":"Goalsscored","awayGoals":"Goalsrecieved"},inplace=True)
df_away.rename(columns={"AwayTeam":"Team","homeGoals":"Goalsrecieved","awayGoals":"Goalsscored"},inplace=True)

In [25]:
df_team_strength=pd.concat([df_home,df_away],ignore_index=True).groupby("Team").agg("mean").reset_index()

In [26]:
df_team_strength

Unnamed: 0,Team,Goalsscored,Goalsrecieved
0,Algeria,1.000000,1.461538
1,Angola,0.333333,0.666667
2,Argentina,1.691358,1.148148
3,Australia,0.812500,1.937500
4,Austria,1.482759,1.620690
...,...,...,...
80,Uruguay,1.553571,1.321429
81,Wales,0.800000,0.800000
82,WestGermany,2.098361,1.213115
83,Yugoslavia,1.666667,1.272727


#### Construimos una función para predecir con la distribución de Poisson.

In [125]:
def predict_points(home, away):
    if home in df_team_strength.Team.unique() and away in df_team_strength.Team.unique():

        lamb_home = df_team_strength.loc[df_team_strength["Team"]==home,"Goalsscored"].values[0] \
                                            * df_team_strength.loc[df_team_strength["Team"]==away,"Goalsrecieved"].values[0]
        
        lamb_away = df_team_strength.loc[df_team_strength["Team"]==away,"Goalsscored"].values[0]\
                                            * df_team_strength.loc[df_team_strength["Team"]==home,"Goalsrecieved"].values[0]
        prob_home, prob_away, prob_draw = 0, 0, 0
        for x in range(0,11):
            for y in range(0, 11):
                p = poisson.pmf(x, lamb_home) * poisson.pmf(y, lamb_away)
                if x == y:
                    prob_draw += p
                elif x > y:
                    prob_home += p
                else:
                    prob_away += p
        
        points_home = 3 * prob_home + prob_draw
        points_away = 3 * prob_away + prob_draw
        return (points_home, points_away)
    else:
        return (0, 0)

#### Testeamos la función

In [128]:
print(predict_points("Argentina","Mexico"))
print(predict_points("Argentina","Spain"))
print(predict_points("France","Sweden"))

(2.3129151525530505, 0.5378377125059863)
(1.4695837906683096, 1.3159335527729064)
(1.8383363476307306, 0.9760309434418984)


#### Predieciendo el mundial

In [130]:
df_actual_data_group_48 = df_actual_data[:48].copy()
df_actual_data_knockout = df_actual_data[48:56].copy()
df_actual_data_quarter = df_actual_data[56:60].copy()
df_actual_data_semi = df_actual_data[60:62].copy()
df_actual_data_final = df_actual_data[62:].copy()

#### Fase de grupos

In [149]:
for group in dict_table:
    teams_in_group = dict_table[group]['Team'].values
    df_actual_data_group_6 = df_actual_data_group_48[df_actual_data_group_48['home'].isin(teams_in_group)]
    for index, row in df_actual_data_group_6.iterrows():
        home, away = row['home'], row['away']
        points_home, points_away = predict_points(home, away)
        dict_table[group].loc[dict_table[group]['Team'] == home, 'Pts'] += points_home
        dict_table[group].loc[dict_table[group]['Team'] == away, 'Pts'] += points_away

    dict_table[group] = dict_table[group].sort_values('Pts', ascending=False).reset_index()
    dict_table[group] = dict_table[group][['Team', 'Pts']]
    dict_table[group] = dict_table[group].round(0)

In [154]:
print(dict_table)

{'Grupo A':           Team  Pts
0  Netherlands  4.0
1      Senegal  2.0
2      Ecuador  2.0
3    Qatar (H)  0.0, 'Grupo B':             Team  Pts
0        England  6.0
1          Wales  3.0
2           Iran  2.0
3  United States  0.0, 'Grupo C':            Team  Pts
0     Argentina  7.0
1        Poland  6.0
2        Mexico  1.0
3  Saudi Arabia  0.0, 'Grupo D':         Team  Pts
0     France  7.0
1    Denmark  6.0
2    Tunisia  3.0
3  Australia  2.0, 'Grupo E':          Team  Pts
0       Spain  5.0
1     Germany  4.0
2       Japan  3.0
3  Costa Rica  0.0, 'Grupo F':       Team  Pts
0  Croatia  7.0
1  Belgium  6.0
2  Morocco  4.0
3   Canada  0.0, 'Grupo G':           Team  Pts
0       Brazil  8.0
1  Switzerland  4.0
2       Serbia  3.0
3     Cameroon  2.0, 'Grupo H':           Team  Pts
0      Uruguay  5.0
1     Portugal  4.0
2        Ghana  2.0
3  South Korea  0.0}


#### Knockout

In [181]:
for group in dict_table:
    group_winner = dict_table[group].loc[0, 'Team']
    runners_up = dict_table[group].loc[1, 'Team']
    group=group.replace(" ","")
    group=group.replace("Grupo","Group")
    df_actual_data_knockout.home.replace({f'Winners{group}':group_winner}, inplace=True)
    df_actual_data_knockout.away.replace({f'Runners-up{group}':runners_up}, inplace=True)

df_actual_data_knockout['winner'] = '?'
print(df_actual_data_knockout)

           home     score         away  year winner
48  Netherlands  Match 49        Wales  2022      ?
49    Argentina  Match 50      Denmark  2022      ?
50       France  Match 52       Poland  2022      ?
51      England  Match 51      Senegal  2022      ?
52        Spain  Match 53      Belgium  2022      ?
53       Brazil  Match 54     Portugal  2022      ?
54      Croatia  Match 55      Germany  2022      ?
55      Uruguay  Match 56  Switzerland  2022      ?


In [188]:
def get_winner(df_actual_data_updated):
    for index, row in df_actual_data_updated.iterrows():
        home, away = row['home'], row['away']
        points_home, points_away = predict_points(home, away)
        if points_home > points_away:
            winner = home
        else:
            winner = away
        df_actual_data_updated.loc[index, 'winner'] = winner
    return df_actual_data_updated

In [189]:
get_winner(df_actual_data_knockout)

Unnamed: 0,home,score,away,year,winner
48,Netherlands,Match 49,Wales,2022,Netherlands
49,Argentina,Match 50,Denmark,2022,Argentina
50,France,Match 52,Poland,2022,France
51,England,Match 51,Senegal,2022,England
52,Spain,Match 53,Belgium,2022,Spain
53,Brazil,Match 54,Portugal,2022,Brazil
54,Croatia,Match 55,Germany,2022,Germany
55,Uruguay,Match 56,Switzerland,2022,Uruguay


#### Quarter finals

In [197]:
def update_table(df_fixture_round_1, df_fixture_round_2):
    for index, row in df_fixture_round_1.iterrows():
        winner = df_fixture_round_1.loc[index, 'winner']
        match = df_fixture_round_1.loc[index, 'score']
        match = match.replace(" ","")
        df_fixture_round_2.home.replace({f'Winners{match}':winner}, inplace=True)
        df_fixture_round_2.away.replace({f'Winners{match}':winner}, inplace=True)
    df_fixture_round_2['winner'] = '?'
    return df_fixture_round_2

In [198]:
update_table(df_actual_data_knockout, df_actual_data_quarter)

Unnamed: 0,home,score,away,year,winner
56,Spain,Match 58,Brazil,2022,?
57,Netherlands,Match 57,Argentina,2022,?
58,Germany,Match 60,Uruguay,2022,?
59,England,Match 59,France,2022,?


In [199]:
get_winner(df_actual_data_quarter)

Unnamed: 0,home,score,away,year,winner
56,Spain,Match 58,Brazil,2022,Brazil
57,Netherlands,Match 57,Argentina,2022,Netherlands
58,Germany,Match 60,Uruguay,2022,Germany
59,England,Match 59,France,2022,France


#### Semifinals

In [201]:

update_table(df_actual_data_quarter, df_actual_data_semi)

Unnamed: 0,home,score,away,year,winner
60,Netherlands,Match 61,Brazil,2022,?
61,France,Match 62,Germany,2022,?


In [202]:
get_winner(df_actual_data_semi)

Unnamed: 0,home,score,away,year,winner
60,Netherlands,Match 61,Brazil,2022,Brazil
61,France,Match 62,Germany,2022,Germany


#### Final

In [203]:
update_table(df_actual_data_semi, df_actual_data_final)

Unnamed: 0,home,score,away,year,winner
62,LosersMatch61,Match 63,LosersMatch62,2022,?
63,Brazil,Match 64,Germany,2022,?


In [204]:
get_winner(df_actual_data_final)

Unnamed: 0,home,score,away,year,winner
62,LosersMatch61,Match 63,LosersMatch62,2022,LosersMatch62
63,Brazil,Match 64,Germany,2022,Brazil
