In [1]:
import pandas as pd
import pickle
from scipy.stats import poisson

In [3]:
#Código del modelo
dict_tables = pickle.load(open('dict_tables', 'rb'))
df_data = pd.read_csv('Conmebol_Copa_America_complete_data.csv')
df_fixture = pd.read_csv('Programacion_Copa_America_2024.csv')

In [4]:
for group in dict_tables:
    dict_tables[group]['Pts'] = 0

dict_tables['Group C'].loc[2, 'Team'] = 'United States'
dict_tables['Group D'].loc[0, 'Team'] = 'Colombia'
dict_tables['Group D'].loc[3, 'Team'] = 'Paraguay'

for i in range(22):
    df_fixture.loc[i, 'score'] = f'Match {i+1}'

df_fixture.loc[24, 'home'] = 'Winner Group A'
df_fixture.loc[24, 'away'] = 'Runner-up Group B'
df_fixture.loc[25, 'home'] = 'Winner Group B'
df_fixture.loc[25, 'away'] = 'Runner-up Group A'
df_fixture.loc[26, 'away'] = 'Runner-up Group C'
df_fixture.loc[27, 'home'] = 'Winner Group C'

In [5]:
dict_tables['Group D']

Unnamed: 0,Pos,Team,Pld,W,D,L,GF,GA,GD,Pts
0,1,Colombia,3,2,1,0,6,2,+4,0
1,2,Brazil,3,1,2,0,5,2,+3,0
2,3,Costa Rica,3,1,1,1,2,4,−2,0
3,4,Paraguay,3,0,0,3,3,8,−5,0


In [6]:
df_fixture

Unnamed: 0,home,score,away,year
0,Argentina,Match 1,Canada,2024
1,Peru,Match 2,Chile,2024
2,Peru,Match 3,Canada,2024
3,Chile,Match 4,Argentina,2024
4,Argentina,Match 5,Peru,2024
5,Canada,Match 6,Chile,2024
6,Ecuador,Match 7,Venezuela,2024
7,Mexico,Match 8,Jamaica,2024
8,Ecuador,Match 9,Jamaica,2024
9,Venezuela,Match 10,Mexico,2024


## **Calcular Team Strength**

In [7]:
#Dividir df_data en df_home y df_away
df_home =  df_data[['home', 'home_goals', 'away_goals']]
df_away =  df_data[['away', 'home_goals', 'away_goals']]

In [8]:
#Renombrar Columnas
df_home = df_home.rename(columns={'home':'Team', 'home_goals':'Goals_Scored', 'away_goals':'Goals_Conceded'})
df_away = df_away.rename(columns={'away':'Team', 'home_goals':'Goals_Conceded', 'away_goals':'Goals_Scored'})

In [9]:
df_team_strength = pd.concat([df_home, df_away], ignore_index=True).groupby('Team').mean()
df_team_strength

Unnamed: 0_level_0,Goals_Scored,Goals_Conceded
Team,Unnamed: 1_level_1,Unnamed: 2_level_1
Argentina,2.322917,0.875
Bolivia,0.920354,2.433628
Brazil,2.243094,1.038674
Chile,1.565217,1.652174
Colombia,1.142857,1.555556
Costa Rica,1.0,1.823529
Ecuador,1.064516,2.604839
Haiti,0.333333,4.0
Honduras,1.166667,0.833333
Jamaica,0.0,1.5


## **Función de predicción**

In [10]:
def predict_points(home, away):
    if home in df_team_strength.index and away in df_team_strength.index:
        #goals_scored*goals_conceded
        lamb_home = df_team_strength.at[home, 'Goals_Scored'] * df_team_strength.at[away, 'Goals_Conceded']
        lamb_away = df_team_strength.at[away, 'Goals_Scored'] * df_team_strength.at[home, 'Goals_Conceded']
        prob_home, prob_away, prob_draw = 0, 0, 0

        for i in range(11): #Goles equipo local
            for j in range(11): #Goles equipo visitante
                p = poisson.pmf(i, lamb_home) * poisson.pmf(j, lamb_away)
                if i == j:
                    prob_draw += p
                elif i > j:
                    prob_home += p
                else:
                    prob_away += p
        
        points_home = 3 * prob_home + prob_draw
        points_away = 3 * prob_away + prob_draw

        return (points_home, points_away)
    else:
        return (0,0)

In [11]:
df_fixture_groups = df_fixture[:24].copy()
df_fixture_quarter = df_fixture[24:28].copy()
df_fixture_semi = df_fixture[28:30].copy()
df_fixture_final = df_fixture[30:].copy()

In [12]:
for group in dict_tables:
    teams_in_group = dict_tables[group]['Team'].values
    df_fixture_real_matches = df_fixture_groups[df_fixture_groups['home'].isin(teams_in_group)]
    for index, row in df_fixture_real_matches.iterrows():
        home, away = row['home'], row['away']
        points_home, points_away = predict_points(home, away)
        dict_tables[group].loc[dict_tables[group]['Team'] == home, 'Pts'] += points_home
        dict_tables[group].loc[dict_tables[group]['Team'] == away, 'Pts'] += points_away 
        
    dict_tables[group] = dict_tables[group].sort_values('Pts', ascending=False).reset_index() 
    dict_tables[group] = dict_tables[group][['Team', 'Pts']]
    dict_tables[group] = dict_tables[group].round(0)

In [13]:
print(dict_tables['Group A'])
print("\n")
print(dict_tables['Group B'])
print("\n")
print(dict_tables['Group C'])
print("\n")
print(dict_tables['Group D'])
print("\n")

        Team  Pts
0  Argentina  5.0
1       Peru  2.0
2      Chile  2.0
3     Canada  0.0


        Team  Pts
0     Mexico  8.0
1    Ecuador  5.0
2  Venezuela  4.0
3    Jamaica  1.0


            Team  Pts
0        Uruguay  8.0
1  United States  4.0
2         Panama  2.0
3        Bolivia  2.0


         Team  Pts
0      Brazil  8.0
1    Paraguay  4.0
2    Colombia  3.0
3  Costa Rica  2.0




In [14]:
df_fixture_quarter

Unnamed: 0,home,score,away,year
24,Winner Group A,1–1,Runner-up Group B,2024
25,Winner Group B,1–1,Runner-up Group A,2024
26,Colombia,5–0,Runner-up Group C,2024
27,Winner Group C,0–0,Brazil,2024


In [15]:
for group in dict_tables:
    group_winner = dict_tables[group].loc[0, 'Team']
    group_runners_up = dict_tables[group].loc[1, 'Team']

    df_fixture_quarter.replace({f'Winner {group}': group_winner,
                                f'Runner-up {group}':group_runners_up}, inplace=True)
    
df_fixture_quarter['Winner'] = ''
df_fixture_quarter

Unnamed: 0,home,score,away,year,Winner
24,Argentina,1–1,Ecuador,2024,
25,Mexico,1–1,Peru,2024,
26,Colombia,5–0,United States,2024,
27,Uruguay,0–0,Brazil,2024,


In [16]:
df_fixture_semi

Unnamed: 0,home,score,away,year
28,Argentina,2–0,Canada,2024
29,Uruguay,0–1,Colombia,2024


In [17]:
def get_winner(df_updated):
    for index, row in df_updated.iterrows():
        home, away = row['home'], row['away']
        points_home, points_away = predict_points(home, away)
        if points_home > points_away:
            winner = home
        else:
            winner = away
        
        df_updated.loc[index, 'Winner'] = winner

    return df_updated

In [18]:
#Crear función que actualiza cada fase del juego

def update_table(df_fixture_1, df_fixture_2):
    for index, row in df_fixture_1.iterrows():
        winner = df_fixture_1.loc[index, 'Winner']
        match = df_fixture_1.loc[index, 'score']
        df_fixture_2.replace({f'Winner {match}':winner}, inplace=True)
    df_fixture_2['Winner'] = ''
    return df_fixture_2

In [19]:
def get_winner(df_updated):
    for index, row in df_updated.iterrows():
        home, away = row['home'], row['away']
        points_home, points_away = predict_points(home, away)
        if points_home > points_away:
            winner = home
        else:
            winner = away
        
        df_updated.loc[index, 'Winner'] = winner

    return df_updated

df_fixture_quarter = get_winner(df_fixture_quarter)
df_fixture_quarter

Unnamed: 0,home,score,away,year,Winner
24,Argentina,1–1,Ecuador,2024,Argentina
25,Mexico,1–1,Peru,2024,Mexico
26,Colombia,5–0,United States,2024,Colombia
27,Uruguay,0–0,Brazil,2024,Brazil


In [20]:
df_fixture_semi = update_table(df_fixture_quarter, df_fixture_semi)

In [21]:
df_fixture_semi = get_winner(df_fixture_semi)
df_fixture_semi

Unnamed: 0,home,score,away,year,Winner
28,Argentina,2–0,Canada,2024,Canada
29,Uruguay,0–1,Colombia,2024,Uruguay


In [22]:
df_fixture_final = update_table(df_fixture_semi, df_fixture_final)
get_winner(df_fixture_final)

Unnamed: 0,home,score,away,year,Winner
30,Canada,2–2,Uruguay,2024,Uruguay
31,Argentina,1–0 (a.e.t.),Colombia,2024,Argentina
