In [2]:
import pandas as pd
import pickle
from scipy.stats import poisson

In [3]:
# Historical match results (one row per match) and the cleaned fixture list.
df_historical = pd.read_csv('historial_cleaning.csv')
df_fixture_cleaned = pd.read_csv('fixture_cleaned.csv')

# Group tables, keyed by group, as pickled by an earlier step.
# NOTE(review): pickle.load can execute arbitrary code embedded in the file;
# only load 'groups_file' when it comes from a trusted source.
# The context manager guarantees the file handle is closed after loading.
with open('groups_file', 'rb') as groups_fh:
  dict_table = pickle.load(groups_fh)

In [4]:
# Two views of the same matches: one keyed by the home side ('Local'),
# one keyed by the away side ('Visitante'), each with both goal columns.
df_home = df_historical.loc[:, ['Local', 'HomeGoals', 'AwayGoals']]
df_away = df_historical.loc[:, ['Visitante', 'HomeGoals', 'AwayGoals']]

In [5]:
# Normalise both views to a common schema (Team / GoalsScored / GoalsConceded).
# For the home side, HomeGoals are the goals it scored.
df_home = df_home.rename(
  columns={
    'Local': 'Team',
    'HomeGoals': 'GoalsScored',
    'AwayGoals': 'GoalsConceded',
  }
)
# For the away side the roles are swapped: HomeGoals are the goals it conceded.
df_away = df_away.rename(
  columns={
    'Visitante': 'Team',
    'HomeGoals': 'GoalsConceded',
    'AwayGoals': 'GoalsScored',
  }
)

In [6]:
# Stack home and away appearances, then average per team: one row per team
# holding its mean goals scored and mean goals conceded per match.
team_strength = (
  pd.concat([df_home, df_away], ignore_index=True)
  .groupby('Team')
  .mean()
)

In [7]:
def predict_points(home, away, max_goals=10):
  """Return the expected league points of `home` and `away` for one match.

  Each side's goal count is modelled as an independent Poisson variable:
  the home rate is the home team's mean goals scored times the away team's
  mean goals conceded, and vice versa for the away rate (both read from the
  module-level `team_strength` table). Scorelines from 0-0 up to
  `max_goals`-`max_goals` are enumerated to obtain win/draw/loss
  probabilities, which are converted to points (3 per win, 1 per draw).

  Parameters
  ----------
  home, away : labels looked up in `team_strength.index`.
  max_goals : int, default 10
      Largest number of goals per side considered; the default reproduces
      the original 0..10 enumeration.

  Returns
  -------
  tuple
      (points_home, points_away); (0, 0) when either team has no entry in
      `team_strength`.
  """
  # Teams without historical data cannot be rated; they contribute no points.
  if home not in team_strength.index or away not in team_strength.index:
    return (0, 0)

  lamb_home = team_strength.at[home,'GoalsScored'] * team_strength.at[away,'GoalsConceded']
  lamb_away = team_strength.at[away,'GoalsScored'] * team_strength.at[home,'GoalsConceded']

  # Evaluate each side's pmf once per goal count instead of once per scoreline.
  goal_counts = range(max_goals + 1)
  pmf_home = [poisson.pmf(x, lamb_home) for x in goal_counts]
  pmf_away = [poisson.pmf(y, lamb_away) for y in goal_counts]

  prob_home, prob_away, prob_draw = 0, 0, 0
  for x in goal_counts:
    for y in goal_counts:
      p = pmf_home[x] * pmf_away[y]
      if x == y:
        prob_draw += p
      elif x > y:
        prob_home += p
      else:
        prob_away += p

  points_home = 3 * prob_home + prob_draw
  points_away = 3 * prob_away + prob_draw
  return (points_home, points_away)

In [8]:
# Simulate the group stage: within each group every ordered pair (i, j) with
# i != j is played once, so each pairing happens twice (once per "home" side),
# and the expected points of both sides are accumulated into 'Pts.'.
# NOTE: this cell mutates dict_table in place, so re-running it without
# reloading the pickle adds the points a second time.
for group in dict_table:
  table = dict_table[group]
  teams_in_group = table['Equipo'].values
  n_teams = len(teams_in_group)  # do not assume exactly four teams per group
  # Expected points are fractional; make the column float up front so adding
  # them never triggers an implicit int -> float upcast on assignment.
  table['Pts.'] = table['Pts.'].astype(float)
  for i in range(n_teams):
    for j in range(n_teams):
      if i != j:
        points_home, points_away = predict_points(teams_in_group[i], teams_in_group[j])
        table.loc[table['Equipo'] == teams_in_group[i], 'Pts.'] += points_home
        table.loc[table['Equipo'] == teams_in_group[j], 'Pts.'] += points_away
  # Rank by points (best first) and keep only the team name and its points.
  dict_table[group] = (
    table.sort_values('Pts.', ascending=False)
    .reset_index(drop=True)[['Equipo', 'Pts.']]
  )

In [9]:
# Round-of-16 bracket: one row per tie, 'Winner' is filled in later.
df_octavos = pd.DataFrame(columns=['Team1','Team2','Winner'])

# First and second row of every group table (winner and runner-up).
group_winners = [dict_table[group]['Equipo'].iloc[0] for group in dict_table]
group_runners = [dict_table[group]['Equipo'].iloc[1] for group in dict_table]


def patron(i):
  """Swap pattern: map each even position to the next one and each odd position to the previous one."""
  if i % 2 == 0:
    return i + 1
  return i - 1


# Swap runners-up pairwise so each group winner faces the runner-up of the
# neighbouring group instead of its own.
group_runners = [group_runners[patron(i)] for i in range(len(group_runners))]

df_octavos['Team1'] = group_winners
df_octavos['Team2'] = group_runners
df_octavos

Unnamed: 0,Team1,Team2,Winner
0,Flamengo,Nacional,
1,Internacional,Racing Club,
2,Palmeiras,Fluminense,
3,River Plate,Cerro Porteño,
4,Corinthians,Colo-Colo,
5,Boca Juniors,Argentinos Juniors,
6,Atlético Mineiro,Olimpia,
7,Atlético Nacional,Athletico Paranaense,


In [15]:
def get_winner_key(df):
  """Fill the 'Winner' column of a bracket DataFrame in place.

  Every row is a two-legged tie between 'Team1' and 'Team2'. Expected points
  are computed with `predict_points` for both legs (each side "hosting"
  once) and summed per team. The team with the strictly larger total
  advances; on equal totals 'Team2' is chosen. Returns None.
  """
  for idx, tie in df.iterrows():
    team1, team2 = tie['Team1'], tie['Team2']
    first_leg = predict_points(team1, team2)   # (team1 points, team2 points)
    second_leg = predict_points(team2, team1)  # (team2 points, team1 points)
    aggregate_team1 = first_leg[0] + second_leg[1]
    aggregate_team2 = first_leg[1] + second_leg[0]
    df.at[idx, 'Winner'] = team1 if aggregate_team1 > aggregate_team2 else team2

# Decide every round-of-16 tie; writes the 'Winner' column of df_octavos in place.
get_winner_key(df_octavos)

Flamengo
Internacional
Palmeiras
River Plate
Corinthians
Boca Juniors
Atlético Mineiro
Atlético Nacional


In [21]:
df_octavos

Unnamed: 0,Team1,Team2,Winner
0,Flamengo,Nacional,Flamengo
1,Internacional,Racing Club,Internacional
2,Palmeiras,Fluminense,Palmeiras
3,River Plate,Cerro Porteño,River Plate
4,Corinthians,Colo-Colo,Corinthians
5,Boca Juniors,Argentinos Juniors,Boca Juniors
6,Atlético Mineiro,Olimpia,Atlético Mineiro
7,Atlético Nacional,Athletico Paranaense,Atlético Nacional


In [27]:
# Empty quarter-final bracket; rows are added by armar_key below.
df_cuartos = pd.DataFrame(columns=['Team1','Team2','Winner'])

def armar_key(df_pre, df_post):
  """Build the next round's ties in `df_post` from the winners in `df_pre`.

  Winners are paired in index order: the winner at an even index becomes
  'Team1' and the winner at the following odd index becomes 'Team2' of row
  `index // 2` in `df_post`. `df_post` is modified in place (rows are created
  on assignment); nothing is returned.
  """
  for pos, team in df_pre['Winner'].items():
    slot = 'Team1' if pos % 2 == 0 else 'Team2'
    df_post.loc[pos // 2, slot] = team

# Pair the round-of-16 winners into quarter-final ties, decide each tie
# (fills 'Winner' in place), then display the resulting bracket.
armar_key(df_octavos,df_cuartos)
get_winner_key(df_cuartos)
df_cuartos

Internacional
Palmeiras
Boca Juniors
Atlético Mineiro


Unnamed: 0,Team1,Team2,Winner
0,Flamengo,Internacional,Internacional
1,Palmeiras,River Plate,Palmeiras
2,Corinthians,Boca Juniors,Boca Juniors
3,Atlético Mineiro,Atlético Nacional,Atlético Mineiro


In [29]:
# Semi-finals: start from an empty bracket, pair the quarter-final winners,
# decide each tie (fills 'Winner' in place), then display the bracket.
df_semis = pd.DataFrame(columns=['Team1','Team2','Winner'])
armar_key(df_cuartos,df_semis)
get_winner_key(df_semis)
df_semis

Palmeiras
Boca Juniors


Unnamed: 0,Team1,Team2,Winner
0,Internacional,Palmeiras,Palmeiras
1,Boca Juniors,Atlético Mineiro,Boca Juniors


In [31]:
# Final: pair the two semi-final winners and decide the tie with the same
# get_winner_key routine used for the earlier rounds, then display the result.
df_final = pd.DataFrame(columns=['Team1', 'Team2', 'Winner'])
armar_key(df_semis,df_final)
get_winner_key(df_final)
df_final

Palmeiras


Unnamed: 0,Team1,Team2,Winner
0,Palmeiras,Boca Juniors,Palmeiras
