In [1]:
import numpy as np
import pandas as pd
import pickle
from collections import defaultdict
from joblib import Parallel, delayed
from datetime import date
import scraping as s
import warnings
warnings.filterwarnings("ignore")

In [2]:
def CambioElo(resultado, Elo1, Elo2, k = 10):
  """
  Compute the Elo rating change of the first team after a match.

  Args:
    resultado (str): Outcome from the first team's point of view:
      'V' (win), 'E' (draw) or 'D' (loss).
    Elo1 (int, float): Elo rating of the first team.
    Elo2 (int, float): Elo rating of the second team.
    k (int): Weighting factor applied to the change (defaults to 10).

  Returns:
    float: Amount to add to the first team's Elo; this is the change,
      not the resulting rating.

  Raises:
    KeyError: If resultado is not one of 'V', 'E', 'D'.
  """
  # Expected score of the first team: logistic curve on the rating gap
  esperado = 1/(10**(-(Elo1-Elo2)/400)+1)
  # Actual score obtained in the match
  obtenido = {'V': 1, 'E' : 0.5, 'D' : 0}[resultado]
  return (obtenido-esperado)*k

In [3]:
def NuevoElo(resultado, Elo1, Elo2, k = 10):
  """
  Apply a match result to both teams' Elo ratings.

  Args:
    resultado (str): Outcome from the first team's point of view:
      'V' (win), 'E' (draw) or 'D' (loss).
    Elo1 (int, float): Elo rating of the first team.
    Elo2 (int, float): Elo rating of the second team.
    k (int): Weighting factor passed to CambioElo (defaults to 10).

  Returns:
    tuple: (new Elo of the first team, new Elo of the second team).
  """
  # What the first team gains is exactly what the second team loses
  variacion = CambioElo(resultado, Elo1, Elo2, k)
  return (Elo1 + variacion, Elo2 - variacion)

In [4]:
def PartidosFaltantes(url, liga):
  """
  Scrape the season schedule and keep only the fixtures not yet played.

  Args:
    url (str): URL of the schedule page to scrape.
    liga (str): League identifier forwarded to s.WebScrapingPartidos.

  Returns:
    DataFrame: Rows whose 'Marcador' is the empty string, without the
      match-detail columns listed below. The original row labels are
      kept, so callers that need 0..n-1 labels must reset the index.

  Raises:
    KeyError: If any of the columns to drop is missing from the scraped
      table.
  """
  # Full schedule, played and pending
  df = s.WebScrapingPartidos(url, liga)

  # Columns that are not needed for the simulation
  columnas_sobrantes = ['Día', 'Hora', 'xG', 'Marcador', 'Asistencia',
                        'Sedes', 'Árbitro', 'Liga', 'Gol_local', 'Gol_visitante']

  # A fixture without a score has not been played yet
  pendientes = df.loc[df['Marcador'] == '']

  # drop() returns a new frame, so the filtered slice is never mutated in place
  return pendientes.drop(columns=columnas_sobrantes)

In [5]:
def resultado_campeon(clasificacion):
  """
  Return the league champion of a final ranking.

  Args:
    clasificacion (sequence): Team names ordered by points, best first.

  Returns:
    The first element of clasificacion (the champion's name).
  """
  lider = clasificacion[0]
  return lider

In [6]:
def resultado_champion(clasificacion):
  """
  Return the teams that qualify for the Champions League.

  Args:
    clasificacion (sequence): Team names ordered by points, best first.

  Returns:
    A slice of clasificacion with its first four entries (fewer if the
    ranking is shorter).
  """
  plazas_champions = 4
  return clasificacion[:plazas_champions]

In [7]:
def resultado_Europa(clasificacion):
  """
  Return the teams that qualify for the Europa League.

  Args:
    clasificacion (sequence): Team names ordered by points, best first.

  Returns:
    A slice of clasificacion with the entries in fifth and sixth place.
  """
  primera_plaza, fin_plazas = 4, 6
  return clasificacion[primera_plaza:fin_plazas]

In [8]:
def resultado_Descenso(clasificacion):
  """
  Return the teams that are relegated.

  Args:
    clasificacion (sequence): Team names ordered by points, best first.

  Returns:
    A slice of clasificacion with its last three entries. When the
    ranking has fewer than three entries the whole ranking is returned.
  """
  # A negative slice start clamps to the beginning on short rankings;
  # `len(clasificacion) - 3` would go negative and drop entries there.
  return clasificacion[-3:]

# Obtención de los datos necesarios para realizar la simulación

In [9]:
# Fetch the fixtures that are still to be played
temporada = '2024-2025'
Liga = 'La-Liga'
cod_Liga = 12

url = f'https://fbref.com/es/comps/{cod_Liga}/{temporada}/horario/Marcadores-y-partidos-de-{temporada}-{Liga}'
# Renumber the rows 0..n-1 after the filtering done by PartidosFaltantes
partidos = PartidosFaltantes(url, Liga).reset_index(drop=True)

In [10]:
# Preview the first pending fixtures
partidos.head()

Unnamed: 0,Sem.,Fecha,Local,Visitante
0,21,2025-01-24,Las Palmas,Osasuna
1,21,2025-01-25,Mallorca,Betis
2,21,2025-01-25,Atlético Madrid,Villarreal
3,21,2025-01-25,Sevilla,Espanyol
4,21,2025-01-25,Valladolid,Real Madrid


In [11]:
# Fetch the Elo table published for today's date
# (`año` holds a full YYYY-MM-DD date string, not just a year)
año = date.today().strftime('%Y-%m-%d')
url = f'http://api.clubelo.com/{año}'
# Index by club name so ratings can be looked up by team
Elo = s.WebScrapingElo(url, ['ESP'], año).set_index('Club')

In [12]:
# Preview the first rows of the Elo table
Elo.head()

Unnamed: 0_level_0,Elo,Fecha
Club,Unnamed: 1_level_1,Unnamed: 2_level_1
Real Madrid,1954.102173,2025-01-20
Barcelona,1886.748169,2025-01-20
Atlético Madrid,1854.54248,2025-01-20
Athletic Club,1815.903809,2025-01-20
Real Sociedad,1732.735474,2025-01-20


In [13]:
# Fetch the league standings table for the current matchday
url = f'https://fbref.com/es/comps/{cod_Liga}/{temporada}/Estadisticas-{temporada}-{Liga}'
tabla = s.WebScrappingTabla(url)

In [14]:
# Preview the top of the standings
tabla.head()

Unnamed: 0_level_0,PJ,Pts
Equipo,Unnamed: 1_level_1,Unnamed: 2_level_1
Real Madrid,20,46
Atlético Madrid,20,44
Barcelona,20,39
Athletic Club,20,39
Villarreal,20,33


# Llamar modelos

In [15]:
# Load the trained classifier and the team-name label encoder.
# NOTE(review): pickle.load can execute arbitrary code contained in the file;
# only load artefacts produced by this project.
nombre = '../Modelo/model.pkl'
with open(nombre, 'rb') as archivo:
  model = pickle.load(archivo)

nombre2 = '../Modelo/LE.pkl'
with open(nombre2, 'rb') as archivo:
  LE = pickle.load(archivo)

In [16]:
# Register the promoted teams in the label encoder when it does not know them
ascensos = ['Leganés','Valladolid', 'Espanyol']
for equipo in ascensos:
  if equipo in LE.classes_:
    continue
  # Unknown team: append its name at the end of the encoder's class array
  LE.classes_ = np.append(LE.classes_, equipo)

# Simulación de la liga

In [17]:
def Simulacion(partidos, tabla, Elo):
  """
  Play out the remaining fixtures once and return the final standings.

  Each match is resolved by sampling an outcome from the probabilities
  returned by the global `model`. Games played, points and both teams'
  Elo ratings are updated before the next fixture is processed.

  Args:
    partidos (DataFrame): Fixtures to play, with 'Local' and 'Visitante'
      columns. Rows are processed in order; the index labels are not used.
    tabla (DataFrame): Current standings indexed by team name, with 'PJ'
      and 'Pts' columns.
    Elo (DataFrame): Ratings indexed by team name, with an 'Elo' column.

  Returns:
    DataFrame: Standings after all fixtures, sorted by 'Pts' descending.

  Raises:
    KeyError: If a team in partidos is missing from tabla or Elo.
  """

  # Plain-dict snapshots: the caller's DataFrames are never modified
  elo_actual = Elo['Elo'].to_dict()
  tabla_sim = tabla.to_dict()

  # Encode each team name once per simulation instead of once per match
  codigos = {}
  def codificar(equipo):
    if equipo not in codigos:
      codigos[equipo] = LE.transform([equipo])[0]
    return codigos[equipo]

  # Iterate over the column values directly so the result does not depend
  # on partidos having a 0..n-1 index
  for local, visitante in zip(partidos['Local'], partidos['Visitante']):

    # Ratings going into this match
    elo_local = elo_actual[local]
    elo_visitante = elo_actual[visitante]

    # Single-row feature matrix for the classifier
    data = [[codificar(local), codificar(visitante), elo_local, elo_visitante]]
    prediccion = model.predict_proba(data)

    # NOTE(review): assumes the predict_proba columns are ordered
    # loss, draw, win for the home team - confirm against model.classes_
    resultado = np.random.choice(['D', 'E', 'V'], p = prediccion[0])

    # One more game played for both sides
    tabla_sim['PJ'][local] += 1
    tabla_sim['PJ'][visitante] += 1

    # Carry the updated ratings into the following fixtures
    elo_actual[local], elo_actual[visitante] = NuevoElo(resultado, elo_local, elo_visitante)

    # 3 points for a win, 1 each for a draw
    if resultado == 'V':
      tabla_sim['Pts'][local] += 3
    elif resultado == 'E':
      tabla_sim['Pts'][local] += 1
      tabla_sim['Pts'][visitante] += 1
    else:
      tabla_sim['Pts'][visitante] += 3

  # Back to a DataFrame, best team first (ties are not broken explicitly)
  return pd.DataFrame.from_dict(tabla_sim).sort_values('Pts', ascending = False)

In [18]:
def realizar_simulaciones(partidos, tabla, Elo, Num_sim):

  """
  Simulate the rest of the league Num_sim times and estimate, per team,
  the probability of each end-of-season outcome.

  Args:
    partidos (Dataframe): Fixtures still to be played.
    tabla (Dataframe): Current standings.
    Elo (Dataframe): Elo ratings of the teams before the fixtures.
    Num_sim (int): Number of simulations to run.

  Returns:
    tuple: Four defaultdicts mapping team name -> percentage (0-100) of
      simulations, in this order:
        campeones  finishing first
        champion   finishing in a Champions League place
        Europa     finishing in a Europa League place
        Descenso   finishing in a relegation place
      Teams that never reach an outcome are absent from that dict.
  """
  campeones = defaultdict(int)
  champion = defaultdict(int)
  Europa = defaultdict(int)
  Descenso = defaultdict(int)

  # Run the simulations in parallel on all available cores
  resultados = Parallel(n_jobs=-1)(delayed(Simulacion)(partidos, tabla, Elo) for _ in range(Num_sim))

  # Tally integer occurrences in a single pass. Adding (1/Num_sim)*100
  # once per simulation would accumulate floating-point error.
  for tabla_res in resultados:
    clasificacion = tabla_res.index
    campeones[resultado_campeon(clasificacion)] += 1
    for equipo in resultado_champion(clasificacion):
      champion[equipo] += 1
    for equipo in resultado_Europa(clasificacion):
      Europa[equipo] += 1
    for equipo in resultado_Descenso(clasificacion):
      Descenso[equipo] += 1

  # Turn the counts into percentages with one division per team.
  # Only existing keys are reassigned, so iterating while updating is safe.
  for conteo in (campeones, champion, Europa, Descenso):
    for equipo in conteo:
      conteo[equipo] = conteo[equipo] * 100 / Num_sim

  return campeones, champion, Europa, Descenso

In [19]:
# Smoke test: run a single simulation and display the resulting standings
tabla_ = Simulacion(partidos, tabla, Elo)
tabla_

Unnamed: 0,PJ,Pts
Real Madrid,38,92
Barcelona,38,87
Atlético Madrid,38,81
Athletic Club,38,76
Girona,38,60
Villarreal,38,55
Sevilla,38,54
Celta Vigo,38,51
Real Sociedad,38,50
Mallorca,38,50


In [20]:
# Simulate the league 1000 times and collect the per-team outcome percentages
campeones, champion, Europa, Descenso = realizar_simulaciones(partidos, tabla, Elo, Num_sim=1000)

## Guardar resultados de la simulación

In [21]:
# Turn each result dict into a two-column frame: team name and the
# percentage obtained for the current matchday column
columnas_jornada = ['Equipo', 'J20']
C, Ch, E, D = (
    pd.DataFrame(list(probabilidades.items()), columns=columnas_jornada)
    for probabilidades in (campeones, champion, Europa, Descenso)
)

In [22]:
# Title probabilities obtained for the current matchday
C

Unnamed: 0,Equipo,J20
0,Real Madrid,77.8
1,Atlético Madrid,14.0
2,Barcelona,7.3
3,Athletic Club,0.9


In [23]:
# Paths of the per-matchday history files, one CSV per outcome
ruta = '../Data/Historial/campeones.csv'
ruta1 = '../Data/Historial/champion.csv'
ruta2 = '../Data/Historial/europa.csv'
ruta3 = '../Data/Historial/descenso.csv'

In [24]:
# Load the history of the previous matchdays, one frame per outcome
c0, ch1, e2, d3 = (
    pd.read_csv(archivo_historial)
    for archivo_historial in (ruta, ruta1, ruta2, ruta3)
)

In [25]:
# Append the current matchday column to each history frame. The outer join
# keeps teams present on only one side; their missing percentages become 0.
# NOTE(review): C, Ch, E and D are overwritten here, so running this cell a
# second time merges the already-merged frames; re-run the cell that builds
# them first.
C, Ch, E, D = (
    historial.merge(jornada_actual, on = 'Equipo', how = 'outer').fillna(0)
    for historial, jornada_actual in ((c0, C), (ch1, Ch), (e2, E), (d3, D))
)

In [26]:
# Title probability per team across matchdays
C

Unnamed: 0,Equipo,J0,J1,J2,J3,J4,J5,J6,J7,J8,J10,J11,J12,J13,J14,J15,J16,J17,J18,J20
0,Athletic Club,0.7,0.1,0.2,0.3,0.1,0.1,0.0,0.1,0.0,0.1,0.0,0.0,0.0,0.1,0.0,0.0,0.5,0.4,0.9
1,Atlético Madrid,3.4,5.1,5.2,3.5,6.3,4.0,3.4,2.6,5.2,2.1,0.6,0.1,1.1,2.7,5.9,8.5,15.8,37.1,14.0
2,Barcelona,13.0,21.3,22.2,33.3,33.0,39.3,41.3,44.2,32.1,38.5,74.7,80.0,68.7,64.9,41.0,42.7,33.3,9.6,7.3
3,Betis,0.1,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,Girona,0.5,1.2,1.0,1.7,1.6,0.3,0.2,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,Real Madrid,81.9,72.3,71.2,61.1,59.0,56.3,55.0,53.1,62.4,59.3,24.7,19.9,30.2,32.3,52.9,48.8,50.4,52.9,77.8
6,Real Sociedad,0.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Villarreal,0.1,0.0,0.1,0.1,0.0,0.0,0.1,0.0,0.2,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.0,0.0,0.0


In [27]:
# Champions League qualification probability per team across matchdays
Ch

Unnamed: 0,Equipo,J0,J1,J2,J3,J4,J5,J6,J7,J8,J10,J11,J12,J13,J14,J15,J16,J17,J18,J20
0,Alavés,0.8,1.3,0.2,2.2,2.3,1.2,1.6,3.1,1.0,0.3,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,Athletic Club,36.9,33.2,27.6,33.5,19.6,27.5,31.6,47.0,38.0,46.6,48.7,42.0,35.1,38.9,53.1,75.3,83.9,90.8,96.6
2,Atlético Madrid,79.3,78.2,82.6,77.2,87.0,88.0,90.2,93.6,95.1,96.1,91.3,91.5,93.4,98.3,98.5,98.7,99.8,99.8,99.9
3,Barcelona,92.8,96.9,96.5,98.4,98.7,99.6,99.9,99.8,99.8,100.0,100.0,100.0,100.0,100.0,99.9,100.0,99.9,99.3,99.8
4,Betis,9.8,9.2,9.6,11.4,9.2,9.6,9.3,6.5,6.2,5.2,8.6,14.9,9.1,1.5,1.2,0.7,1.8,0.4,0.0
5,Celta Vigo,2.7,2.6,5.1,2.2,1.3,1.5,1.4,2.0,0.4,0.9,1.1,0.8,0.7,1.0,0.6,0.1,0.1,0.0,0.0
6,Espanyol,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
7,Getafe,0.3,0.6,0.5,0.5,0.2,0.1,0.2,0.0,0.2,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,Girona,46.0,50.6,44.8,46.6,61.8,43.7,40.3,20.2,21.3,14.5,8.2,9.3,10.3,12.6,9.8,3.7,2.7,1.8,0.7
9,Las Palmas,0.0,0.1,0.1,0.0,0.0,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [28]:
# Europa League qualification probability per team across matchdays
E

Unnamed: 0,Equipo,J0,J1,J2,J3,J4,J5,J6,J7,J8,J10,J11,J12,J13,J14,J15,J16,J17,J18,J20
0,Alavés,6.3,3.9,4.2,7.3,13.0,6.0,10.0,12.2,7.4,1.8,0.7,0.8,0.9,1.4,0.3,0.1,0.5,0.2,0.9
1,Athletic Club,34.9,35.2,34.7,37.2,39.2,39.9,42.0,36.8,39.1,35.9,34.9,37.3,40.1,42.2,34.6,22.0,14.7,8.4,3.4
2,Atlético Madrid,15.4,16.1,13.8,17.4,11.4,10.9,8.2,5.7,4.6,3.5,7.9,7.6,5.6,1.5,1.4,1.3,0.2,0.2,0.1
3,Barcelona,6.1,2.7,3.3,1.5,1.2,0.3,0.1,0.2,0.2,0.0,0.0,0.0,0.0,0.0,0.1,0.0,0.1,0.7,0.2
4,Betis,24.6,26.0,25.6,26.0,29.7,29.2,24.9,22.4,23.5,24.2,30.0,30.9,29.9,16.7,9.0,12.9,29.8,23.3,5.7
5,Celta Vigo,6.9,9.2,14.1,10.1,8.3,10.0,9.1,8.4,5.0,10.8,6.2,5.7,3.8,6.6,2.9,5.6,4.3,7.3,4.0
6,Espanyol,0.3,0.1,0.0,0.0,0.0,0.7,0.4,0.2,0.0,0.3,0.0,0.1,0.0,0.1,0.0,0.0,0.0,0.0,0.0
7,Getafe,1.5,3.1,3.4,2.2,2.0,0.7,1.2,0.7,0.7,0.9,1.1,0.7,0.4,0.2,0.1,0.3,0.3,0.3,0.5
8,Girona,32.4,32.9,34.1,32.8,26.6,37.8,36.8,42.8,37.2,36.7,29.7,29.3,33.6,41.5,43.1,38.5,29.2,37.5,42.8
9,Las Palmas,0.2,0.2,0.2,0.1,0.3,0.1,0.2,0.0,0.0,0.0,0.0,0.0,0.2,0.0,0.3,0.8,0.9,0.8,0.4


In [29]:
# Relegation probability per team across matchdays
D

Unnamed: 0,Equipo,J0,J1,J2,J3,J4,J5,J6,J7,J8,J10,J11,J12,J13,J14,J15,J16,J17,J18,J20
0,Alavés,5.2,8.4,7.8,2.9,1.3,1.9,2.1,1.1,2.1,6.7,11.5,6.8,9.8,11.2,17.1,13.6,16.6,8.9,10.8
1,Athletic Club,0.0,0.0,0.0,0.2,0.0,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,Betis,0.5,0.7,0.5,0.1,0.2,0.0,0.2,0.4,0.1,0.1,0.1,0.0,0.0,0.2,0.3,0.3,0.2,0.0,1.2
3,Celta Vigo,4.3,2.8,1.3,1.5,2.9,0.9,1.4,2.0,2.6,1.8,2.3,2.0,2.3,0.6,3.5,0.9,1.9,0.3,1.8
4,Espanyol,45.7,60.1,69.5,59.2,44.6,28.9,32.4,33.2,40.9,31.5,47.6,49.1,53.6,58.0,42.9,43.0,58.4,59.6,67.8
5,Getafe,13.0,11.7,12.6,13.3,13.3,17.2,20.4,25.2,15.5,14.2,14.6,17.4,27.6,21.2,25.8,27.0,18.6,29.4,18.0
6,Girona,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1,0.1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.1
7,Las Palmas,35.8,40.2,42.9,41.1,46.4,56.9,60.3,64.8,65.0,62.8,44.6,46.5,35.6,42.3,20.3,11.0,11.2,4.4,15.3
8,Leganés,70.3,69.0,58.5,57.1,65.8,68.7,69.0,75.4,70.1,72.2,65.7,58.7,49.6,50.8,49.6,55.6,32.6,36.4,24.7
9,Mallorca,11.1,9.3,14.2,15.8,7.6,11.0,8.1,2.2,0.6,0.7,0.7,0.4,0.6,0.7,0.2,0.6,0.5,0.0,0.1


In [33]:
# #Guarda los nuevos resultados en la ruta
# C.to_csv(ruta, index=False)
# Ch.to_csv(ruta1, index=False)
# E.to_csv(ruta2, index=False)
# D.to_csv(ruta3, index=False)

# Predicción para un solo partido

In [31]:
def PrediccionPartido(L, V):
    """
    Predict the outcome probabilities of a single Spanish league match.

    Uses the global label encoder `LE`, the global `Elo` table and the
    global `model`.

    Args:
        L (string): Name of the home team.
        V (string): Name of the away team.

    Returns:
        numpy array: Output of model.predict_proba for this one match,
        documented by the notebook as the home team's [[D, E, V]]
        (loss, draw, win) probabilities.
    """
    # Encode both team names the way the model expects them
    codigo_local, codigo_visitante = LE.transform([L])[0], LE.transform([V])[0]

    # One feature row: encoded names followed by the current Elo ratings
    fila = [codigo_local, codigo_visitante, Elo.loc[L, 'Elo'], Elo.loc[V, 'Elo']]
    return model.predict_proba([fila])

In [32]:
# Example: probabilities for one fixture, printed as a small report
L = 'Barcelona'
V = 'Atlético Madrid'
Prediccion = PrediccionPartido(L,V)

separador = "-"*50
# Single row of probabilities; indices used below: 2 = home win, 1 = draw, 0 = home loss
fila = Prediccion[0]

print(separador)
print(f'Local: {L}')
print(f'Visitante: {V}')
print(separador)
print(f'Probabilidad de victoria del local: {np.round(fila[2]*100, 1)}%')
print(f'Probabilidad de empate: {np.round(fila[1]*100, 1)}%')
print(f'Probabilidad de derrota del local: {np.round(fila[0]*100, 1)}%')
print(separador)

--------------------------------------------------
Local: Barcelona
Visitante: Atlético Madrid
--------------------------------------------------
Probabilidad de victoria del local: 58.3%
Probabilidad de empate: 26.3%
Probabilidad de derrota del local: 15.5%
--------------------------------------------------
