# SmartBet AI ⚽ – Mejora con Rating ELO
Este notebook calcula un rating ELO por equipo a partir de los resultados históricos. El rating que cada equipo tenía antes de cada partido se usa como variable predictora, en lugar de los IDs de equipo, para mejorar el modelo.

In [None]:
!pip install pandas scikit-learn matplotlib

In [None]:

import pandas as pd
from sklearn.ensemble import RandomForestClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, classification_report
import matplotlib.pyplot as plt

# Load the match data: one E0.csv per season from football-data.co.uk
# (season codes 2324, 2223 and 2122 in the URL paths).
urls = [
    "https://www.football-data.co.uk/mmz4281/2324/E0.csv",
    "https://www.football-data.co.uk/mmz4281/2223/E0.csv",
    "https://www.football-data.co.uk/mmz4281/2122/E0.csv"
]

# Only the columns needed to compute ratings and the prediction target.
columnas = ["Date", "HomeTeam", "AwayTeam", "FTR"]

# ignore_index=True: each CSV is read with its own 0..N index, so keeping
# those labels would leave the combined frame with duplicate index values.
df = pd.concat(
    [pd.read_csv(url, usecols=columnas, encoding="ISO-8859-1") for url in urls],
    ignore_index=True,
)

# Drop rows with any missing value in the selected columns.
df.dropna(inplace=True)

# Dates are parsed day-first, then the matches are put in chronological
# order, which the sequential rating update below relies on.
df["Date"] = pd.to_datetime(df["Date"], dayfirst=True)
df.sort_values("Date", inplace=True)

# Give the sorted frame a clean 0..N-1 index so labels match row positions.
df.reset_index(drop=True, inplace=True)

# ELO rating state: current rating per team name, filled in lazily.
elo_ratings = {}
# Update step size: the maximum number of points a single match can move.
K = 30
# Rating reported for any team that has not been updated yet.
default_elo = 1500


def get_elo(team):
    """Return the stored rating for *team*, or ``default_elo`` if unseen."""
    if team in elo_ratings:
        return elo_ratings[team]
    return default_elo


def update_elo(winner, loser, draw=False):
    """Apply one match result to the ratings stored in ``elo_ratings``.

    Args:
        winner: Team credited with the win (or either side when ``draw``).
        loser: Team credited with the loss (or the other side when ``draw``).
        draw: When true, both teams score 0.5 instead of 1.0 / 0.0.

    Both ratings are read before either is written, so the update is
    computed from the pre-match values of the two teams.
    """
    rating_w = get_elo(winner)
    rating_l = get_elo(loser)

    # Expected score of each side from the rating gap (logistic, base 10,
    # scale 400).
    expected_w = 1 / (1 + 10 ** ((rating_l - rating_w) / 400))
    expected_l = 1 / (1 + 10 ** ((rating_w - rating_l) / 400))

    # Actual score of each side.
    score_w, score_l = (0.5, 0.5) if draw else (1.0, 0.0)

    # Move each rating by K times (actual - expected).
    elo_ratings[winner] = rating_w + K * (score_w - expected_w)
    elo_ratings[loser] = rating_l + K * (score_l - expected_l)

elos_home, elos_away = [], []

# Walk the matches in DataFrame row order. For each match, record both
# teams' ratings as they stood BEFORE the match, and only then apply the
# result, so the stored features never include the outcome they describe.
for home, away, result in zip(df["HomeTeam"], df["AwayTeam"], df["FTR"]):
    elos_home.append(get_elo(home))
    elos_away.append(get_elo(away))

    if result == "D":
        update_elo(home, away, draw=True)
    elif result == "H":
        update_elo(home, away)
    elif result == "A":
        update_elo(away, home)
    # Any other result value leaves the ratings untouched.

# One pre-match rating per row, in the same order as the rows of df.
df["elo_home"] = elos_home
df["elo_away"] = elos_away


In [None]:

# Features are the two pre-match ELO columns; the target is the FTR column.
feature_cols = ["elo_home", "elo_away"]
X = df[feature_cols]
y = df["FTR"]

# Hold out 30% of the matches for evaluation (fixed seed for repeatability)
# and fit a 100-tree random forest on the rest.
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.3, random_state=42
)
model = RandomForestClassifier(n_estimators=100, random_state=42)
model.fit(X_train, y_train)

# Evaluate: predict once and reuse the predictions for both reports.
y_pred = model.predict(X_test)
print("Precisión con ELO ratings:", accuracy_score(y_test, y_pred))
print(classification_report(y_test, y_pred))

# Overlay the distributions of home and away pre-match ratings.
plt.figure(figsize=(8,4))
for column, label in (("elo_home", "Local"), ("elo_away", "Visitante")):
    plt.hist(df[column], bins=30, alpha=0.6, label=label)
plt.title("Distribución de ratings ELO")
plt.legend()
plt.show()
