# Tiros 

In [2]:
import pandas as pd
import numpy as np
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import TimeSeriesSplit, cross_val_score
import warnings

warnings.filterwarnings("ignore")

# ========================
# Funciones auxiliares
# ========================

def load_and_prepare_excel(path):
    """Lee un Excel con encabezados en 2 filas, aplana columnas y devuelve DataFrame con columna Sh lista."""
    df = pd.read_excel(path, header=[0, 1])

    # Aplanar nombres de columnas
    df.columns = [
        "_".join([str(x).strip() for x in col if str(x) != "nan"]).strip("_")
        for col in df.columns.values
    ]

    # Normalizar: si existe 'Standard_Sh' u otra variante, renombrar a 'Sh'
    rename_map = {}
    for c in df.columns:
        if "Sh" in c and c != "Sh":
            rename_map[c] = "Sh"
    if rename_map:
        df = df.rename(columns=rename_map)

    # Reset index
    df = df.reset_index(drop=True)
    return df

def add_rolling_features(df, window=5):
    """Agrega rolling features basadas en Sh (tiros)."""
    df_feat = df.copy()
    if "Sh" not in df_feat.columns:
        raise ValueError("El DataFrame no tiene columna 'Sh'. Revisa los encabezados.")
    df_feat["Sh_roll_mean_" + str(window)] = (
        df_feat["Sh"].shift(1).rolling(window=window).mean()
    )
    df_feat["Sh_roll_std_" + str(window)] = (
        df_feat["Sh"].shift(1).rolling(window=window).std()
    )
    df_feat["Sh_roll_mean_3"] = df_feat["Sh"].shift(1).rolling(window=3).mean()
    df_feat["matches_played"] = np.arange(len(df_feat)) + 1
    return df_feat

def build_train(df, target_col="Sh", min_rows=10, window=5):
    """Construye X, y para entrenamiento de predicción de tiros."""
    df_feat = add_rolling_features(df, window=window)
    feature_cols = ["Sh_roll_mean_" + str(window), "Sh_roll_std_" + str(window),
                    "Sh_roll_mean_3", "matches_played"]
    df_feat = df_feat.dropna(subset=feature_cols + [target_col])
    if len(df_feat) < min_rows:
        raise ValueError("Muy pocos partidos para entrenar el modelo.")
    X = df_feat[feature_cols].values
    y = df_feat[target_col].values
    return X, y, feature_cols, df_feat

def train_model(X, y):
    """Entrena RandomForest y devuelve modelo + métricas CV."""
    model = RandomForestRegressor(n_estimators=150, random_state=42)
    tscv = TimeSeriesSplit(n_splits=5)
    mae = -np.mean(cross_val_score(model, X, y, cv=tscv, scoring="neg_mean_absolute_error"))
    rmse = np.mean(
        np.sqrt(-cross_val_score(model, X, y, cv=tscv, scoring="neg_mean_squared_error"))
    )
    model.fit(X, y)
    return model, mae, rmse

def predict_next_shots_for_df(df, window=5, min_rows=10):
    """Predice tiros del siguiente partido para un DataFrame."""
    df_local = df.copy().reset_index(drop=True)

    # Ordenar por fecha si existe
    if "Date" in df_local.columns:
        try:
            df_local["Date"] = pd.to_datetime(df_local["Date"], errors="coerce")
            df_local = df_local.sort_values("Date").reset_index(drop=True)
        except Exception:
            pass

    if "Sh" not in df_local.columns:
        raise ValueError("El DataFrame no tiene columna 'Sh'.")

    X, y, feature_cols, df_f = build_train(df_local, target_col="Sh", min_rows=min_rows, window=window)
    model, mae, rmse = train_model(X, y)

    # Features de la última fila
    df_feat = add_rolling_features(df_local, window=window)
    last_row = df_feat.iloc[[-1]]
    feat = last_row[feature_cols].copy()

    # Rellenar NaN con medias
    col_means = np.nanmean(X, axis=0)
    feat_values = feat.values.astype(float)
    inds = np.where(np.isnan(feat_values))
    if len(inds[0]) > 0:
        feat_values[inds] = np.take(col_means, inds[1])

    pred = model.predict(feat_values)[0]

    return {
        "model": model,
        "mae_cv": mae,
        "rmse_cv": rmse,
        "pred_next_shots": float(pred),
        "feature_cols": feature_cols,
        "last_features": pd.DataFrame(feat_values, columns=feature_cols),
    }

def predict_match_total_shots(file_a, file_b, window=5, min_rows=10):
    """Predice tiros totales de un partido entre equipo A y B a partir de sus Excels."""
    df_a = load_and_prepare_excel(file_a)
    df_b = load_and_prepare_excel(file_b)

    print("Prediciendo equipo A ...")
    res_a = predict_next_shots_for_df(df_a, window=window, min_rows=min_rows)
    print(f" -> Predicción tiros next (A): {res_a['pred_next_shots']:.2f} (MAE CV: {res_a['mae_cv']:.2f}, RMSE CV: {res_a['rmse_cv']:.2f})")

    print("Prediciendo equipo B ...")
    res_b = predict_next_shots_for_df(df_b, window=window, min_rows=min_rows)
    print(f" -> Predicción tiros next (B): {res_b['pred_next_shots']:.2f} (MAE CV: {res_b['mae_cv']:.2f}, RMSE CV: {res_b['rmse_cv']:.2f})")

    total = res_a["pred_next_shots"] + res_b["pred_next_shots"]
    print(f"\nPredicción de tiros totales en el enfrentamiento: {total:.2f}")

    return {"team_a": res_a, "team_b": res_b, "total_predicted_shots": total}


# ========================
# EJEMPLO DE USO
# ========================

if __name__ == "__main__":
    # Coloca aquí los nombres de tus archivos
    file_a = r"D:\Fut_Pred\Data_equipos\2024-2025\Inglaterra\Liverpool.xlsx"
    file_b = r"D:\Fut_Pred\Data_equipos\2024-2025\Inglaterra\Nothing.xlsx"  # reemplaza con tu otro archivo Excel

    result = predict_match_total_shots(file_a, file_b, window=5, min_rows=8)


Prediciendo equipo A ...


ValueError: Cannot set a DataFrame with multiple columns to the single column Sh_roll_mean_5