# Tiros 

In [None]:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_squared_error, r2_score
from xgboost import XGBRegressor

# =========================
# Example dataset (replace with data loaded from your own source)
# =========================
# One list entry per match. Columns mix the team's own attacking numbers
# with what the opponent concedes defensively.
data = {}

# Own attacking stats.
data.update({
    "Sh": [15, 8, 10, 12, 20],       # total shots
    "Poss": [55, 48, 62, 40, 70],    # possession %
    "SCA": [20, 10, 15, 8, 25],      # shot-creating actions
    "Cmp%": [85, 76, 88, 70, 92],    # pass completion %
})

# Opponent defensive stats (conceded).
data.update({
    "Sh_concedidos": [12, 14, 8, 18, 10],
    "SoT_concedidos": [6, 7, 4, 9, 5],
    "CK_concedidos": [5, 6, 3, 7, 2],
    "Poss_concedida": [45, 52, 38, 60, 30],
})

# Match context: 1 = home, 0 = away.
data["Localia"] = [1, 0, 1, 0, 1]

# Target variable: the team's actual shots on target.
data["SoT"] = [8, 3, 5, 4, 12]

df = pd.DataFrame(data)

# =========================
# Features and target
# =========================
# Predictors: own attacking stats, opponent defensive stats, home/away flag.
X = df.loc[:, [
    "Sh", "Poss", "SCA", "Cmp%",
    "Sh_concedidos", "SoT_concedidos", "CK_concedidos", "Poss_concedida",
    "Localia",
]]
# Target: shots on target.
y = df["SoT"]

# =========================
# Train/test split
# =========================
# 80/20 split with a fixed seed so the partition is repeatable.
# NOTE: with the 5-row example frame this leaves a single row in the test set.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
)

# =========================
# Baseline XGBoost model
# =========================
# Hyperparameters of the baseline regressor, kept together in one mapping.
xgb_params = {
    "n_estimators": 200,
    "learning_rate": 0.1,
    "max_depth": 4,
    "subsample": 0.8,
    "colsample_bytree": 0.8,
    "random_state": 42,
}
model = XGBRegressor(**xgb_params)
model.fit(X_train, y_train)

# Predict on the held-out rows and report R^2 followed by RMSE.
y_pred = model.predict(X_test)

r2_base = r2_score(y_test, y_pred)
print("R2 Score (base):", r2_base)

mse_base = mean_squared_error(y_test, y_pred)
print("RMSE (base):", np.sqrt(mse_base))

# =========================
# Bootstrapping for robustness
# =========================
# Refit the model on resampled training sets and collect the test-set R^2 of
# each refit to estimate how much the score varies with the training sample.
#
# NOTE: r2_score is not defined for fewer than two test samples; if X_test
# holds a single row every score below is NaN, and so are the summaries.
from sklearn.base import clone  # unfitted copy with identical hyperparameters

n_iterations = 100
bootstrap_scores = []

# Seeded generator so the resamples (and the reported interval) are
# reproducible, in line with the fixed random_state used for split and model.
rng = np.random.default_rng(42)
n_train = len(X_train)

for _ in range(n_iterations):
    # Bootstrap resample: draw n_train row positions with replacement.
    idx = rng.choice(n_train, size=n_train, replace=True)
    X_boot, y_boot = X_train.iloc[idx], y_train.iloc[idx]

    # Fit a fresh clone instead of `model` itself: refitting `model` in place
    # would overwrite the baseline fit, and the feature-importance section
    # further down would then describe the last resample, not the baseline.
    boot_model = clone(model)
    boot_model.fit(X_boot, y_boot)

    y_pred_boot = boot_model.predict(X_test)
    score = r2_score(y_test, y_pred_boot)
    bootstrap_scores.append(score)

# Mean score and a percentile-based 95% interval over all resamples.
print(f"\nBootstrapped R2 promedio: {np.mean(bootstrap_scores):.3f}")
print(f"Intervalo de confianza (95%): [{np.percentile(bootstrap_scores, 2.5):.3f}, {np.percentile(bootstrap_scores, 97.5):.3f}]")

# =========================
# Feature importance
# =========================
import matplotlib.pyplot as plt

# Importances are read from whatever fit `model` currently holds.
features = X.columns
importances = model.feature_importances_

# Ascending order, so the most important feature ends up as the top bar.
sorted_idx = importances.argsort()
ranked_features = features[sorted_idx]
ranked_importances = importances[sorted_idx]

plt.barh(ranked_features, ranked_importances)
plt.title("Importancia de Variables (XGBoost)")
plt.show()


In [1]:
import numpy as np
import pandas as pd
from xgboost import XGBRegressor

# =========================
# Historical dataset
# =========================
# One list entry per past match, assembled group by group.
data = {}

# Own attacking stats.
data.update({
    "Sh": [12, 17, 6, 18, 15],
    "Poss": [59, 67, 68, 68, 65],
    "SCA": [3, 2, 3, 1, 3],
    "Cmp%": [85, 87, 86, 84, 88],
})

# Opponent stats (conceded).
data.update({
    "Sh_concedidos": [11, 8, 8, 6, 5],
    "SoT_concedidos": [0, 2, 4, 3, 2],
    "CK_concedidos": [12, 6, 3, 5, 2],
    "Poss_concedida": [45, 49, 38, 60, 30],
})

# Home/away flag.
data["Localia"] = [1, 1, 0, 0, 0]

# Target: actual shots on target.
data["SoT"] = [5, 10, 5, 7, 6]

df = pd.DataFrame(data)

# Target column first, then every remaining column as a predictor.
y = df["SoT"]
X = df.drop("SoT", axis=1)

# =========================
# Train on ALL the data
# =========================
# No hold-out here: every historical row is used to fit the regressor.
hyperparams = dict(
    n_estimators=300,
    learning_rate=0.1,
    max_depth=4,
    subsample=0.8,
    colsample_bytree=0.8,
    random_state=42,
)
model = XGBRegressor(**hyperparams)
model.fit(X, y)

# =========================
# Predict the next match
# =========================
# Simulated inputs for the upcoming match; the keys are the same names, in
# the same order, as the training feature columns.
proximo = {
    "Sh": 14,              # expected shots
    "Poss": 58,            # expected possession
    "SCA": 18,             # shot-creating actions
    "Cmp%": 83,            # pass completion %
    "Sh_concedidos": 11,   # shots the opponent concedes
    "SoT_concedidos": 5,
    "CK_concedidos": 4,
    "Poss_concedida": 42,
    "Localia": 1,          # playing at home
}
# Single-row frame built from the mapping above.
nuevo_partido = pd.DataFrame([proximo])

prediccion = model.predict(nuevo_partido)
sot_esperado = prediccion[0]
print(f"Tiros a puerta esperados en el próximo partido: {sot_esperado:.2f}")


Tiros a puerta esperados en el próximo partido: 4.89
