# Modelo: Regresión Logística - HighImpactPlayer -- clasificación

In [None]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import StandardScaler
from sklearn.linear_model import LogisticRegression
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score, confusion_matrix

# Load the round-trace dataset; the CSV is semicolon-delimited.
url = "https://raw.githubusercontent.com/Seba-RiveraC/Crisp_DM_CSGO/master/Anexo%20ET_demo_round_traces_2022%20(1).csv"
df = pd.read_csv(url, sep=';')

# Derived features:
#   ImpactPlayerScore = kills + half-weighted assists + headshots
#   KAST              = 1 when the row has at least one kill or assist, else 0
df = df.assign(
    ImpactPlayerScore=df['RoundKills'] + 0.5 * df['RoundAssists'] + df['RoundHeadshots'],
    KAST=(df['RoundKills'].gt(0) | df['RoundAssists'].gt(0)).astype(int),
)

# Keep a reproducible 45% random sample of the rows.
df = df.sample(frac=0.45, random_state=42)

def convertir_a_float(valor):
    """Convert a European-formatted numeric string to ``float``.

    Strings have every ``.`` removed (treated as a thousands separator) and
    every ``,`` replaced by ``.`` (decimal separator) before parsing.

    Args:
        valor: Value to convert. Only ``str`` instances are parsed.

    Returns:
        The parsed ``float`` for strings, ``np.nan`` when the cleaned string
        is not a valid number, or ``valor`` unchanged when it is not a string.
    """
    if not isinstance(valor, str):
        return valor

    normalizado = valor.replace('.', '').replace(',', '.')
    try:
        return float(normalizado)
    except ValueError:
        # Only parse failures map to NaN; a bare ``except`` would also
        # swallow KeyboardInterrupt/SystemExit.
        return np.nan

# Run the numeric columns through convertir_a_float, then discard rows
# where any of them is missing.
columnas_numericas = ['TimeAlive', 'TravelledDistance', 'KAST', 'ImpactPlayerScore']
for col in columnas_numericas:
    df[col] = df[col].apply(convertir_a_float)

df.dropna(subset=columnas_numericas, inplace=True)

# Binary target: 1 when ImpactPlayerScore reaches the 75th percentile
# of the remaining rows, 0 otherwise.
umbral = df['ImpactPlayerScore'].quantile(0.75)
df['HighImpactPlayer'] = df['ImpactPlayerScore'].ge(umbral).astype(int)

# Predictors and target.
# NOTE(review): KAST is derived from RoundKills/RoundAssists, which also feed
# ImpactPlayerScore (and therefore the target) — possible target leakage;
# confirm this is intended.
X = df[['KAST', 'TimeAlive', 'TravelledDistance']]
y = df['HighImpactPlayer']

# Train/test split (default test_size), seeded for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=42)

# Fit the scaler on the training split only, then apply the same
# transformation to both splits.
scaler = StandardScaler().fit(X_train)
X_train = scaler.transform(X_train)
X_test = scaler.transform(X_test)

# Model: logistic regression
model = LogisticRegression(max_iter=1000).fit(X_train, y_train)
y_pred = model.predict(X_test)

# Metrics computed on the test split, in the same order as they are reported.
accuracy, precision, recall, f1 = (
    metrica(y_test, y_pred)
    for metrica in (accuracy_score, precision_score, recall_score, f1_score)
)
cm = confusion_matrix(y_test, y_pred)

# Report: each score is printed with three decimals, followed by the
# confusion matrix.
print(" Modelo: Regresión Logística")
print(f"Accuracy  (exactitud global):     {accuracy:.3f}")
print(f"Precision (de los positivos predichos, cuántos son correctos): {precision:.3f}")
print(f"Recall    (de los positivos reales, cuántos fueron detectados): {recall:.3f}")
print(f" F1 Score  (balance entre precision y recall): {f1:.3f}")
print("\n Matriz de confusión:", cm, sep="\n")