In [2]:
import pandas as pd
from sklearn.linear_model import LinearRegression
from sklearn.metrics import r2_score, mean_squared_error, mean_absolute_error
from sklearn.model_selection import GridSearchCV, train_test_split

In [1]:
# Load the Kedro IPython extension into this kernel.
# NOTE(review): presumably this is what injects the `catalog` object used by the
# data-loading cell below (it is not defined anywhere in this notebook) — confirm.
%load_ext kedro.ipython

In [5]:
# Load the dataset registered under the name "data_final" via `catalog`.
# NOTE(review): `catalog` is not defined in this notebook; presumably it is provided
# by the Kedro IPython extension loaded above, so that cell must run first — confirm.
data = catalog.load("data_final")

In [6]:
# Quick structural overview of the loaded frame: dimensions, column names, sample rows.
frame_shape = data.shape
column_names = data.columns.tolist()

print("Dataset shape:", frame_shape)
print("\nAvailable columns:")
print(column_names)
print("\nFirst few rows:")
# Bare last expression so the notebook renders the preview as a rich table.
data.head()

Dataset shape: (200873, 33)

Available columns:
['dateid', 'platform', 'gamemode', 'mapname', 'matchid', 'roundnumber', 'objectivelocation', 'winrole', 'endroundreason', 'roundduration', 'clearancelevel', 'skillrank', 'role', 'team', 'haswon', 'operator', 'nbkills', 'isdead', 'primaryweapon', 'primaryweapontype', 'primarysight', 'primarygrip', 'primaryunderbarrel', 'primarybarrel', 'secondaryweapon', 'secondaryweapontype', 'secondarysight', 'secondarygrip', 'secondaryunderbarrel', 'secondarybarrel', 'secondarygadget', 'kill_death_ratio', 'impact_score']

First few rows:


Unnamed: 0,dateid,platform,gamemode,mapname,matchid,roundnumber,objectivelocation,winrole,endroundreason,roundduration,...,primarybarrel,secondaryweapon,secondaryweapontype,secondarysight,secondarygrip,secondaryunderbarrel,secondarybarrel,secondarygadget,kill_death_ratio,impact_score
0,20170212,1,0,13,1522488121,4,48,0,6,120,...,3,3,0,3,0,1,3,3,0.0,0
1,20170212,1,2,8,1522741281,1,33,0,6,128,...,3,4,0,3,0,1,3,1,2.0,2
2,20170212,1,2,8,1522741281,2,33,0,6,169,...,3,4,0,3,0,1,3,2,0.0,0
3,20170212,1,0,12,1523152401,1,82,1,1,153,...,3,3,0,3,0,1,3,1,1.0,0
4,20170212,2,2,13,1523847781,7,107,0,6,186,...,3,4,0,3,0,1,3,0,0.0,0


In [None]:
# Fit a multiple linear regression of `impact_score` on round- and player-level
# features, then report hold-out metrics and the fitted coefficients.

# Single source of truth for the predictors: the same list selects the columns of X
# and labels the coefficients further down, so the two can never drift apart.
feature_names = [
    'gamemode',        # match type
    'winrole',         # attacker or defender
    'endroundreason',  # how the round ended
    'roundduration',   # how long the round lasted
    'isdead',          # whether the player died
    'nbkills',         # number of kills
]
X = data[feature_names]
y = data['impact_score']
# NOTE(review): `impact_score` looks like an engineered column; if it was derived from
# `nbkills` / `isdead`, those predictors leak the target — confirm upstream.
# NOTE(review): `gamemode` and `endroundreason` appear to be integer category codes;
# a linear model treats them as ordinal quantities — confirm that is intended.

# Hold out 20% of the rows for evaluation; the fixed seed keeps the split reproducible.
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Create and train the model
model = LinearRegression()
model.fit(X_train, y_train)

# Evaluate the model on the held-out split
y_pred = model.predict(X_test)

r2 = r2_score(y_test, y_pred)
mse = mean_squared_error(y_test, y_pred)
rmse = mse ** 0.5  # RMSE computed manually as the square root of the MSE
mae = mean_absolute_error(y_test, y_pred)

print("📊 Regresión Múltiple: impact_score ~ multiple features")
print("=" * 50)
print(f"Mean Squared Error (MSE): {mse:.4f}")
print(f"Mean Absolute Error (MAE): {mae:.4f}")
print(f"Root Mean Squared Error (RMSE): {rmse:.4f}")
print(f"R² Score: {r2:.4f}")
print("=" * 50)

# Show the coefficients of the multiple regression, paired with their feature names
print("\nCoeficientes del modelo:")
for name, coef in zip(feature_names, model.coef_):
    print(f"  {name}: {coef:.4f}")
print(f"  Intercepto: {model.intercept_:.4f}")

# Interpretation of R²; the final branch also covers r2 == 0.5, hence "≤"
if r2 > 0.7:
    print("\n✅ Modelo con buen ajuste (R² > 0.7)")
elif r2 > 0.5:
    print("\n⚠️ Modelo con ajuste moderado (R² > 0.5)")
else:
    print("\n❌ Modelo con ajuste pobre (R² ≤ 0.5)")

📊 Regresión Múltiple: impact_score ~ multiple features
Mean Squared Error (MSE): 0.1896
Mean Absolute Error (MAE): 0.2969
Root Mean Squared Error (RMSE): 0.4354
R² Score: 0.6895

Coeficientes del modelo:
  gamemode: 0.0050
  winrole: -0.0571
  endroundreason: -0.0085
  roundduration: 0.0003
  isdead: -0.4478
  nbkills: 0.6549
  Intercepto: 0.3420

⚠️ Modelo con ajuste moderado (R² > 0.5)


In [None]:
# Grid-search the few options LinearRegression exposes, report what was selected,
# and evaluate the winning estimator on the held-out test split.

# Base estimator
reg = LinearRegression()

# LinearRegression has very few tunable options; this grid toggles the intercept term
# (`fit_intercept`) and the non-negative-coefficients constraint (`positive`).
param_grid = {
    'fit_intercept': [True, False],
    'positive': [True, False]
}

# 5-fold cross-validation, selecting the candidate with the best (negated) MSE
grid_search = GridSearchCV(
    estimator=reg,
    param_grid=param_grid,
    scoring='neg_mean_squared_error',
    cv=5,
    n_jobs=-1
)

# Fit the search on the training split only, so the test split stays unseen
grid_search.fit(X_train, y_train)

# Report what the search selected; without this the result cannot be interpreted
best_params = grid_search.best_params_
best_cv_mse = -grid_search.best_score_  # the scorer is negated MSE, so flip the sign
print("Mejores hiperparámetros:", best_params)
print(f"MSE medio en validación cruzada: {best_cv_mse:.4f}")

# Hold-out metrics (MAE, MSE, RMSE, R2) for the best estimator
best_model = grid_search.best_estimator_
y_pred_best = best_model.predict(X_test)
mse_best = mean_squared_error(y_test, y_pred_best)
rmse_best = mse_best ** 0.5
mae_best = mean_absolute_error(y_test, y_pred_best)
r2_best = r2_score(y_test, y_pred_best)



In [None]:
# Print the hold-out metrics of the grid-searched model, one "label value" line each.
print("Resultados del mejor modelo:")
best_metrics = (
    ("MAE:", mae_best),
    ("MSE:", mse_best),
    ("RMSE:", rmse_best),
    ("R2:", r2_best),
)
for metric_label, metric_value in best_metrics:
    print(metric_label, metric_value)