<a href="https://colab.research.google.com/github/DYNAFEM/REGRESION-PESO-BOVINOS/blob/MODELOS_SVR_RF_XGBOOST-REGRESSOR/V0_XGBOOST_REGRESSION.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import pandas as pd
import numpy as np
from xgboost import XGBRegressor
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score

In [None]:
!pip install xgboost




In [None]:
from google.colab import drive

# Google Drive is attached here; the dataset paths used later in the notebook
# all start with this directory.
DRIVE_MOUNT_POINT = '/content/drive'
drive.mount(DRIVE_MOUNT_POINT)

Mounted at /content/drive


In [None]:
# Locations of the processed train/test CSV files on the mounted Drive
ruta_train = '/content/drive/My Drive/REGRESION_PESO/DATOS_TRAIN_TEST_STANDARD/train_dataset.csv'
ruta_test = '/content/drive/My Drive/REGRESION_PESO/DATOS_TRAIN_TEST_STANDARD/test_dataset.csv'

# Read both files into DataFrames (train first, then test)
train_df, test_df = (pd.read_csv(ruta) for ruta in (ruta_train, ruta_test))

# Column holding the regression target; every other column is used as a feature
TARGET_COL = 'weight_in_kg'

# Split each frame into target vector (y_*) and feature matrix (X_*)
y_train = train_df[TARGET_COL]
X_train = train_df.drop([TARGET_COL], axis="columns")

y_test = test_df[TARGET_COL]
X_test = test_df.drop([TARGET_COL], axis="columns")

In [None]:
# Baseline XGBoost regressor with hand-picked hyperparameters
base_params = {
    'n_estimators': 100,
    'learning_rate': 0.1,
    'max_depth': 5,
    'random_state': 42,  # fixed seed so the fit is repeatable
}
xgb_model = XGBRegressor(**base_params)

# Train on the training split; kept as the cell's last expression so the
# notebook still displays the fitted estimator.
xgb_model.fit(X_train, y_train)

In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np

# Predictions of the baseline model on the test split
y_pred = xgb_model.predict(X_test)

# Score the predictions against the true test targets
mae, mse, r2 = (
    metric(y_test, y_pred)
    for metric in (mean_absolute_error, mean_squared_error, r2_score)
)
rmse = np.sqrt(mse)  # square root of the MSE

# Report the metrics, one per line
print(
    f"MAE: {mae:.2f} kg",
    f"MSE: {mse:.2f} kg²",
    f"RMSE: {rmse:.2f} kg",
    f"R² Score: {r2:.3f}",
    sep="\n",
)


MAE: 4.08 kg
MSE: 84.14 kg²
RMSE: 9.17 kg
R² Score: 0.976


# 2. BÚSQUEDA DE HIPERPARÁMETROS XGBOOST REGRESSOR

In [None]:
from xgboost import XGBRegressor
from sklearn.model_selection import RandomizedSearchCV
from scipy.stats import randint, uniform

# Distributions the randomized search samples each hyperparameter from.
# uniform(loc, scale) spans loc .. loc + scale, e.g. uniform(0.5, 0.5) covers 0.5 – 1.0.
param_dist = {
    # boosting schedule
    'n_estimators': randint(200, 800),
    'learning_rate': uniform(0.01, 0.3),
    # tree size
    'max_depth': randint(3, 15),
    # row / column sampling fractions, both 0.5 – 1.0
    'subsample': uniform(0.5, 0.5),
    'colsample_bytree': uniform(0.5, 0.5),
    # split and regularisation controls
    'gamma': uniform(0, 5),
    'min_child_weight': randint(1, 10),
    'reg_alpha': uniform(0, 1),
}

# Search configuration: 40 sampled candidates, 5-fold CV scored by R²,
# fixed seed, all available cores, per-fit progress logging.
search_settings = dict(
    n_iter=40,
    scoring='r2',
    cv=5,
    random_state=42,
    n_jobs=-1,
    verbose=2,
)
xgb_search = RandomizedSearchCV(xgb_model, param_dist, **search_settings)

# Run the search on the training split; kept as the last expression so the
# notebook displays the fitted search object.
xgb_search.fit(X_train, y_train)

Fitting 5 folds for each of 40 candidates, totalling 200 fits


In [None]:
import pandas as pd

# Hyperparameter dictionaries of the baseline model and of the best estimator
# found by the randomized search
params_base = xgb_model.get_params()
params_best = xgb_search.best_estimator_.get_params()


def _is_nan(value):
    """Return True only when ``value`` is a float NaN."""
    return isinstance(value, (float, np.floating)) and bool(pd.isna(value))


def _params_differ(base_value, best_value):
    """Tell whether two hyperparameter values differ, counting NaN == NaN as equal.

    A plain ``!=`` reports NaN as different from NaN, which listed the
    ``missing`` parameter as "changed" although both models hold NaN for it.
    """
    if _is_nan(base_value) and _is_nan(best_value):
        return False
    return base_value != best_value


# Keep only the parameters whose value actually changed: name -> (base, best)
param_diff = {
    k: (params_base[k], params_best[k])
    for k in params_base
    if _params_differ(params_base[k], params_best[k])
}

# Tabulate the differences; the columns are pinned so that an empty diff still
# yields a frame with the expected headers.
df_param_diff = pd.DataFrame(
    [
        {"Hiperparámetro": k, "Base": v[0], "Optimizado": v[1]}
        for k, v in param_diff.items()
    ],
    columns=["Hiperparámetro", "Base", "Optimizado"],
)

df_param_diff


Unnamed: 0,Hiperparámetro,Base,Optimizado
0,colsample_bytree,,0.679746
1,gamma,,1.467959
2,learning_rate,0.1,0.252808
3,min_child_weight,,2.0
4,missing,,
5,n_estimators,100.0,607.0
6,reg_alpha,,0.913241
7,subsample,,0.755671


In [None]:
from sklearn.metrics import mean_absolute_error, mean_squared_error, r2_score
import numpy as np
import pandas as pd

def _regression_metrics(y_true, y_hat):
    """Return the tuple (MAE, MSE, RMSE, R²) of ``y_hat`` scored against ``y_true``."""
    mse_value = mean_squared_error(y_true, y_hat)
    return (
        mean_absolute_error(y_true, y_hat),
        mse_value,
        np.sqrt(mse_value),
        r2_score(y_true, y_hat),
    )


# Best estimator selected by the randomized search
xgb_best = xgb_search.best_estimator_

# Test-split predictions of the baseline and of the tuned model
y_pred_base = xgb_model.predict(X_test)
y_pred_best = xgb_best.predict(X_test)

# Same four metrics for each model
base_mae, base_mse, base_rmse, base_r2 = _regression_metrics(y_test, y_pred_base)
best_mae, best_mse, best_rmse, best_r2 = _regression_metrics(y_test, y_pred_best)

# Side-by-side comparison table: one row per metric, one column per model
df_metrics = pd.DataFrame({
    "Métrica": ["MAE", "MSE", "RMSE", "R²"],
    "Modelo Base": [base_mae, base_mse, base_rmse, base_r2],
    "Modelo Optimizado": [best_mae, best_mse, best_rmse, best_r2],
})

df_metrics


Unnamed: 0,Métrica,Modelo Base,Modelo Optimizado
0,MAE,4.078301,3.275431
1,MSE,84.142647,33.719318
2,RMSE,9.17293,5.806834
3,R²,0.976243,0.990479
