In [None]:
# Mount the user's Google Drive inside the Colab runtime at /content/drive.
# NOTE(review): the CSV files loaded later in this notebook are read from
# /content/ directly, not from under /content/drive — confirm whether this
# mount is actually required for a clean "Restart & Run All".
from google.colab import drive
drive.mount('/content/drive')

In [2]:
import pandas as pd

In [3]:
# Locations of the pre-split train / test tables on the runtime's filesystem.
# NOTE(review): these are absolute, Colab-specific paths; the files must be
# present at /content/ before this cell runs.
TRAIN_CSV_PATH = '/content/TRAIN_FINAL_FINAL.csv'
TEST_CSV_PATH = '/content/TEST_FINAL_FINAL.csv'

# Read each split into its own DataFrame.
train = pd.read_csv(filepath_or_buffer=TRAIN_CSV_PATH)
test = pd.read_csv(filepath_or_buffer=TEST_CSV_PATH)

In [4]:
# Feature matrix for training: every column of `train` except TIENDA_ID,
# the two target columns, DATASET and TOTAL_MESES.
X_train_aug = train.drop(
    ['TIENDA_ID', 'PROMEDIO_VENTA_MENSUAL', 'PORCENTAJE_ÉXITO', 'DATASET', 'TOTAL_MESES'],
    axis='columns',
)

# Both regression targets are kept side by side; each model later selects
# the single column it is fitted on.
y_train = train.loc[:, ['PROMEDIO_VENTA_MENSUAL', 'PORCENTAJE_ÉXITO']]

In [5]:
# Feature matrix for evaluation: the same columns are removed from `test`
# as were removed from `train` (TIENDA_ID, both targets, DATASET, TOTAL_MESES).
X_test_aug = test.drop(
    ['TIENDA_ID', 'PROMEDIO_VENTA_MENSUAL', 'PORCENTAJE_ÉXITO', 'DATASET', 'TOTAL_MESES'],
    axis='columns',
)

# Ground-truth values of both targets for the test rows, used only for scoring.
y_test = test.loc[:, ['PROMEDIO_VENTA_MENSUAL', 'PORCENTAJE_ÉXITO']]

In [6]:
# Tag every row with the split it came from so both tables can be encoded
# together and separated again afterwards. This adds a 'conjunto' column to
# X_train_aug / X_test_aug in place.
X_train_aug['conjunto'] = 'train'
X_test_aug['conjunto'] = 'test'

# One-hot encode the stacked frame; because both splits are slices of the same
# encoded frame, they end up with an identical set of dummy columns.
X_all = pd.concat([X_train_aug, X_test_aug], axis=0)
X_all_encoded = pd.get_dummies(data=X_all, drop_first=True)

# After encoding, the split tag survives as the single indicator column
# 'conjunto_train'. Use it to recover each split, then discard it.
train_rows_mask = X_all_encoded['conjunto_train'] == 1
X_train_encoded = X_all_encoded.loc[train_rows_mask].drop(columns='conjunto_train')
X_test_encoded = X_all_encoded.loc[~train_rows_mask].drop(columns='conjunto_train')


In [7]:
from xgboost import XGBRegressor
from sklearn.model_selection import GridSearchCV
from sklearn.metrics import r2_score

# Hyper-parameter grid for the monthly-sales model: 2 * 3 * 3 * 2 = 36 candidates.
param_grid = dict(
    n_estimators=[50, 100],
    max_depth=[3, 5, 7],
    learning_rate=[0.01, 0.1, 0.2],
    subsample=[0.7, 1.0],
)

# Base regressor; the seed is fixed so repeated runs are comparable.
xgb_model = XGBRegressor(random_state=42, objective='reg:squarederror')

# Exhaustive search over the grid, scored by R² with 3-fold cross-validation
# on the training rows, using all available cores.
grid_search = GridSearchCV(
    xgb_model,
    param_grid,
    scoring='r2',
    cv=3,
    n_jobs=-1,
    verbose=1,
)

# Fit on the encoded training features against the PROMEDIO_VENTA_MENSUAL target.
grid_search.fit(X_train_encoded, y_train.loc[:, 'PROMEDIO_VENTA_MENSUAL'])

# Best candidate found by the search.
best_model = grid_search.best_estimator_

# Predict on the held-out test features and score against the true values.
pred_ventas_test = best_model.predict(X_test_encoded)

r2 = r2_score(y_true=y_test.loc[:, 'PROMEDIO_VENTA_MENSUAL'], y_pred=pred_ventas_test)
print("Mejores parámetros:", grid_search.best_params_)
print("R² en test:", r2)


Fitting 3 folds for each of 36 candidates, totalling 108 fits
Mejores parámetros: {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 50, 'subsample': 1.0}
R² en test: 0.6028239343452453


In [8]:
# Add a 'PRED_VENTAS' feature to both encoded frames (modified in place).
# Training rows receive the TRUE PROMEDIO_VENTA_MENSUAL values (aligned by index),
# while test rows receive the sales model's predictions.
# NOTE(review): train and test therefore see this feature from different
# sources (ground truth vs. prediction) — confirm this is intended; cross-validated
# predictions on the training rows would be the alternative.
# NOTE(review): once this cell has run, X_train_encoded contains the sales target;
# re-running the earlier sales grid-search cell without rebuilding the encoded
# frames would train on it.
X_train_encoded.loc[:, 'PRED_VENTAS'] = y_train['PROMEDIO_VENTA_MENSUAL']

X_test_encoded.loc[:, 'PRED_VENTAS'] = pred_ventas_test


In [9]:
from sklearn.model_selection import GridSearchCV
from xgboost import XGBRegressor
from sklearn.metrics import r2_score

# Hyper-parameter grid for the success-percentage model (same 36 candidates
# as the sales model's grid).
param_grid = dict(
    n_estimators=[50, 100],
    max_depth=[3, 5, 7],
    learning_rate=[0.01, 0.1, 0.2],
    subsample=[0.7, 1.0],
)

# Second base regressor, seeded for repeatability.
xgb_model_2 = XGBRegressor(random_state=42, objective='reg:squarederror')

# Exhaustive search scored by R² with 3-fold cross-validation on the training rows.
grid_search_2 = GridSearchCV(
    xgb_model_2,
    param_grid,
    scoring='r2',
    cv=3,
    n_jobs=-1,
    verbose=1,
)

# Fit against the PORCENTAJE_ÉXITO target. At this point the encoded frames are
# expected to carry the extra 'PRED_VENTAS' column added by the previous cell.
grid_search_2.fit(X_train_encoded, y_train.loc[:, 'PORCENTAJE_ÉXITO'])

# Best candidate, its test-set predictions, and the resulting R² score.
best_model_2 = grid_search_2.best_estimator_
pred_exito_test = best_model_2.predict(X_test_encoded)

r2_exito = r2_score(y_true=y_test.loc[:, 'PORCENTAJE_ÉXITO'], y_pred=pred_exito_test)
print("Mejores parámetros (éxito):", grid_search_2.best_params_)
print("R² en test (éxito):", r2_exito)


Fitting 3 folds for each of 36 candidates, totalling 108 fits
Mejores parámetros (éxito): {'learning_rate': 0.1, 'max_depth': 3, 'n_estimators': 50, 'subsample': 1.0}
R² en test (éxito): 0.9379956123482692
