In [140]:
import pandas as pd
import numpy as np
import warnings

from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import mean_absolute_error, make_scorer

import xgboost as xgb

In [99]:
# Load the training and test sets from the shared data directory
# (paths are relative to the notebook's working directory).
train = pd.read_csv(filepath_or_buffer="./../data/train.csv")
test = pd.read_csv(filepath_or_buffer="./../data/test.csv")

# XGBoost model

#### Train

In [114]:
# Regression target: the 'orders' column of the training frame.
target_column = 'orders'
Y_train = train[target_column]

# Show the number of training rows as a quick sanity check.
len(Y_train)

7340

In [115]:
# Feature columns fed to the model.
feature_columns = [
    'holiday',
    'shops_closed',
    'winter_school_holidays',
    'school_holidays',
]
X_train = train[feature_columns]
# NOTE: no imputation is applied here; mean-filling (X_train.fillna(X_train.mean()))
# was tried previously and is intentionally left disabled.

In [119]:
# Native XGBoost container holding the training features together with the target.
dtrain = xgb.DMatrix(data=X_train, label=Y_train)

#### Test

In [112]:
# Select the same four feature columns from the test frame as were used for training.
test_feature_columns = [
    'holiday',
    'shops_closed',
    'winter_school_holidays',
    'school_holidays',
]
X_test = test[test_feature_columns]

In [120]:
# Native XGBoost container for the (unlabelled) test features.
dtest = xgb.DMatrix(data=X_test)

#### model

In [156]:
# Hand-picked baseline hyper-parameters. They are used only when the grid
# search from the optimisation section has not been run in this kernel.
param = {
    'max_depth': 3,  # the maximum depth of each tree
    'eta': 0.1,  # the learning rate (step-size shrinkage) for each boosting iteration
    'verbosity': 0,  # logging mode - quiet (replaces the deprecated `silent` flag)
    'objective': 'reg:squarederror',  # regression with squared loss
    'eval_metric': 'rmse'  # root mean square error for evaluation
}

# `best_params` is created by the grid-search cell further down the notebook.
# On a fresh kernel (Restart & Run All) it does not exist yet, which used to
# abort this cell with a NameError. Fall back to the baseline in that case and
# say so explicitly, so an un-tuned model is never mistaken for the tuned one.
try:
    model_params = best_params
except NameError:
    print("best_params is not defined yet - using the baseline `param` instead.")
    model_params = param

best_model = xgb.XGBRegressor(**model_params)

In [158]:
# Train the regressor on the full training set.
best_model.fit(X=X_train, y=Y_train)

In [159]:
# Predict the target for every row of the test features.
y_pred = best_model.predict(X_test)

In [160]:
# Assemble the submission table: one predicted 'orders' value per test 'id'.
submission_columns = {
    'id': test['id'],
    'orders': y_pred,
}
result = pd.DataFrame(data=submission_columns)

In [161]:
# Write the submission file without the DataFrame index column.
result.to_csv(path_or_buf='./submit/xgb_opti.csv', index=False)


# Hyperparameter optimization (grid search)

#### metrics

In [134]:
def rmae(y_true, y_pred):
    """Return the root of the mean absolute error between two sequences.

    Computes ``sqrt(mean(|y_true - y_pred|))``.

    Parameters
    ----------
    y_true : array-like
        Ground-truth values (list, NumPy array, pandas Series, ...).
    y_pred : array-like
        Predicted values, in the same order as ``y_true``.

    Returns
    -------
    float
        The square root of the mean absolute error.
    """
    # Coerce to plain float arrays so the comparison is strictly positional:
    # lists are accepted, and two pandas Series with different indexes are not
    # silently aligned by label (which would produce NaN).
    y_true = np.asarray(y_true, dtype=float)
    y_pred = np.asarray(y_pred, dtype=float)
    return np.sqrt(np.mean(np.abs(y_true - y_pred)))

# Wrap rmae as a scikit-learn scorer. RMAE is an error (lower is better), so
# greater_is_better=False tells scikit-learn to negate it when ranking models.
rmae_scorer = make_scorer(score_func=rmae, greater_is_better=False)

#### param grid

In [152]:
# Search space for the grid search: 3 * 3 * 2 * 2 * 2 * 2 = 144 combinations.
param_grid = {
    # Maximum depth of each tree.
    'max_depth': [1, 3, 5],
    # Learning rate (step-size shrinkage).
    'eta': [0.05, 0.1, 0.5],
    # Minimum sum of instance weight required in a child node.
    'min_child_weight': [1, 5],
    # Minimum loss reduction required to make a further split.
    'gamma': [0.5, 1],
    # L2 regularisation strength on the weights.
    'lambda': [0.1, 1],
    # L1 regularisation strength on the weights.
    'alpha': [0.1, 1],
}

In [153]:
# Base estimator for the search. `verbosity=0` keeps XGBoost quiet; it replaces
# the deprecated `silent=1` flag, which current XGBoost releases no longer
# honour (they report it as an unused parameter instead).
xgb_reg = xgb.XGBRegressor(verbosity=0, objective='reg:squarederror')

# Exhaustive search over `param_grid` with 3-fold cross-validation, ranked by
# the (negated) RMAE scorer.
grid_search = GridSearchCV(xgb_reg, param_grid, cv=3, scoring=rmae_scorer)


#### result

In [154]:
# Suppress all Python warnings from this point on so the grid-search output
# stays readable. To re-enable them, call warnings.filterwarnings('default').
warnings.filterwarnings(action='ignore')

In [155]:
# Run the exhaustive cross-validated search over the parameter grid.
grid_search.fit(X_train, Y_train)

# Retrieve the winning configuration. The scorer was created with
# greater_is_better=False, so the stored score is negated; flip the sign back
# to report the actual RMAE.
best_params = grid_search.best_params_
best_score = -grid_search.best_score_

params_message = f"Best parameters: {best_params}"
score_message = f"Best RMAE: {best_score}"
print(params_message, score_message)

Best parameters: {'alpha': 1, 'eta': 0.05, 'gamma': 0.5, 'lambda': 1, 'max_depth': 1, 'min_child_weight': 1} Best RMAE: 47.35026781453141
