# TEST XGBOOST REGRESSOR

In [None]:
import numpy as np
from scipy.stats import uniform as sp_rand
from sklearn import datasets
import xgboost as xgb
from xgboost.sklearn import XGBRegressor
import pandas as pd
from datetime import datetime
from time import time
from sklearn.metrics import mean_squared_error
import gc
import os
import pickle

In [None]:
import warnings
# Silence every warning category for the rest of the session. This keeps the
# notebook output clean but also hides deprecation and numerical warnings.
warnings.filterwarnings('ignore')

### GLOBAL VARIABLES

In [None]:
# --- Locations on disk (relative to the notebook) ---
INPUT_PATH = '../../../data'                 # root of the data folder
OUTPUT_PATH = f'{INPUT_PATH}/submission'     # where the submission CSV is written
MODEL_PATH = '../../../models/xgboost'       # where the pickled models live
FEATURES_PATH = f'{INPUT_PATH}/features'     # not referenced in the visible cells

# --- Artifact names (placeholders to be replaced per run) ---
OUTPUT_NAME = 'submission_XXX'               # submission file name, without ".csv"
MODEL_NAME = 'xgb_reg_XXX'                   # model file name, without ".model"

# --- Run configuration ---
# NOTE(review): SEED, CV, SCORE, handlingnull, NJOBS and USEGPU are not read by
# the visible cells; presumably kept in sync with the training notebook.
SEED = 47
CV = 3
SCORE = 'rmse'
DAYS_PRED = 28                               # number of F1..Fn forecast columns
handlingnull = False
NJOBS = -1
USEGPU = False

### FUNCTIONS

In [None]:
def on_kaggle():
    """Return True when the KAGGLE_KERNEL_RUN_TYPE environment variable is set."""
    run_type = os.environ.get("KAGGLE_KERNEL_RUN_TYPE")
    return run_type is not None

In [None]:
def make_submission(test, submission):
    """Write the wide-format submission CSV from long-format predictions.

    Parameters
    ----------
    test : pandas.DataFrame
        Long-format predictions with at least the columns "id", "date" and
        "demand" (one row per id and date).
    submission : pandas.DataFrame
        Submission template with an "id" column. Rows whose id ends with
        "evaluation" are copied into the output unchanged.

    Raises
    ------
    AssertionError
        If the assembled frame contains missing values, or if its "id"
        column does not match the template's "id" column.

    Notes
    -----
    On Kaggle the file is written to "submission.csv" in the working
    directory; otherwise to ``{OUTPUT_PATH}/{OUTPUT_NAME}.csv``.
    """
    preds = test[["id", "date", "demand"]]
    # One row per id, one column per date; the date columns are then renamed
    # F1..F{DAYS_PRED} in the order pivot returns them.
    preds = preds.pivot(index="id", columns="date", values="demand").reset_index()
    preds.columns = ["id"] + [f"F{d + 1}" for d in range(DAYS_PRED)]

    # Left side is the template, so predicted rows follow the template's order.
    vals = submission[["id"]].merge(preds, how="inner", on="id")
    evals = submission[submission["id"].str.endswith("evaluation")]
    final = pd.concat([vals, evals])

    assert final.drop("id", axis=1).isnull().sum().sum() == 0, (
        "submission contains missing values"
    )
    assert final["id"].equals(submission["id"]), (
        "submission ids do not match the template ids"
    )

    if on_kaggle():
        final.to_csv("submission.csv", index=False)
    else:
        # to_csv does not create missing directories.
        os.makedirs(OUTPUT_PATH, exist_ok=True)
        final.to_csv(f"{OUTPUT_PATH}/{OUTPUT_NAME}.csv", index=False)

In [None]:
def rmse(y_true, y_pred):
    """Return the root mean squared error between ``y_true`` and ``y_pred``."""
    mse = mean_squared_error(y_true, y_pred)
    return np.sqrt(mse)

### LOAD DATA

In [None]:
# Test feature matrix. The context manager closes the file handle once loaded.
# NOTE: unpickling can execute arbitrary code; only load trusted files.
with open(f'{INPUT_PATH}/train_test/X_test.pkl', "rb") as fh:
    X_test = pickle.load(fh)

In [None]:
# Key table for the predictions: the "id" and "date" columns are read by
# make_submission once the predicted demand has been attached.
# NOTE(review): rows are presumably aligned one-to-one with X_test — confirm.
id_date = pd.read_csv(f'{INPUT_PATH}/train_test/id_date.csv')

In [None]:
# Sample submission, used by make_submission as the template that fixes the
# ids and their order in the output file.
submission = pd.read_csv(f'{INPUT_PATH}/raw/sample_submission.csv')

### LOAD MODEL

In [None]:
# Collection of trained models, averaged in the scoring step. The context
# manager closes the file handle once loaded.
# NOTE: unpickling can execute arbitrary code; only load trusted files.
with open(f'{MODEL_PATH}/{MODEL_NAME}.model', 'rb') as fh:
    models = pickle.load(fh)

### SCORING

In [None]:
# Number of loaded models; used as the divisor when averaging predictions
# and feature importances.
nmodels = len(models)

In [None]:
# Average the predictions and the feature importances over all loaded models.
imp_type = "gain"
importances = np.zeros(X_test.shape[1])
y_pred = np.zeros(X_test.shape[0])

for model in models:
    y_pred += model.predict(X_test)
    if hasattr(model, "feature_importance"):
        # LightGBM-style API: kept so models exposing it behave as before.
        importances += model.feature_importance(imp_type)
    else:
        # XGBoost's sklearn wrapper has no `feature_importance` method; read
        # the scores from the underlying booster instead.
        booster = model.get_booster()
        scores = booster.get_score(importance_type=imp_type)
        # Without stored feature names the booster uses "f0", "f1", ...
        names = booster.feature_names or [
            f"f{i}" for i in range(X_test.shape[1])
        ]
        # get_score omits features that were never used in a split.
        importances += np.array([scores.get(name, 0.0) for name in names])

y_pred = y_pred / nmodels
importances = importances / nmodels

### SAVE RESULTS

In [None]:
# Attach the averaged predictions as the "demand" column and write the
# submission file.
make_submission(id_date.assign(demand=y_pred), submission)    