# TEST CATBOOST REGRESSOR

In [1]:
import numpy as np
from scipy.stats import uniform as sp_rand
from catboost import CatBoostRegressor
import pandas as pd
from datetime import datetime
from time import time
import gc
import os
import pickle

In [2]:
import warnings

# Install a blanket "ignore" filter so that no warning is displayed for the
# rest of the session. This also hides warnings raised by the scoring cells.
warnings.filterwarnings(action="ignore")

### GLOBAL VARIABLES

In [3]:
# --- Directories (relative paths) ---
INPUT_PATH = "../../../data"               # root for the train_test/ and raw/ inputs read below
OUTPUT_PATH = "../../../data/submission"   # where make_submission writes when not on Kaggle
MODEL_PATH = "../../../models/catboost"    # directory holding the pickled models
FEATURES_PATH = "../../../data/features"   # not referenced in the cells visible here

# --- Artifact names (file stems, extensions are appended at the point of use) ---
FEATURES_NAME = "features_v001_info"       # CSV listing the feature names
MODEL_NAME = "catboost_reg_001"            # pickled models: {MODEL_PATH}/{MODEL_NAME}.model
OUTPUT_NAME = "submission_003"             # submission CSV: {OUTPUT_PATH}/{OUTPUT_NAME}.csv

# --- Submission layout ---
DAYS_PRED = 28                             # number of F1..Fn columns built by make_submission

# --- Run settings (none of these is referenced in the cells visible here) ---
SEED = 47
CV = 3
SCORE = "RMSE"
handlingnull = False
NJOBS = -1
USEGPU = True

### FUNCTIONS

In [4]:
def on_kaggle():
    """Return True when the ``KAGGLE_KERNEL_RUN_TYPE`` environment variable is set.

    Only the presence of the variable matters; its value (even an empty
    string) is not inspected.
    """
    # Environment values are always strings, so None can only mean "absent".
    return os.environ.get("KAGGLE_KERNEL_RUN_TYPE") is not None

In [5]:
def make_submission(test, submission):
    """Reshape long-format predictions into the submission layout and save them as CSV.

    Parameters
    ----------
    test : DataFrame with at least the columns ``id``, ``date`` and ``demand``.
        It is pivoted to one row per ``id`` with one column per distinct
        ``date``; those columns are then renamed ``F1`` .. ``F{DAYS_PRED}``,
        so the number of distinct dates must equal ``DAYS_PRED``.
    submission : DataFrame with an ``id`` column (the submission template).
        Rows whose ``id`` ends with ``"evaluation"`` are copied through
        unchanged; all other output rows come from ``test``.

    The result is written to ``submission.csv`` when running on Kaggle,
    otherwise to ``{OUTPUT_PATH}/{OUTPUT_NAME}.csv``. Nothing is returned.

    Raises
    ------
    AssertionError
        If any non-id cell of the result is null, or if the result's ``id``
        column does not equal the template's ``id`` column.
    """
    wide = (
        test[["id", "date", "demand"]]
        .pivot(index="id", columns="date", values="demand")
        .reset_index()
    )
    # Replace the date labels by the positional F1..Fn names the template uses.
    wide.columns = ["id", *(f"F{step + 1}" for step in range(DAYS_PRED))]

    # Merging from the template's ids keeps the template's row order.
    predicted_rows = submission[["id"]].merge(wide, how="inner", on="id")
    is_evaluation = submission["id"].str.endswith("evaluation")
    untouched_rows = submission[is_evaluation]
    final = pd.concat([predicted_rows, untouched_rows])

    # Sanity checks: no missing values, and ids line up with the template.
    assert final.drop("id", axis=1).isnull().sum().sum() == 0
    assert final["id"].equals(submission["id"])

    destination = "submission.csv" if on_kaggle() else f"{OUTPUT_PATH}/{OUTPUT_NAME}.csv"
    final.to_csv(destination, index=False)

### LOAD DATA

In [6]:
# Load the test feature matrix. The scoring cell reads its `.shape` and passes
# it to every model's `predict`.
# NOTE: unpickling can execute arbitrary code; only load files produced by a
# trusted pipeline.
# The `with` block closes the file handle as soon as loading finishes (the
# previous bare `open(...)` left it open until garbage collection).
with open(f'{INPUT_PATH}/train_test/X_test.pkl', "rb") as fh:
    X_test = pickle.load(fh)

In [7]:
# Frame that receives the predictions as a `demand` column before submission;
# make_submission reads its `id`, `date` and `demand` columns.
# NOTE(review): predictions are attached positionally, so this presumably has
# one row per X_test row in the same order - confirm against the file that
# produced both.
id_date = pd.read_csv(filepath_or_buffer=f'{INPUT_PATH}/train_test/id_date.csv')

In [21]:
# Submission template: make_submission uses its `id` column to order and
# validate the output, and copies rows whose id ends with "evaluation" as-is.
submission = pd.read_csv(filepath_or_buffer=f'{INPUT_PATH}/raw/sample_submission.csv')

In [9]:
# Feature names as a plain Python list, taken from the `feature` column.
# NOTE(review): `features` is not referenced in the cells visible here, and
# the file is read from train_test/ rather than FEATURES_PATH - confirm both
# are intended.
features = (
    pd.read_csv(f'{INPUT_PATH}/train_test/{FEATURES_NAME}.csv', index_col=False)
    ['feature']
    .tolist()
)

### LOAD MODEL

In [11]:
# Load the pickled collection of fitted models. The scoring cells take its
# `len()` and iterate over it, calling `predict` and reading
# `feature_importances_` on each element.
# NOTE: unpickling can execute arbitrary code; only load files produced by a
# trusted pipeline.
# The `with` block closes the file handle as soon as loading finishes (the
# previous bare `open(...)` left it open until garbage collection).
with open(f'{MODEL_PATH}/{MODEL_NAME}.model', 'rb') as fh:
    models = pickle.load(fh)

### SCORING

In [12]:
# Number of loaded models; used as the divisor when averaging predictions
# and feature importances in the scoring cell.
nmodels = len(models)

In [13]:
# NOTE(review): `imp_type` is assigned but not used in the cells visible here.
imp_type = "gain"

# Running sums: one prediction slot per X_test row, one importance slot per
# X_test column.
y_pred = np.zeros(X_test.shape[0])
importances = np.zeros(X_test.shape[1])

# Accumulate every model's output on the same test matrix.
for model in models:
    y_pred += model.predict(X_test)
    importances += model.feature_importances_

# Turn the sums into simple (unweighted) averages over the models.
y_pred /= nmodels
importances /= nmodels

### SAVE RESULTS

In [22]:
# Attach the averaged predictions to the id/date frame as `demand` (on a
# copy; `id_date` itself is left unchanged) and write the submission file.
make_submission(test=id_date.assign(demand=y_pred), submission=submission)