## **XGBoost Regression**
Reference — XGBoost `xgb.cv` documentation: https://rdrr.io/cran/xgboost/man/xgb.cv.html

In [2]:
%load_ext autoreload
%autoreload 2

import pandas as pd
import numpy as np
import os
import glob
import pickle
import matplotlib.pyplot as plt
import matplotlib as mpl
mpl.rcParams['agg.path.chunksize'] = 100000
import gc
from myUtils import *
from feature_generator import feature_v1, feature_v2, feature_v3
import xgboost as xgb

In [4]:
# -----------------------------------------------------------------
# Build the feature set, or reuse the copy cached by an earlier run.
# -----------------------------------------------------------------
# Alternative feature generator kept for reference:
#   feature_maker = feature_v1.FeaturesMaker_v1(target_col="item_cnt")
feature_maker = feature_v3.FeaturesMaker_v3(target_col="item_cnt")

base_data = "sales_train_eval_365"
# The cache file name embeds the feature generator's name, so each
# generator gets its own cached pickle.
data_path = os.path.join("mydata", f"{base_data}_{feature_maker.name}.pickle")

if not os.path.exists(data_path):
    # No cache yet: read the base pickle, derive the features, then
    # store the result so the next run can skip this step.
    data = feature_maker.make_feature(
        pd.read_pickle(os.path.join("mydata", f"{base_data}.pickle"))
    )
    with open(data_path, "wb") as cache_file:
        pickle.dump(data, cache_file)
else:
    # Cached features exist: load them instead of recomputing.
    with open(data_path, "rb") as cache_file:
        data = pickle.load(cache_file)
    print("data loaded")

data loaded


In [7]:
# Train (or reload) one LightGBM quantile-regression model per quantile.
#
# Each model is stored under `model_dir` in a file whose name embeds the
# quantile and the feature generator's name; a model whose file already
# exists is loaded instead of retrained.
# NOTE: the "RightGBM" spelling is part of the on-disk paths and is kept
# unchanged so previously saved models are still found.
model_dir = os.path.join("models", "RightGBM_quantileReg")
# exist_ok avoids the check-then-create race and is a no-op when present.
os.makedirs(model_dir, exist_ok=True)

qlist = np.array([0.005, 0.025, 0.165, 0.25, 0.5, 0.75, 0.835, 0.975, 0.995])


import lightgbm as lgb

# Every quantile's model, keyed by the quantile as a plain float.
# `model` is still left bound to the last quantile's model after the loop.
models = {}

for q in qlist:
    model_path = os.path.join(
        model_dir, f"RightGBM_{q:.3f}_{feature_maker.name}.mdl"
    )

    if os.path.exists(model_path):
        print("loading trained model...")
        # A reloaded model is a raw Booster, whereas a freshly trained one
        # is an LGBMRegressor; both expose predict().
        model = lgb.Booster(model_file=model_path)

    else:
        print("start training RightGBM")
        # NOTE(review): `min_samples_leaf` / `min_samples_split` look like
        # scikit-learn parameter names - confirm that the installed
        # LightGBM honors both (an unrecognized one may be ignored).
        model = lgb.LGBMRegressor(objective='quantile',
                                  alpha=q,
                                  n_estimators=1000,
                                  learning_rate=.1,
                                  min_samples_leaf=9,
                                  min_samples_split=9)

        # NOTE(review): LightGBM >= 4.0 no longer accepts
        # `early_stopping_rounds` / `verbose` in fit(); there they are
        # passed as callbacks instead. Confirm the installed version
        # before upgrading.
        model.fit(X=data["train"][0], y=data["train"][1],
                  eval_set=[data["train"], data["validation"]],
                  early_stopping_rounds=100,
                  verbose=True,
                  )
        model.booster_.save_model(model_path)

    models[float(q)] = model

print("  -- completed\n")

start training XGBoost


KeyboardInterrupt: 

#### [prediction]

In [70]:
# prediction
# Fill the missing (NaN) targets of the evaluation split with model output.
# NOTE(review): `model` is whatever the training cell bound last, i.e. the
# model of the final quantile in its loop - confirm that is the intended
# model for this point prediction.
print("start prediction")
pred_mask = data["evaluation"][1].isna()
if pred_mask.any():
    # Predict only the rows that are actually missing, so the number of
    # predictions always matches the number of rows being assigned (the
    # two differ whenever some evaluation targets are already filled).
    # Positional boolean indexing selects rows whether the features are a
    # DataFrame or a NumPy array.
    data["evaluation"][1].loc[pred_mask] = model.predict(
        data["evaluation"][0][pred_mask.values]
    )
print("  -- completed\n")

start prediction
  -- completed



#### [submission]

In [71]:
# submission
# Reshape the validation and evaluation targets into one wide table
# (one row per id, columns F1..F28) and write it out as a CSV file.
print("start submission")
sub_path = os.path.join("submission_point","XGBoost_"+feature_maker.name+"_submission.csv")
# Create the output directory up front so the final write cannot fail
# just because the folder is missing.
os.makedirs(os.path.dirname(sub_path), exist_ok=True)


sub_cols = ["id"] + [f"F{i}" for i in range(1, 29)]


def _to_submission_frame(target, target_col, columns):
    """Pivot a long target series into the wide submission layout.

    Assumes `target` is indexed so that resetting the index yields the
    columns "id" and "d" (both are required by the pivot below).

    Args:
        target: series-like object exposing `.values` and `.index`.
        target_col: name given to the value column before pivoting.
        columns: final column labels; the count must equal the width of
            the pivoted table, otherwise pandas raises ValueError.

    Returns:
        DataFrame with one row per id, relabelled with `columns`.
    """
    long_frame = pd.DataFrame(target.values,
                              index=target.index,
                              columns=[target_col]).reset_index()
    wide = pd.pivot(long_frame,
                    index="id",
                    columns="d",
                    values=target_col).reset_index()
    wide.columns = columns
    return wide


valid = data["validation"][1]
evalu = data["evaluation"][1]

# Only the two target series are needed from here on; drop the rest of
# the dataset before building the wide tables.
del data
gc.collect()

valid = _to_submission_frame(valid, feature_maker.target_col, sub_cols)
evalu = _to_submission_frame(evalu, feature_maker.target_col, sub_cols)

# The validation rows reuse the evaluation ids with the suffix swapped.
# regex=False makes this a literal replacement on every pandas version.
valid["id"] = valid["id"].str.replace("_evaluation","_validation", regex=False)

pd.concat([valid,evalu]).to_csv(sub_path,index=False)
print("  -- completed")

start submission
  -- completed
