# Imports and config

In [1]:
import os
import pickle
import random
import time
import itertools

import numpy as np
import pandas as pd

import xgboost as xgb

import config as cfg

# Model Selection

https://www.analyticsvidhya.com/blog/2016/03/complete-guide-parameter-tuning-xgboost-with-codes-python/  

We will use an approach similar to that of GBM here. The various steps to be performed are:

- Choose a relatively high learning rate. Generally a learning rate of 0.1 works but somewhere between 0.05 to 0.3 - should work for different problems. Determine the optimum number of trees for this learning rate. XGBoost has a very useful function called as “cv” which performs cross-validation at each boosting iteration and thus returns the optimum number of trees required.  
- Tune tree-specific parameters ( max_depth, min_child_weight, gamma, subsample, colsample_bytree) for decided learning rate and number of trees. Note that we can choose different parameters to define a tree and I’ll take up an example here.  
- Tune regularization parameters (lambda, alpha) for xgboost which can help reduce model complexity and enhance performance.  
- Lower the learning rate and decide the optimal parameters .

In [2]:
df= pd.read_pickle("cleaned_sets/df_fs_done.pkl")

In [3]:
train = df.loc[(df["date_block_num"]>=20) & (df["date_block_num"]<=32)]
test = df.loc[(df["date_block_num"]>=33)]

identificators = ["shop_id","item_id","date_block_num"]
label = "item_cnt_next_month"
predictors = [x for x in train.columns if x not in identificators and x!=label]

X_train = train[predictors]
y_train = train[label]

X_test = test[predictors+identificators]
del df

In [4]:
dtrain = xgb.DMatrix(X_train, label=y_train)

### Function for fine tuning

In [5]:
def dict_to_iterlist(d):
    keys=d.keys()
    lists=d.values()
    return list(keys), list(itertools.product(*lists))

In [6]:
def fine_tune_xgb(initial_params, gridsearch_params, dtrain, early_stopping_rounds=10, cv_fold=5):
    min_rmse = float("Inf")
    best_params = None
    
    params=initial_params
    gs_param_names, combinations = dict_to_iterlist(gridsearch_params)
    nb_gs_params = len(gs_param_names)
    
    boosting_rounds=100
    
    if "num_boost_round" in params:
        boosting_rounds = params["num_boost_round"]
    
    for combi in combinations:
        start_time=time.time()
        print(", ".join([f"{gs_param_names[i]}={combi[i]}" for i in range(nb_gs_params)]))

        # Update our parameters
        for i in range(nb_gs_params):
            params[gs_param_names[i]] = combi[i]
            
        if "num_boost_round" in gs_param_names:
            boosting_rounds=combi[gs_param_names.index("num_boost_round")]
        
        # Run CV
        cv_results = xgb.cv(
            params,
            dtrain,
            num_boost_round=boosting_rounds,
            seed=42,
            nfold=cv_fold,
            metrics={'rmse'},
            early_stopping_rounds=early_stopping_rounds
        )
        # Update best RMSE
        
        mean_rmse = cv_results['test-rmse-mean'].min()
        boost_rounds = np.argmin(np.array(cv_results['test-rmse-mean']))
        
        print("\tRMSE {} for {} rounds".format(mean_rmse, boost_rounds))
        print("Time taken for this round {}".format(time.time()-start_time))
        if mean_rmse < min_rmse:
            min_rmse = mean_rmse
            best_params = combi
    
    print(best_params)
    print("".join(["Best params:", 
                   ", ".join([f"{gs_param_names[i]}={best_params[i]}" for i in range(nb_gs_params)]),
                  f", RMSE: {min_rmse}",]))

#### Number of estimators

In [49]:
params = {
    # Parameters that we are going to tune.
    'max_depth':5,
    'min_child_weight': 2,
    'gamma':0,
    'eta':.1,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    # Other parameters
    'objective':'reg:squarederror',
    'eval_metric':'rmse',
    "tree_method":'gpu_hist',
    "gpu_id":0
}

gridsearch_params = {
    "num_boost_round":[50,100,200,300,400,500]
        }

fine_tune_xgb(params, gridsearch_params, dtrain, early_stopping_rounds=20)

num_boost_round=50
	RMSE 1.2196630000000002 for 49 rounds
Time taken for this round 103.70703482627869
num_boost_round=100
	RMSE 1.1969746 for 99 rounds
Time taken for this round 172.74095916748047
num_boost_round=200
	RMSE 1.1890098 for 167 rounds
Time taken for this round 291.32142782211304
num_boost_round=300
	RMSE 1.1899692 for 185 rounds
Time taken for this round 315.2127494812012
num_boost_round=400
	RMSE 1.1891631999999999 for 167 rounds
Time taken for this round 291.71624302864075
num_boost_round=500
	RMSE 1.1880558 for 167 rounds
Time taken for this round 291.6056160926819
(500,)
Best params:num_boost_round=500, RMSE: 1.1880558


num_boost_round=170 seems fine: more trees lead to the same results as the training is stopped at 167 rounds.

##### max_depth, min_child_weight

In [50]:
params = {
    # Parameters that we are going to tune.
    'num_boost_round':170,
    'max_depth':5,
    'min_child_weight': 2,
    'gamma':0,
    'eta':.1,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    # Other parameters
    'objective':'reg:squarederror',
    'eval_metric':'rmse',
    "tree_method":'gpu_hist',
    "gpu_id":0
}

gridsearch_params = {
    "max_depth": [4,5,6,7,8,10,12],
    "min_child_weight": [1,2,3,4]
}

fine_tune_xgb(params, gridsearch_params, dtrain)

max_depth=4, min_child_weight=1
	RMSE 1.2274471999999998 for 167 rounds
Time taken for this round 258.2041437625885
max_depth=4, min_child_weight=2
	RMSE 1.2402436 for 121 rounds
Time taken for this round 208.62208604812622
max_depth=4, min_child_weight=3
	RMSE 1.238587 for 120 rounds
Time taken for this round 205.17154598236084
max_depth=4, min_child_weight=4
	RMSE 1.2321638 for 168 rounds
Time taken for this round 257.22781586647034
max_depth=5, min_child_weight=1
	RMSE 1.2001386 for 142 rounds
Time taken for this round 245.27119278907776
max_depth=5, min_child_weight=2
	RMSE 1.1967138 for 120 rounds
Time taken for this round 215.67621684074402
max_depth=5, min_child_weight=3
	RMSE 1.2136694000000001 for 100 rounds
Time taken for this round 187.8080506324768
max_depth=5, min_child_weight=4
	RMSE 1.2024322 for 120 rounds
Time taken for this round 214.21903562545776
max_depth=6, min_child_weight=1
	RMSE 1.2001954 for 126 rounds
Time taken for this round 235.06127548217773
max_depth=6, 

	RMSE 1.1802432 for 167 rounds
Time taken for this round 298.44205713272095
max_depth=7, min_child_weight=2
	RMSE 1.1818848000000002 for 126 rounds
Time taken for this round 245.60728693008423
max_depth=7, min_child_weight=3
	RMSE 1.1803046000000001 for 85 rounds
Time taken for this round 183.79954957962036
max_depth=7, min_child_weight=4
	RMSE 1.1927474 for 120 rounds
Time taken for this round 234.44723391532898
max_depth=8, min_child_weight=1
	RMSE 1.1879422000000002 for 142 rounds
Time taken for this round 286.6756217479706
max_depth=8, min_child_weight=2
	RMSE 1.1722228 for 167 rounds
Time taken for this round 311.0571653842926
max_depth=8, min_child_weight=3
	RMSE 1.1794086 for 120 rounds
Time taken for this round 247.87071657180786
max_depth=8, min_child_weight=4
	RMSE 1.1948006 for 57 rounds
Time taken for this round 148.36074423789978
max_depth=10, min_child_weight=1
	RMSE 1.1827400000000001 for 80 rounds
Time taken for this round 222.37631058692932
max_depth=10, min_child_weig

	RMSE 1.1930223999999998 for 62 rounds
Time taken for this round 237.44577026367188
max_depth=12, min_child_weight=2
	RMSE 1.1837238 for 62 rounds
Time taken for this round 228.58414387702942
max_depth=12, min_child_weight=3
	RMSE 1.1743868 for 62 rounds
Time taken for this round 221.41219925880432
max_depth=12, min_child_weight=4
	RMSE 1.1729502000000003 for 79 rounds
Time taken for this round 246.31732845306396
(8, 2)
Best params:max_depth=8, min_child_weight=2, RMSE: 1.1722228


In [52]:
params = {
    # Parameters that we are going to tune.
    'num_boost_round':170,
    'max_depth':5,
    'min_child_weight': 2,
    'gamma':0,
    'eta':.1,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    # Other parameters
    'objective':'reg:squarederror',
    'eval_metric':'rmse',
    "tree_method":'gpu_hist',
    "gpu_id":0
}

gridsearch_params = {
    "max_depth": [7,8,9],
    "min_child_weight": [2,3,4]
}

fine_tune_xgb(params, gridsearch_params, dtrain)

max_depth=7, min_child_weight=2
	RMSE 1.1804876 for 121 rounds
Time taken for this round 238.43140172958374
max_depth=7, min_child_weight=3
	RMSE 1.1799836 for 86 rounds
Time taken for this round 183.94056367874146
max_depth=7, min_child_weight=4
	RMSE 1.2046284 for 85 rounds
Time taken for this round 183.2579152584076
max_depth=8, min_child_weight=2
	RMSE 1.1726098 for 167 rounds
Time taken for this round 312.01624631881714
max_depth=8, min_child_weight=3
	RMSE 1.1880956 for 59 rounds
Time taken for this round 154.11806225776672
max_depth=8, min_child_weight=4
	RMSE 1.1923002 for 86 rounds
Time taken for this round 192.7191858291626
max_depth=9, min_child_weight=2
	RMSE 1.19608 for 62 rounds
Time taken for this round 172.04544067382812
max_depth=9, min_child_weight=3
	RMSE 1.1890304 for 56 rounds
Time taken for this round 158.5233793258667
max_depth=9, min_child_weight=4
	RMSE 1.1868642 for 78 rounds
Time taken for this round 193.0987958908081
(8, 2)
Best params:max_depth=8, min_child

max_depth=8, min_child_weight=2 are the best results

#### Recalibrate num_boost_round

In [51]:
params = {
    # Parameters that we are going to tune.
    'max_depth':8,
    'min_child_weight': 2,
    'gamma':0,
    'eta':.1,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    # Other parameters
    'objective':'reg:squarederror',
    'eval_metric':'rmse',
    "tree_method":'gpu_hist',
    "gpu_id":0
}

gridsearch_params = {
    'num_boost_round':[300]
        }

fine_tune_xgb(params, gridsearch_params, dtrain, early_stopping_rounds=20)

num_boost_round=300
	RMSE 1.1737327999999998 for 179 rounds
Time taken for this round 358.37774634361267
(300,)
Best params:num_boost_round=300, RMSE: 1.1737327999999998


We'll take 200

#### Gamma

In [53]:
params = {
    # Parameters that we are going to tune.
    'num_boost_round': 200,
    'max_depth':8,
    'min_child_weight': 2,
    'gamma':0,
    'eta':.1,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    # Other parameters
    'objective':'reg:squarederror',
    'eval_metric':'rmse',
    "tree_method":'gpu_hist',
    "gpu_id":0
}

gridsearch_params = {
    'gamma':[0,0.1,0.2, 0.5, 1, 3, 5, 10]
        }

fine_tune_xgb(params, gridsearch_params, dtrain)

gamma=0
	RMSE 1.1740466 for 142 rounds
Time taken for this round 283.63626527786255
gamma=0.1
	RMSE 1.1857046 for 84 rounds
Time taken for this round 194.42441487312317
gamma=0.2
	RMSE 1.1863296 for 83 rounds
Time taken for this round 193.43651843070984
gamma=0.5
	RMSE 1.1861798000000001 for 83 rounds
Time taken for this round 192.63363575935364
gamma=1
	RMSE 1.1851858 for 83 rounds
Time taken for this round 192.5298991203308
gamma=3
	RMSE 1.1852354000000003 for 83 rounds
Time taken for this round 192.6878845691681
gamma=5
	RMSE 1.1839124 for 83 rounds
Time taken for this round 192.25400686264038
gamma=10
	RMSE 1.189887 for 86 rounds
Time taken for this round 196.72802639007568
(0,)
Best params:gamma=0, RMSE: 1.1740466


gamma=0.5 is the best

#### Tune subsample and colsample_bytree

In [7]:
params = {
    # Parameters that we are going to tune.
    'num_boost_round': 200,
    'max_depth':8,
    'min_child_weight': 2,
    'gamma':0,
    'eta':.1,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    # Other parameters
    'objective':'reg:squarederror',
    'eval_metric':'rmse',
    "tree_method":'gpu_hist',
    "gpu_id":0
}

gridsearch_params = {
    'subsample':[i/10.0 for i in range(6,10)],
    'colsample_bytree':[i/10.0 for i in range(6,10)]
        }

fine_tune_xgb(params, gridsearch_params, dtrain)

subsample=0.6, colsample_bytree=0.6
	RMSE 1.1875862000000001 for 87 rounds
Time taken for this round 184.87776160240173
subsample=0.6, colsample_bytree=0.7
	RMSE 1.189267 for 70 rounds
Time taken for this round 160.229887008667
subsample=0.6, colsample_bytree=0.8
	RMSE 1.1915727999999999 for 70 rounds
Time taken for this round 153.00372862815857
subsample=0.6, colsample_bytree=0.9
	RMSE 1.1884956 for 142 rounds
Time taken for this round 246.17335152626038
subsample=0.7, colsample_bytree=0.6
	RMSE 1.1749862 for 90 rounds
Time taken for this round 196.45688319206238
subsample=0.7, colsample_bytree=0.7
	RMSE 1.1852384 for 83 rounds
Time taken for this round 186.45992374420166
subsample=0.7, colsample_bytree=0.8
	RMSE 1.17841 for 68 rounds
Time taken for this round 159.00785279273987
subsample=0.7, colsample_bytree=0.9
	RMSE 1.1778976 for 76 rounds
Time taken for this round 170.62844347953796
subsample=0.8, colsample_bytree=0.6
	RMSE 1.171667 for 70 rounds
Time taken for this round 176.872

	RMSE 1.1733042 for 86 rounds
Time taken for this round 214.87110257148743
subsample=0.9, colsample_bytree=0.7
	RMSE 1.1820495999999998 for 150 rounds
Time taken for this round 324.3407816886902
subsample=0.9, colsample_bytree=0.8
	RMSE 1.1882100000000002 for 85 rounds
Time taken for this round 209.80343341827393
subsample=0.9, colsample_bytree=0.9
	RMSE 1.1941674 for 167 rounds
Time taken for this round 349.7012231349945
(0.8, 0.6)
Best params:subsample=0.8, colsample_bytree=0.6, RMSE: 1.171667


In [9]:
params = {
    # Parameters that we are going to tune.
    'num_boost_round': 200,
    'max_depth':8,
    'min_child_weight': 2,
    'gamma':0,
    'eta':.1,
    'subsample': 0.8,
    'colsample_bytree': 0.8,
    # Other parameters
    'objective':'reg:squarederror',
    'eval_metric':'rmse',
    "tree_method":'gpu_hist',
    "gpu_id":0
}

gridsearch_params = {
    'subsample':[0.75, 0.8, 0.85],
    'colsample_bytree':[0.55, 0.6, 0.65]
        }

fine_tune_xgb(params, gridsearch_params, dtrain)

subsample=0.75, colsample_bytree=0.55
	RMSE 1.1852896000000002 for 88 rounds
Time taken for this round 208.34545588493347
subsample=0.75, colsample_bytree=0.6
	RMSE 1.1858366000000002 for 78 rounds
Time taken for this round 184.12703704833984
subsample=0.75, colsample_bytree=0.65
	RMSE 1.173198 for 70 rounds
Time taken for this round 176.0330832004547
subsample=0.8, colsample_bytree=0.55
	RMSE 1.1713628 for 86 rounds
Time taken for this round 211.1031301021576
subsample=0.8, colsample_bytree=0.6
	RMSE 1.1719052 for 75 rounds
Time taken for this round 185.3335473537445
subsample=0.8, colsample_bytree=0.65
	RMSE 1.1808406 for 86 rounds
Time taken for this round 206.0617229938507
subsample=0.85, colsample_bytree=0.55
	RMSE 1.1955188 for 88 rounds
Time taken for this round 220.09471559524536
subsample=0.85, colsample_bytree=0.6
	RMSE 1.1887257999999998 for 88 rounds
Time taken for this round 211.3262324333191
subsample=0.85, colsample_bytree=0.65
	RMSE 1.1769630000000002 for 76 rounds
Time

### Regularization parameters

In [7]:
params = {
    # Parameters that we are going to tune.
    'num_boost_round': 200,
    'max_depth':8,
    'min_child_weight': 2,
    'gamma':0,
    'eta':.1,
    'subsample': 0.8,
    'colsample_bytree': 0.55,
    # Other parameters
    'objective':'reg:squarederror',
    'eval_metric':'rmse',
    "tree_method":'gpu_hist',
    "gpu_id":0
}

gridsearch_params = {
    'reg_alpha': [1e-5, 1e-2, 0.1, 1, 100]
        }

fine_tune_xgb(params, gridsearch_params, dtrain)

reg_alpha=1e-05
	RMSE 1.1705998 for 86 rounds
Time taken for this round 217.98684453964233
reg_alpha=0.01
	RMSE 1.174505 for 82 rounds
Time taken for this round 203.41621708869934
reg_alpha=0.1
	RMSE 1.1702422000000001 for 86 rounds
Time taken for this round 214.10375595092773
reg_alpha=1
	RMSE 1.1696766 for 89 rounds
Time taken for this round 218.25359916687012
reg_alpha=100
	RMSE 1.1810122 for 91 rounds
Time taken for this round 221.46034741401672
(1,)
Best params:reg_alpha=1, RMSE: 1.1696766


In [10]:
params = {
    # Parameters that we are going to tune.
    'num_boost_round': 200,
    'max_depth':8,
    'min_child_weight': 2,
    'gamma':0,
    'eta':.1,
    'subsample': 0.8,
    'colsample_bytree': 0.55,
    # Other parameters
    'objective':'reg:squarederror',
    'eval_metric':'rmse',
    "tree_method":'gpu_hist',
    "gpu_id":0
}

gridsearch_params = {
    'reg_lambda': [1e-5, 1e-2, 0.1, 1, 100]
        }

fine_tune_xgb(params, gridsearch_params, dtrain)

reg_lambda=1e-05
	RMSE 1.1877206 for 79 rounds
Time taken for this round 200.7032380104065
reg_lambda=0.01
	RMSE 1.1853394 for 78 rounds
Time taken for this round 199.20953822135925
reg_lambda=0.1
	RMSE 1.1897996 for 77 rounds
Time taken for this round 197.6003177165985
reg_lambda=1
	RMSE 1.1709931999999998 for 85 rounds
Time taken for this round 209.83116602897644
reg_lambda=100
	RMSE 1.2446728 for 199 rounds
Time taken for this round 366.2140300273895
(1,)
Best params:reg_lambda=1, RMSE: 1.1709931999999998


In [8]:
params = {
    # Parameters that we are going to tune.
    'num_boost_round': 200,
    'max_depth':8,
    'min_child_weight': 2,
    'gamma':0,
    'eta':.1,
    'subsample': 0.8,
    'colsample_bytree': 0.55,
    # Other parameters
    'objective':'reg:squarederror',
    'eval_metric':'rmse',
    "tree_method":'gpu_hist',
    "gpu_id":0
}

gridsearch_params = {
    'reg_lambda': [0,0.1, 0.5, 1, 2],
    'reg_alpha' : [0,0.1, 0.5, 1, 2]
        }

fine_tune_xgb(params, gridsearch_params, dtrain)

reg_lambda=0, reg_alpha=0
	RMSE 1.1847988 for 84 rounds
Time taken for this round 209.03050017356873
reg_lambda=0, reg_alpha=0.1
	RMSE 1.1882952 for 73 rounds
Time taken for this round 190.46229887008667
reg_lambda=0, reg_alpha=0.5
	RMSE 1.1799668 for 73 rounds
Time taken for this round 189.87893152236938
reg_lambda=0, reg_alpha=1
	RMSE 1.1826328000000002 for 86 rounds
Time taken for this round 212.6131136417389
reg_lambda=0, reg_alpha=2
	RMSE 1.186167 for 79 rounds
Time taken for this round 198.54929423332214
reg_lambda=0.1, reg_alpha=0
	RMSE 1.1873338 for 86 rounds
Time taken for this round 210.93916702270508
reg_lambda=0.1, reg_alpha=0.1
	RMSE 1.1852866 for 90 rounds
Time taken for this round 218.29595375061035
reg_lambda=0.1, reg_alpha=0.5
	RMSE 1.1853378 for 86 rounds
Time taken for this round 211.8728928565979
reg_lambda=0.1, reg_alpha=1
	RMSE 1.1844253999999999 for 84 rounds
Time taken for this round 208.25660800933838
reg_lambda=0.1, reg_alpha=2
	RMSE 1.1825958 for 84 rounds
Ti

	RMSE 1.1789546000000002 for 77 rounds
Time taken for this round 193.2494192123413
reg_lambda=0.5, reg_alpha=1
	RMSE 1.1797578000000002 for 82 rounds
Time taken for this round 202.4807538986206
reg_lambda=0.5, reg_alpha=2
	RMSE 1.1790904000000002 for 82 rounds
Time taken for this round 202.1643249988556
reg_lambda=1, reg_alpha=0
	RMSE 1.1712396000000003 for 84 rounds
Time taken for this round 204.98287892341614
reg_lambda=1, reg_alpha=0.1
	RMSE 1.173066 for 91 rounds
Time taken for this round 217.6272270679474
reg_lambda=1, reg_alpha=0.5
	RMSE 1.170325 for 91 rounds
Time taken for this round 219.31133556365967
reg_lambda=1, reg_alpha=1
	RMSE 1.1696338000000002 for 88 rounds
Time taken for this round 212.8760380744934
reg_lambda=1, reg_alpha=2
	RMSE 1.1634102000000002 for 120 rounds
Time taken for this round 263.7967948913574
reg_lambda=2, reg_alpha=0
	RMSE 1.1796196 for 88 rounds
Time taken for this round 210.02613377571106
reg_lambda=2, reg_alpha=0.1
	RMSE 1.1851124 for 86 rounds
Time

	RMSE 1.1820598 for 88 rounds
Time taken for this round 210.4863612651825
(1, 2)
Best params:reg_lambda=1, reg_alpha=2, RMSE: 1.1634102000000002


In [None]:
params = {
    # Parameters that we are going to tune.
    'num_boost_round': 200,
    'max_depth':8,
    'min_child_weight': 2,
    'gamma':0,
    'eta':.1,
    'subsample': 0.8,
    'colsample_bytree': 0.55,
    # Other parameters
    'objective':'reg:squarederror',
    'eval_metric':'rmse',
    "tree_method":'gpu_hist',
    "gpu_id":0
}

gridsearch_params = {
    'reg_lambda': [1, 2, 3],
    'reg_alpha' : [2, 3, 4]
        }

fine_tune_xgb(params, gridsearch_params, dtrain)

reg_lambda=1, reg_alpha=2
	RMSE 1.1672677999999999 for 86 rounds
Time taken for this round 210.08526277542114
reg_lambda=1, reg_alpha=3


#### Reducing learning rate and increasing boosting rounds

In [7]:
params = {
    # Parameters that we are going to tune.
    'num_boost_round': 2000,
    'max_depth':8,
    'min_child_weight': 2,
    'gamma':0,
    'eta':.1,
    'subsample': 0.8,
    'colsample_bytree': 0.7,
    'reg_lambda':1,
    'reg_alpha':2,
    # Other parameters
    'objective':'reg:squarederror',
    'eval_metric':'rmse',
    "tree_method":'gpu_hist',
    "gpu_id":0
}

gridsearch_params = {
        }

fine_tune_xgb(params, gridsearch_params, dtrain, early_stopping_rounds=20)


	RMSE 1.1810918000000001 for 75 rounds
Time taken for this round 203.28630375862122
()
Best params:, RMSE: 1.1810918000000001


## Training

In [26]:
params = {
    # Parameters that we are going to tune.
    'num_boost_round': 150,
    'max_depth':8,
    'min_child_weight': 2,
    'gamma':0,
    'eta':.1,
    'subsample': 0.8,
    'colsample_bytree': 0.7,
    'reg_lambda':1,
    'reg_alpha':2,
    # Other parameters
    'objective':'reg:squarederror',
    'eval_metric':'rmse',
    "tree_method":'gpu_hist',
    "gpu_id":0
}

xgbr = xgb.XGBRegressor(max_depth = params["max_depth"], min_child_weight = params["min_child_weight"],
                       gamma = params["gamma"], eta=params["eta"], subsample=params["subsample"],
                       colsample_bytree= params["colsample_bytree"], n_estimators=params["num_boost_round"],
                       objective=params["objective"], tree_method= params["tree_method"], gpu_id = params["gpu_id"])

print(dtrain.feature_names)
xgbr.fit(X_train, y_train, eval_metric="rmse",early_stopping_rounds=20, eval_set=[(X_train, y_train)],verbose=True)

['days_with_sell', 'city_name_Moscow', 'city_name_Khimki', 'item_cnt_month', 'std_item_price_lag2', 'mean_item_price', 'mean_category_item_price', 'mean_city_cnt_month', 'std_category_item_price_lag1', 'nb_days', 'main_category_name_Others', 'std_item_price_lag1', 'city_name_SPb', 'city_name_Other', 'std_city_item_price', 'month', 'item_cnt_month_lag2', 'mean_city_item_price']
[0]	validation_0-rmse:2.25022
Will train until validation_0-rmse hasn't improved in 20 rounds.
[1]	validation_0-rmse:2.12050
[2]	validation_0-rmse:1.98549
[3]	validation_0-rmse:1.86912
[4]	validation_0-rmse:1.75886
[5]	validation_0-rmse:1.66426
[6]	validation_0-rmse:1.58306
[7]	validation_0-rmse:1.49978
[8]	validation_0-rmse:1.43010
[9]	validation_0-rmse:1.36563
[10]	validation_0-rmse:1.31378
[11]	validation_0-rmse:1.26338
[12]	validation_0-rmse:1.21995
[13]	validation_0-rmse:1.18687
[14]	validation_0-rmse:1.16195
[15]	validation_0-rmse:1.13583
[16]	validation_0-rmse:1.10490
[17]	validation_0-rmse:1.08058
[18]	va

[241]	validation_0-rmse:0.72810
[242]	validation_0-rmse:0.72775
[243]	validation_0-rmse:0.72764
[244]	validation_0-rmse:0.72755
[245]	validation_0-rmse:0.72723
[246]	validation_0-rmse:0.72686
[247]	validation_0-rmse:0.72662
[248]	validation_0-rmse:0.72628
[249]	validation_0-rmse:0.72613
[250]	validation_0-rmse:0.72598
[251]	validation_0-rmse:0.72575
[252]	validation_0-rmse:0.72564
[253]	validation_0-rmse:0.72540
[254]	validation_0-rmse:0.72535
[255]	validation_0-rmse:0.72524
[256]	validation_0-rmse:0.72515
[257]	validation_0-rmse:0.72489
[258]	validation_0-rmse:0.72464
[259]	validation_0-rmse:0.72420
[260]	validation_0-rmse:0.72396
[261]	validation_0-rmse:0.72391
[262]	validation_0-rmse:0.72369
[263]	validation_0-rmse:0.72356
[264]	validation_0-rmse:0.72341
[265]	validation_0-rmse:0.72319
[266]	validation_0-rmse:0.72285
[267]	validation_0-rmse:0.72253
[268]	validation_0-rmse:0.72190
[269]	validation_0-rmse:0.72182
[270]	validation_0-rmse:0.72177
[271]	validation_0-rmse:0.72160
[272]	va

XGBRegressor(base_score=0.5, booster='gbtree', colsample_bylevel=1,
             colsample_bynode=1, colsample_bytree=0.7, eta=0.1, gamma=0,
             gpu_id=0, importance_type='gain', learning_rate=0.1,
             max_delta_step=0, max_depth=8, min_child_weight=2, missing=None,
             n_estimators=300, n_jobs=1, num_parallel_tree=1,
             objective='reg:squarederror', random_state=0, reg_alpha=0,
             reg_lambda=1, scale_pos_weight=1, subsample=0.8,
             tree_method='gpu_hist', verbosity=1)

# Prediction

In [27]:
X_test = test[["shop_id","item_id","date_block_num"]+predictors]
print(X_test.isnull().sum())
X_test.head(2)

shop_id                         0
item_id                         0
date_block_num                  0
days_with_sell                  0
city_name_Moscow                0
city_name_Khimki                0
item_cnt_month                  0
std_item_price_lag2             0
mean_item_price                 0
mean_category_item_price        0
mean_city_cnt_month             0
std_category_item_price_lag1    0
nb_days                         0
main_category_name_Others       0
std_item_price_lag1             0
city_name_SPb                   0
city_name_Other                 0
std_city_item_price             0
month                           0
item_cnt_month_lag2             0
mean_city_item_price            0
dtype: int64


Unnamed: 0,shop_id,item_id,date_block_num,days_with_sell,city_name_Moscow,city_name_Khimki,item_cnt_month,std_item_price_lag2,mean_item_price,mean_category_item_price,...,std_category_item_price_lag1,nb_days,main_category_name_Others,std_item_price_lag1,city_name_SPb,city_name_Other,std_city_item_price,month,item_cnt_month_lag2,mean_city_item_price
33,0,16385,33,0.0,0,0,0.0,0.0,310.568627,1150.581824,...,12.951833,31,0,0.0,0,0,2.928609,10,0.0,668.887421
67,0,8195,33,0.0,0,0,0.0,0.0,141.206851,316.328258,...,0.777997,31,0,0.0,0,0,2.928609,10,0.0,668.887421


In [28]:
y_pred = xgbr.predict(X_test[predictors])
X_test["item_cnt_month_pred"] = y_pred

X_test.head(2)

Unnamed: 0,shop_id,item_id,date_block_num,days_with_sell,city_name_Moscow,city_name_Khimki,item_cnt_month,std_item_price_lag2,mean_item_price,mean_category_item_price,...,nb_days,main_category_name_Others,std_item_price_lag1,city_name_SPb,city_name_Other,std_city_item_price,month,item_cnt_month_lag2,mean_city_item_price,item_cnt_month_pred
33,0,16385,33,0.0,0,0,0.0,0.0,310.568627,1150.581824,...,31,0,0.0,0,0,2.928609,10,0.0,668.887421,0.041348
67,0,8195,33,0.0,0,0,0.0,0.0,141.206851,316.328258,...,31,0,0.0,0,0,2.928609,10,0.0,668.887421,0.016083


# Format to submission

In [29]:
to_pred = pd.read_csv(cfg.FILENAMES['TEST_SALES'])
to_pred.head(2)

Unnamed: 0,ID,shop_id,item_id
0,0,5,5037
1,1,5,5320


In [30]:
submission = pd.merge(to_pred, X_test[["item_id","shop_id","item_cnt_month_pred"]],
                      how="left", on=["item_id","shop_id"])
print(submission.isnull().sum())
submission = submission.fillna(0)
submission.head(2)

ID                          0
shop_id                     0
item_id                     0
item_cnt_month_pred    102796
dtype: int64


Unnamed: 0,ID,shop_id,item_id,item_cnt_month_pred
0,0,5,5037,0.988628
1,1,5,5320,0.0


In [31]:
submission.loc[submission["item_cnt_month_pred"]>20,"item_cnt_month_pred"]=20
submission.loc[submission["item_cnt_month_pred"]<0, "item_cnt_month_pred"]=0

In [32]:
sub_example = pd.read_csv(cfg.FILENAMES["SAMPLE_SUBM"])
sub_example.head(2)

Unnamed: 0,ID,item_cnt_month
0,0,0.5
1,1,0.5


In [33]:
submission_formated = (submission[["ID", "item_cnt_month_pred"]]
                       .rename({"item_cnt_month_pred":"item_cnt_month"}, axis=1))
submission_formated.head(2)

Unnamed: 0,ID,item_cnt_month
0,0,0.988628
1,1,0.0


In [34]:
submission_formated.to_csv(os.path.join("submissions", "sub_09.csv"), index=False)