In [3]:
import os
import warnings

import numpy as np
import pandas as pd
# library for splitting training-testing
from sklearn.model_selection import train_test_split
# library for classification
from sklearn.metrics import accuracy_score

from sklearn.ensemble import RandomForestClassifier
from sklearn.svm import SVC
from sklearn.decomposition import PCA

from sklearn.model_selection import GridSearchCV, PredefinedSplit
from sklearn.preprocessing import StandardScaler
from sklearn import pipeline
import xgboost as XGB

from hyperopt import tpe,hp,Trials,space_eval
from hyperopt.fmin import fmin

  from pandas import MultiIndex, Int64Index


In [4]:
# Feature matrices for the training and test sets, stored as feather files.
x_train = pd.read_feather("Data/x_train__IterativeImputer_genFeats.ftr")
x_test = pd.read_feather("Data/x_test__IterativeImputer_genFeats.ftr")

# Training labels, plus the sample-output file, which is read with
# header=None and explicit column names.
y_train = pd.read_csv("Data/train_y.csv")
y_test = pd.read_csv(
    "Data/Sample_Output.csv",
    header=None,
    names=['ID', 'Target'],
)

In [5]:
# Hold out 15% of the training rows for validation, stratified on the labels;
# the fixed random_state keeps the split reproducible.
x_train_1, x_val, y_train_1, y_val = train_test_split(
    x_train,
    y_train,
    test_size=0.15,
    stratify=y_train,
    random_state=59,
)

In [4]:
def objective(space):
    """Hyperopt objective: fit one XGBoost candidate and score it on the hold-out split.

    Parameters
    ----------
    space : dict
        Sampled hyperparameters with the keys 'n_estimators', 'max_depth',
        'learning_rate', 'gamma', 'min_child_weight', 'subsample' and
        'colsample_bytree'.

    Returns
    -------
    float
        Negative validation accuracy, so that minimising the loss maximises
        accuracy.

    Notes
    -----
    Reads the module-level ``x_train_1``, ``y_train_1``, ``x_val`` and
    ``y_val``. Prints the train accuracy, validation accuracy and the sampled
    hyperparameters for every evaluation.
    """
    # Flatten the label frames once; the estimator and the metric both get 1-D arrays.
    y_fit = y_train_1.values.reshape(-1)
    y_hold = y_val.values.reshape(-1)

    with warnings.catch_warnings():
        # pandas reports the Int64Index deprecation (triggered from inside
        # xgboost) as a FutureWarning, so filtering DeprecationWarning alone
        # did not silence it. The filters are scoped to this call so later
        # cells still see warnings.
        warnings.simplefilter('ignore', category=DeprecationWarning)
        warnings.simplefilter('ignore', category=FutureWarning)

        model = XGB.XGBClassifier(n_estimators = space['n_estimators'],
                                max_depth = int(space['max_depth']),
                                learning_rate = space['learning_rate'],
                                gamma = space['gamma'],
                                min_child_weight = space['min_child_weight'],
                                subsample = space['subsample'],
                                colsample_bytree = space['colsample_bytree'],
                                verbosity=1,
                                eval_metric=['mlogloss', 'auc'],
                                n_jobs=8)

        # Both splits are monitored during boosting; training stops after 10
        # rounds without improvement.
        model.fit(x_train_1,
                  y_fit,
                  eval_set=[(x_train_1, y_fit), (x_val, y_hold)],
                  verbose=1,
                  early_stopping_rounds=10,)

        preds_tr = model.predict(x_train_1,)
        preds_val = model.predict(x_val)

    val_score = accuracy_score(y_hold, preds_val)
    tr_score = accuracy_score(y_fit, preds_tr)
    print(tr_score, val_score, space)
    return -1.0*val_score

def optimize(trial, max_evals=20):
    """Run a TPE hyperparameter search over the XGBoost space using ``objective``.

    Parameters
    ----------
    trial : hyperopt.Trials
        Trials object that records each evaluation of the search.
    max_evals : int, optional
        Number of objective evaluations to run (default 20).

    Returns
    -------
    dict
        The best hyperparameters found, expressed as actual values.
    """
    space = {
    'max_depth' : hp.choice('max_depth', range(5, 16, 1)),
    'learning_rate' : hp.quniform('learning_rate', 0.01, 0.5, 0.01),
    'n_estimators' : hp.choice('n_estimators', range(20, 100, 5)),
    'gamma' : hp.quniform('gamma', 0, 0.50, 0.01),
    'min_child_weight' : hp.quniform('min_child_weight', 1, 10, 1),
    'subsample' : hp.quniform('subsample', 0.1, 1, 0.01),
    'colsample_bytree' : hp.quniform('colsample_bytree', 0.1, 1.0, 0.01)}
    # The seeded generator makes the sequence of sampled candidates repeatable.
    best = fmin(fn=objective,
                space=space,
                algo=tpe.suggest,
                trials=trial,
                max_evals=max_evals,
                rstate=np.random.default_rng(59))
    # fmin reports hp.choice parameters ('max_depth', 'n_estimators') as the
    # index of the selected option, not the option itself; space_eval maps the
    # result back onto the search space so the dict holds usable values.
    return space_eval(space, best)

# Run the search; the Trials object is handed to optimize() so the search
# history remains available afterwards.
trial = Trials()
best = optimize(trial)

  0%|                                                                   | 0/20 [00:00<?, ?trial/s, best loss=?]


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



[0]	validation_0-mlogloss:1.14156	validation_0-auc:0.92701	validation_1-mlogloss:1.14372	validation_1-auc:0.92225
[1]	validation_0-mlogloss:0.98147	validation_0-auc:0.93502	validation_1-mlogloss:0.98607	validation_1-auc:0.92927
[2]	validation_0-mlogloss:0.86491	validation_0-auc:0.93877	validation_1-mlogloss:0.87144	validation_1-auc:0.93282
[3]	validation_0-mlogloss:0.77690	validation_0-auc:0.94105	validation_1-mlogloss:0.78562	validation_1-auc:0.93461
[4]	validation_0-mlogloss:0.70906	validation_0-auc:0.94248	validation_1-mlogloss:0.71975	validation_1-auc:0.93563
[5]	validation_0-mlogloss:0.65556	validation_0-auc:0.94381	validation_1-mlogloss:0.66863	validation_1-auc:0.93654
[6]	validation_0-mlogloss:0.61347	validation_0-auc:0.94476	validation_1-mlogloss:0.62845	validation_1-auc:0.93729
[7]	validation_0-mlogloss:0.57933	validation_0-auc:0.94556	validation_1-mlogloss:0.59642	validation_1-auc:0.93771
[8]	validation_0-mlogloss:0.55199	validation_0-auc:0.94624	validation_1-mlogloss:0.57079


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



[0]	validation_0-mlogloss:1.00827	validation_0-auc:0.92127	validation_1-mlogloss:1.00945	validation_1-auc:0.91929
[1]	validation_0-mlogloss:0.82091	validation_0-auc:0.93176	validation_1-mlogloss:0.82274	validation_1-auc:0.93011
[2]	validation_0-mlogloss:0.70557	validation_0-auc:0.93543	validation_1-mlogloss:0.70841	validation_1-auc:0.93362
[3]	validation_0-mlogloss:0.62870	validation_0-auc:0.93767	validation_1-mlogloss:0.63258	validation_1-auc:0.93555
[4]	validation_0-mlogloss:0.57803	validation_0-auc:0.93887	validation_1-mlogloss:0.58324	validation_1-auc:0.93636
[5]	validation_0-mlogloss:0.54307	validation_0-auc:0.93983	validation_1-mlogloss:0.54923	validation_1-auc:0.93712
[6]	validation_0-mlogloss:0.51869	validation_0-auc:0.94071	validation_1-mlogloss:0.52606	validation_1-auc:0.93767
[7]	validation_0-mlogloss:0.50071	validation_0-auc:0.94158	validation_1-mlogloss:0.50913	validation_1-auc:0.93829
[8]	validation_0-mlogloss:0.48793	validation_0-auc:0.94218	validation_1-mlogloss:0.49750


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



[0]	validation_0-mlogloss:1.33460	validation_0-auc:0.93002	validation_1-mlogloss:1.33462	validation_1-auc:0.92751
[1]	validation_0-mlogloss:1.28759	validation_0-auc:0.93491	validation_1-mlogloss:1.28761	validation_1-auc:0.93231
[2]	validation_0-mlogloss:1.24348	validation_0-auc:0.93617	validation_1-mlogloss:1.24375	validation_1-auc:0.93348
[3]	validation_0-mlogloss:1.20252	validation_0-auc:0.93683	validation_1-mlogloss:1.20307	validation_1-auc:0.93415
[4]	validation_0-mlogloss:1.16445	validation_0-auc:0.93718	validation_1-mlogloss:1.16523	validation_1-auc:0.93447
[5]	validation_0-mlogloss:1.12914	validation_0-auc:0.93732	validation_1-mlogloss:1.12993	validation_1-auc:0.93464
[6]	validation_0-mlogloss:1.09595	validation_0-auc:0.93750	validation_1-mlogloss:1.09693	validation_1-auc:0.93486
[7]	validation_0-mlogloss:1.06502	validation_0-auc:0.93769	validation_1-mlogloss:1.06596	validation_1-auc:0.93510
[8]	validation_0-mlogloss:1.03585	validation_0-auc:0.93797	validation_1-mlogloss:1.03685


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



[0]	validation_0-mlogloss:0.88931	validation_0-auc:0.92453	validation_1-mlogloss:0.88887	validation_1-auc:0.92435
[1]	validation_0-mlogloss:0.70803	validation_0-auc:0.93156	validation_1-mlogloss:0.70798	validation_1-auc:0.93095
[2]	validation_0-mlogloss:0.61153	validation_0-auc:0.93385	validation_1-mlogloss:0.61175	validation_1-auc:0.93330
[3]	validation_0-mlogloss:0.55594	validation_0-auc:0.93528	validation_1-mlogloss:0.55642	validation_1-auc:0.93471
[4]	validation_0-mlogloss:0.52279	validation_0-auc:0.93651	validation_1-mlogloss:0.52372	validation_1-auc:0.93586
[5]	validation_0-mlogloss:0.50204	validation_0-auc:0.93779	validation_1-mlogloss:0.50351	validation_1-auc:0.93705
[6]	validation_0-mlogloss:0.48876	validation_0-auc:0.93863	validation_1-mlogloss:0.49103	validation_1-auc:0.93768
[7]	validation_0-mlogloss:0.47973	validation_0-auc:0.93943	validation_1-mlogloss:0.48257	validation_1-auc:0.93832
[8]	validation_0-mlogloss:0.47336	validation_0-auc:0.94029	validation_1-mlogloss:0.47679


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



[0]	validation_0-mlogloss:1.10731	validation_0-auc:0.93721	validation_1-mlogloss:1.11980	validation_1-auc:0.91377
[1]	validation_0-mlogloss:0.92914	validation_0-auc:0.95014	validation_1-mlogloss:0.95299	validation_1-auc:0.92491
[2]	validation_0-mlogloss:0.80224	validation_0-auc:0.95566	validation_1-mlogloss:0.83680	validation_1-auc:0.92950
[3]	validation_0-mlogloss:0.70734	validation_0-auc:0.95915	validation_1-mlogloss:0.75209	validation_1-auc:0.93180
[4]	validation_0-mlogloss:0.63450	validation_0-auc:0.96169	validation_1-mlogloss:0.68839	validation_1-auc:0.93353
[5]	validation_0-mlogloss:0.57738	validation_0-auc:0.96369	validation_1-mlogloss:0.63988	validation_1-auc:0.93465
[6]	validation_0-mlogloss:0.53144	validation_0-auc:0.96547	validation_1-mlogloss:0.60253	validation_1-auc:0.93555
[7]	validation_0-mlogloss:0.49469	validation_0-auc:0.96700	validation_1-mlogloss:0.57368	validation_1-auc:0.93623
[8]	validation_0-mlogloss:0.46446	validation_0-auc:0.96840	validation_1-mlogloss:0.55115


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



[0]	validation_0-mlogloss:1.08052	validation_0-auc:0.92990	validation_1-mlogloss:1.08139	validation_1-auc:0.92800
[1]	validation_0-mlogloss:0.90455	validation_0-auc:0.93511	validation_1-mlogloss:0.90601	validation_1-auc:0.93316
[2]	validation_0-mlogloss:0.78524	validation_0-auc:0.93748	validation_1-mlogloss:0.78772	validation_1-auc:0.93520
[3]	validation_0-mlogloss:0.70126	validation_0-auc:0.93889	validation_1-mlogloss:0.70490	validation_1-auc:0.93632
[4]	validation_0-mlogloss:0.64087	validation_0-auc:0.93977	validation_1-mlogloss:0.64525	validation_1-auc:0.93710
[5]	validation_0-mlogloss:0.59585	validation_0-auc:0.94067	validation_1-mlogloss:0.60147	validation_1-auc:0.93773
[6]	validation_0-mlogloss:0.56182	validation_0-auc:0.94154	validation_1-mlogloss:0.56844	validation_1-auc:0.93839
[7]	validation_0-mlogloss:0.53615	validation_0-auc:0.94215	validation_1-mlogloss:0.54373	validation_1-auc:0.93877
[8]	validation_0-mlogloss:0.51643	validation_0-auc:0.94272	validation_1-mlogloss:0.52508


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



[0]	validation_0-mlogloss:1.24269	validation_0-auc:0.93775	validation_1-mlogloss:1.24636	validation_1-auc:0.92360
[1]	validation_0-mlogloss:1.12887	validation_0-auc:0.94683	validation_1-mlogloss:1.13627	validation_1-auc:0.93120
[2]	validation_0-mlogloss:1.03471	validation_0-auc:0.95062	validation_1-mlogloss:1.04582	validation_1-auc:0.93404
[3]	validation_0-mlogloss:0.95539	validation_0-auc:0.95263	validation_1-mlogloss:0.97006	validation_1-auc:0.93570
[4]	validation_0-mlogloss:0.88813	validation_0-auc:0.95386	validation_1-mlogloss:0.90589	validation_1-auc:0.93677
[5]	validation_0-mlogloss:0.82980	validation_0-auc:0.95528	validation_1-mlogloss:0.85107	validation_1-auc:0.93761
[6]	validation_0-mlogloss:0.77939	validation_0-auc:0.95626	validation_1-mlogloss:0.80398	validation_1-auc:0.93803
[7]	validation_0-mlogloss:0.73538	validation_0-auc:0.95692	validation_1-mlogloss:0.76294	validation_1-auc:0.93845
[8]	validation_0-mlogloss:0.69665	validation_0-auc:0.95775	validation_1-mlogloss:0.72737


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



[0]	validation_0-mlogloss:1.13690	validation_0-auc:0.92311	validation_1-mlogloss:1.14056	validation_1-auc:0.91433
[1]	validation_0-mlogloss:0.97256	validation_0-auc:0.93669	validation_1-mlogloss:0.98008	validation_1-auc:0.92734
[2]	validation_0-mlogloss:0.85380	validation_0-auc:0.94185	validation_1-mlogloss:0.86487	validation_1-auc:0.93209
[3]	validation_0-mlogloss:0.76504	validation_0-auc:0.94471	validation_1-mlogloss:0.77995	validation_1-auc:0.93402
[4]	validation_0-mlogloss:0.69690	validation_0-auc:0.94667	validation_1-mlogloss:0.71529	validation_1-auc:0.93547
[5]	validation_0-mlogloss:0.64435	validation_0-auc:0.94783	validation_1-mlogloss:0.66603	validation_1-auc:0.93605
[6]	validation_0-mlogloss:0.60269	validation_0-auc:0.94878	validation_1-mlogloss:0.62769	validation_1-auc:0.93647
[7]	validation_0-mlogloss:0.56841	validation_0-auc:0.94975	validation_1-mlogloss:0.59664	validation_1-auc:0.93692
[8]	validation_0-mlogloss:0.54090	validation_0-auc:0.95073	validation_1-mlogloss:0.57209


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



[0]	validation_0-mlogloss:1.10269	validation_0-auc:0.92863	validation_1-mlogloss:1.11579	validation_1-auc:0.90637
[1]	validation_0-mlogloss:0.92486	validation_0-auc:0.94498	validation_1-mlogloss:0.94965	validation_1-auc:0.92099
[2]	validation_0-mlogloss:0.79841	validation_0-auc:0.95203	validation_1-mlogloss:0.83418	validation_1-auc:0.92653
[3]	validation_0-mlogloss:0.70387	validation_0-auc:0.95632	validation_1-mlogloss:0.74998	validation_1-auc:0.92943
[4]	validation_0-mlogloss:0.63192	validation_0-auc:0.95916	validation_1-mlogloss:0.68706	validation_1-auc:0.93156
[5]	validation_0-mlogloss:0.57608	validation_0-auc:0.96127	validation_1-mlogloss:0.63994	validation_1-auc:0.93263
[6]	validation_0-mlogloss:0.53145	validation_0-auc:0.96334	validation_1-mlogloss:0.60430	validation_1-auc:0.93345
[7]	validation_0-mlogloss:0.49531	validation_0-auc:0.96504	validation_1-mlogloss:0.57659	validation_1-auc:0.93413
[8]	validation_0-mlogloss:0.46680	validation_0-auc:0.96652	validation_1-mlogloss:0.55543


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



[0]	validation_0-mlogloss:0.93248	validation_0-auc:0.93022	validation_1-mlogloss:0.93330	validation_1-auc:0.92860
[1]	validation_0-mlogloss:0.74369	validation_0-auc:0.93604	validation_1-mlogloss:0.74662	validation_1-auc:0.93359
[2]	validation_0-mlogloss:0.63621	validation_0-auc:0.93841	validation_1-mlogloss:0.64107	validation_1-auc:0.93544
[3]	validation_0-mlogloss:0.57046	validation_0-auc:0.93993	validation_1-mlogloss:0.57705	validation_1-auc:0.93662
[4]	validation_0-mlogloss:0.52868	validation_0-auc:0.94130	validation_1-mlogloss:0.53732	validation_1-auc:0.93752
[5]	validation_0-mlogloss:0.50137	validation_0-auc:0.94252	validation_1-mlogloss:0.51222	validation_1-auc:0.93838
[6]	validation_0-mlogloss:0.48288	validation_0-auc:0.94367	validation_1-mlogloss:0.49604	validation_1-auc:0.93897
[7]	validation_0-mlogloss:0.46994	validation_0-auc:0.94467	validation_1-mlogloss:0.48500	validation_1-auc:0.93954
[8]	validation_0-mlogloss:0.46080	validation_0-auc:0.94553	validation_1-mlogloss:0.47785


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



[0]	validation_0-mlogloss:1.15160	validation_0-auc:0.92331	validation_1-mlogloss:1.15890	validation_1-auc:0.90664
[1]	validation_0-mlogloss:0.99211	validation_0-auc:0.93914	validation_1-mlogloss:1.00566	validation_1-auc:0.92150
[2]	validation_0-mlogloss:0.87372	validation_0-auc:0.94560	validation_1-mlogloss:0.89359	validation_1-auc:0.92760
[3]	validation_0-mlogloss:0.78175	validation_0-auc:0.94963	validation_1-mlogloss:0.80753	validation_1-auc:0.93079
[4]	validation_0-mlogloss:0.71049	validation_0-auc:0.95193	validation_1-mlogloss:0.74197	validation_1-auc:0.93232
[5]	validation_0-mlogloss:0.65290	validation_0-auc:0.95388	validation_1-mlogloss:0.68989	validation_1-auc:0.93376
[6]	validation_0-mlogloss:0.60628	validation_0-auc:0.95561	validation_1-mlogloss:0.64872	validation_1-auc:0.93468
[7]	validation_0-mlogloss:0.56803	validation_0-auc:0.95699	validation_1-mlogloss:0.61543	validation_1-auc:0.93539
[8]	validation_0-mlogloss:0.53625	validation_0-auc:0.95815	validation_1-mlogloss:0.58885


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



[0]	validation_0-mlogloss:1.24635	validation_0-auc:0.93323	validation_1-mlogloss:1.24710	validation_1-auc:0.92878
[1]	validation_0-mlogloss:1.13618	validation_0-auc:0.93859	validation_1-mlogloss:1.13785	validation_1-auc:0.93396
[2]	validation_0-mlogloss:1.04513	validation_0-auc:0.94035	validation_1-mlogloss:1.04763	validation_1-auc:0.93559
[3]	validation_0-mlogloss:0.96869	validation_0-auc:0.94138	validation_1-mlogloss:0.97222	validation_1-auc:0.93643
[4]	validation_0-mlogloss:0.90410	validation_0-auc:0.94209	validation_1-mlogloss:0.90845	validation_1-auc:0.93697
[5]	validation_0-mlogloss:0.84882	validation_0-auc:0.94283	validation_1-mlogloss:0.85423	validation_1-auc:0.93746
[6]	validation_0-mlogloss:0.80091	validation_0-auc:0.94332	validation_1-mlogloss:0.80721	validation_1-auc:0.93781
[7]	validation_0-mlogloss:0.75907	validation_0-auc:0.94370	validation_1-mlogloss:0.76629	validation_1-auc:0.93811
[8]	validation_0-mlogloss:0.72272	validation_0-auc:0.94410	validation_1-mlogloss:0.73080


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



[0]	validation_0-mlogloss:0.91229	validation_0-auc:0.91612	validation_1-mlogloss:0.91254	validation_1-auc:0.91545
[1]	validation_0-mlogloss:0.72752	validation_0-auc:0.92882	validation_1-mlogloss:0.72811	validation_1-auc:0.92816
[2]	validation_0-mlogloss:0.62690	validation_0-auc:0.93286	validation_1-mlogloss:0.62813	validation_1-auc:0.93207
[3]	validation_0-mlogloss:0.56768	validation_0-auc:0.93533	validation_1-mlogloss:0.56953	validation_1-auc:0.93435
[4]	validation_0-mlogloss:0.53128	validation_0-auc:0.93687	validation_1-mlogloss:0.53406	validation_1-auc:0.93568
[5]	validation_0-mlogloss:0.50936	validation_0-auc:0.93803	validation_1-mlogloss:0.51294	validation_1-auc:0.93666
[6]	validation_0-mlogloss:0.49516	validation_0-auc:0.93880	validation_1-mlogloss:0.49973	validation_1-auc:0.93718
[7]	validation_0-mlogloss:0.48403	validation_0-auc:0.93980	validation_1-mlogloss:0.48958	validation_1-auc:0.93784
[8]	validation_0-mlogloss:0.47671	validation_0-auc:0.94062	validation_1-mlogloss:0.48327


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



[0]	validation_0-mlogloss:1.02627	validation_0-auc:0.93266	validation_1-mlogloss:1.02881	validation_1-auc:0.92868
[1]	validation_0-mlogloss:0.83930	validation_0-auc:0.93810	validation_1-mlogloss:0.84426	validation_1-auc:0.93359
[2]	validation_0-mlogloss:0.71987	validation_0-auc:0.94093	validation_1-mlogloss:0.72747	validation_1-auc:0.93585
[3]	validation_0-mlogloss:0.63945	validation_0-auc:0.94260	validation_1-mlogloss:0.64944	validation_1-auc:0.93707
[4]	validation_0-mlogloss:0.58351	validation_0-auc:0.94374	validation_1-mlogloss:0.59593	validation_1-auc:0.93776
[5]	validation_0-mlogloss:0.54344	validation_0-auc:0.94505	validation_1-mlogloss:0.55848	validation_1-auc:0.93857
[6]	validation_0-mlogloss:0.51428	validation_0-auc:0.94626	validation_1-mlogloss:0.53175	validation_1-auc:0.93935
[7]	validation_0-mlogloss:0.49296	validation_0-auc:0.94702	validation_1-mlogloss:0.51304	validation_1-auc:0.93964
[8]	validation_0-mlogloss:0.47653	validation_0-auc:0.94793	validation_1-mlogloss:0.49926


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



[0]	validation_0-mlogloss:1.04653	validation_0-auc:0.93960	validation_1-mlogloss:1.06052	validation_1-auc:0.91895
[1]	validation_0-mlogloss:0.85363	validation_0-auc:0.95152	validation_1-mlogloss:0.87922	validation_1-auc:0.92956
[2]	validation_0-mlogloss:0.72427	validation_0-auc:0.95668	validation_1-mlogloss:0.76073	validation_1-auc:0.93377
[3]	validation_0-mlogloss:0.63295	validation_0-auc:0.95954	validation_1-mlogloss:0.67901	validation_1-auc:0.93577
[4]	validation_0-mlogloss:0.56649	validation_0-auc:0.96182	validation_1-mlogloss:0.62196	validation_1-auc:0.93680
[5]	validation_0-mlogloss:0.51625	validation_0-auc:0.96384	validation_1-mlogloss:0.58127	validation_1-auc:0.93745
[6]	validation_0-mlogloss:0.47725	validation_0-auc:0.96573	validation_1-mlogloss:0.55135	validation_1-auc:0.93801
[7]	validation_0-mlogloss:0.44749	validation_0-auc:0.96725	validation_1-mlogloss:0.52951	validation_1-auc:0.93844
[8]	validation_0-mlogloss:0.42347	validation_0-auc:0.96871	validation_1-mlogloss:0.51325


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



[0]	validation_0-mlogloss:1.14849	validation_0-auc:0.92327	validation_1-mlogloss:1.14920	validation_1-auc:0.92058
[1]	validation_0-mlogloss:0.99066	validation_0-auc:0.93212	validation_1-mlogloss:0.99216	validation_1-auc:0.92955
[2]	validation_0-mlogloss:0.87437	validation_0-auc:0.93662	validation_1-mlogloss:0.87666	validation_1-auc:0.93415
[3]	validation_0-mlogloss:0.78749	validation_0-auc:0.93822	validation_1-mlogloss:0.79043	validation_1-auc:0.93574
[4]	validation_0-mlogloss:0.72130	validation_0-auc:0.93910	validation_1-mlogloss:0.72474	validation_1-auc:0.93657
[5]	validation_0-mlogloss:0.66951	validation_0-auc:0.93984	validation_1-mlogloss:0.67376	validation_1-auc:0.93720
[6]	validation_0-mlogloss:0.62816	validation_0-auc:0.94056	validation_1-mlogloss:0.63336	validation_1-auc:0.93769
[7]	validation_0-mlogloss:0.59513	validation_0-auc:0.94111	validation_1-mlogloss:0.60110	validation_1-auc:0.93806
[8]	validation_0-mlogloss:0.56851	validation_0-auc:0.94161	validation_1-mlogloss:0.57545


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



[0]	validation_0-mlogloss:0.94059	validation_0-auc:0.91422	validation_1-mlogloss:0.94539	validation_1-auc:0.90868
[1]	validation_0-mlogloss:0.75105	validation_0-auc:0.92692	validation_1-mlogloss:0.75984	validation_1-auc:0.92093
[2]	validation_0-mlogloss:0.64199	validation_0-auc:0.93290	validation_1-mlogloss:0.65499	validation_1-auc:0.92630
[3]	validation_0-mlogloss:0.57571	validation_0-auc:0.93625	validation_1-mlogloss:0.59265	validation_1-auc:0.92919
[4]	validation_0-mlogloss:0.53425	validation_0-auc:0.93857	validation_1-mlogloss:0.55545	validation_1-auc:0.93083
[5]	validation_0-mlogloss:0.50762	validation_0-auc:0.94014	validation_1-mlogloss:0.53336	validation_1-auc:0.93182
[6]	validation_0-mlogloss:0.48937	validation_0-auc:0.94148	validation_1-mlogloss:0.51864	validation_1-auc:0.93292
[7]	validation_0-mlogloss:0.47759	validation_0-auc:0.94254	validation_1-mlogloss:0.51017	validation_1-auc:0.93348
[8]	validation_0-mlogloss:0.46952	validation_0-auc:0.94340	validation_1-mlogloss:0.50524


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



[0]	validation_0-mlogloss:0.91404	validation_0-auc:0.91977	validation_1-mlogloss:0.91445	validation_1-auc:0.91919
[1]	validation_0-mlogloss:0.72823	validation_0-auc:0.92995	validation_1-mlogloss:0.72942	validation_1-auc:0.92938
[2]	validation_0-mlogloss:0.62385	validation_0-auc:0.93478	validation_1-mlogloss:0.62566	validation_1-auc:0.93386
[3]	validation_0-mlogloss:0.56306	validation_0-auc:0.93692	validation_1-mlogloss:0.56556	validation_1-auc:0.93589
[4]	validation_0-mlogloss:0.52643	validation_0-auc:0.93815	validation_1-mlogloss:0.52970	validation_1-auc:0.93686
[5]	validation_0-mlogloss:0.50340	validation_0-auc:0.93923	validation_1-mlogloss:0.50745	validation_1-auc:0.93780
[6]	validation_0-mlogloss:0.48802	validation_0-auc:0.94026	validation_1-mlogloss:0.49304	validation_1-auc:0.93856
[7]	validation_0-mlogloss:0.47767	validation_0-auc:0.94108	validation_1-mlogloss:0.48377	validation_1-auc:0.93907
[8]	validation_0-mlogloss:0.47051	validation_0-auc:0.94180	validation_1-mlogloss:0.47767


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



[0]	validation_0-mlogloss:0.94270	validation_0-auc:0.93440	validation_1-mlogloss:0.94834	validation_1-auc:0.92692
[1]	validation_0-mlogloss:0.74799	validation_0-auc:0.94169	validation_1-mlogloss:0.75963	validation_1-auc:0.93300
[2]	validation_0-mlogloss:0.63434	validation_0-auc:0.94506	validation_1-mlogloss:0.65189	validation_1-auc:0.93526
[3]	validation_0-mlogloss:0.56287	validation_0-auc:0.94712	validation_1-mlogloss:0.58570	validation_1-auc:0.93671
[4]	validation_0-mlogloss:0.51646	validation_0-auc:0.94871	validation_1-mlogloss:0.54387	validation_1-auc:0.93764
[5]	validation_0-mlogloss:0.48448	validation_0-auc:0.95028	validation_1-mlogloss:0.51756	validation_1-auc:0.93813
[6]	validation_0-mlogloss:0.46306	validation_0-auc:0.95142	validation_1-mlogloss:0.50001	validation_1-auc:0.93864
[7]	validation_0-mlogloss:0.44762	validation_0-auc:0.95251	validation_1-mlogloss:0.48861	validation_1-auc:0.93913
[8]	validation_0-mlogloss:0.43644	validation_0-auc:0.95345	validation_1-mlogloss:0.48114


  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):



[0]	validation_0-mlogloss:1.08173	validation_0-auc:0.93387	validation_1-mlogloss:1.09206	validation_1-auc:0.91546
[1]	validation_0-mlogloss:0.89777	validation_0-auc:0.94684	validation_1-mlogloss:0.91789	validation_1-auc:0.92707
[2]	validation_0-mlogloss:0.77075	validation_0-auc:0.95244	validation_1-mlogloss:0.79891	validation_1-auc:0.93209
[3]	validation_0-mlogloss:0.67824	validation_0-auc:0.95561	validation_1-mlogloss:0.71481	validation_1-auc:0.93416
[4]	validation_0-mlogloss:0.60987	validation_0-auc:0.95779	validation_1-mlogloss:0.65409	validation_1-auc:0.93550
[5]	validation_0-mlogloss:0.55765	validation_0-auc:0.95960	validation_1-mlogloss:0.60914	validation_1-auc:0.93640
[6]	validation_0-mlogloss:0.51616	validation_0-auc:0.96147	validation_1-mlogloss:0.57511	validation_1-auc:0.93722
[7]	validation_0-mlogloss:0.48406	validation_0-auc:0.96288	validation_1-mlogloss:0.54960	validation_1-auc:0.93780
[8]	validation_0-mlogloss:0.45806	validation_0-auc:0.96421	validation_1-mlogloss:0.53031

0.8554717542222541                                                                                             
0.8153173438703023                                                                                             
{'colsample_bytree': 0.39, 'gamma': 0.47000000000000003, 'learning_rate': 0.26, 'max_depth': 7, 'min_child_weight': 5.0, 'n_estimators': 90, 'subsample': 0.6900000000000001}

0.8385289020601113                                                                                             
0.8165376178508651                                                                                             
{'colsample_bytree': 0.93, 'gamma': 0.15, 'learning_rate': 0.04, 'max_depth': 8, 'min_child_weight': 5.0, 'n_estimators': 175, 'subsample': 0.72}

In [6]:
# Hyperparameters for the final model; hard-coded here rather than read from `best`.
bestParams = {'colsample_bytree': 0.68, 'gamma': 0.41000000000000003, 'learning_rate': 0.11, 'max_depth': 8, 'min_child_weight': 3.0, 'n_estimators': 80, 'subsample': 0.75}

# The label frames are flattened to 1-D arrays, as in objective(); passing the
# single-column DataFrames directly triggers a column_or_1d conversion warning.
y_train_1d = y_train_1.values.reshape(-1)
y_val_1d = y_val.values.reshape(-1)

#generate xgboost classifier
xgb = XGB.XGBClassifier(**bestParams,
                    verbosity=1,
                   eval_metric=['mlogloss', 'auc'])
# Train and validation splits are both monitored; boosting stops after 5
# rounds without improvement. `model` is the fitted estimator returned by fit().
model = xgb.fit(x_train_1,
                y_train_1d,
                eval_set=[(x_train_1, y_train_1d), (x_val, y_val_1d)],
                verbose=1,
                early_stopping_rounds=5,)
preds_tr = xgb.predict(x_train_1)
preds_val = xgb.predict(x_val)

  y = column_or_1d(y, warn=True)
  y = column_or_1d(y, warn=True)
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):


[0]	validation_0-mlogloss:1.24635	validation_0-auc:0.93323	validation_1-mlogloss:1.24710	validation_1-auc:0.92878
[1]	validation_0-mlogloss:1.13618	validation_0-auc:0.93859	validation_1-mlogloss:1.13785	validation_1-auc:0.93396
[2]	validation_0-mlogloss:1.04513	validation_0-auc:0.94035	validation_1-mlogloss:1.04763	validation_1-auc:0.93559
[3]	validation_0-mlogloss:0.96869	validation_0-auc:0.94138	validation_1-mlogloss:0.97222	validation_1-auc:0.93643
[4]	validation_0-mlogloss:0.90410	validation_0-auc:0.94209	validation_1-mlogloss:0.90845	validation_1-auc:0.93697
[5]	validation_0-mlogloss:0.84882	validation_0-auc:0.94283	validation_1-mlogloss:0.85423	validation_1-auc:0.93746
[6]	validation_0-mlogloss:0.80091	validation_0-auc:0.94332	validation_1-mlogloss:0.80721	validation_1-auc:0.93781
[7]	validation_0-mlogloss:0.75907	validation_0-auc:0.94370	validation_1-mlogloss:0.76629	validation_1-auc:0.93811
[8]	validation_0-mlogloss:0.72272	validation_0-auc:0.94410	validation_1-mlogloss:0.73080

[72]	validation_0-mlogloss:0.39139	validation_0-auc:0.96181	validation_1-mlogloss:0.45246	validation_1-auc:0.94483
[73]	validation_0-mlogloss:0.39044	validation_0-auc:0.96203	validation_1-mlogloss:0.45236	validation_1-auc:0.94485
[74]	validation_0-mlogloss:0.38957	validation_0-auc:0.96224	validation_1-mlogloss:0.45226	validation_1-auc:0.94488
[75]	validation_0-mlogloss:0.38871	validation_0-auc:0.96243	validation_1-mlogloss:0.45218	validation_1-auc:0.94490
[76]	validation_0-mlogloss:0.38790	validation_0-auc:0.96261	validation_1-mlogloss:0.45209	validation_1-auc:0.94493
[77]	validation_0-mlogloss:0.38693	validation_0-auc:0.96283	validation_1-mlogloss:0.45197	validation_1-auc:0.94496
[78]	validation_0-mlogloss:0.38606	validation_0-auc:0.96303	validation_1-mlogloss:0.45187	validation_1-auc:0.94498
[79]	validation_0-mlogloss:0.38540	validation_0-auc:0.96318	validation_1-mlogloss:0.45177	validation_1-auc:0.94500


0.8171477548411464

In [7]:
# Accuracy of the final model on the training split.
accuracy_score(y_true=y_train_1, y_pred=preds_tr)

0.8474810037018428

In [8]:
# Accuracy of the final model on the held-out validation split.
accuracy_score(y_true=y_val, y_pred=preds_val)

0.8171477548411464

In [9]:
# Class-probability predictions for the training and validation splits.
probs_tr, probs_val = (
    xgb.predict_proba(features) for features in (x_train_1, x_val)
)

In [11]:
# np.save does not create missing directories, so make sure the output folder
# exists before writing (otherwise a fresh checkout fails here).
os.makedirs("Preds", exist_ok=True)

# Persist the train/validation class probabilities.
np.save("Preds/XGB_val", probs_val)
np.save("Preds/XGB_tr", probs_tr)

# Score the test features with the fitted model and persist those probabilities too.
probs_test = xgb.predict_proba(x_test)
np.save("Preds/XGB_test", probs_test)