***Optimization of Wafer dataset with XGBoost***

In [None]:
!pip install optuna
!pip install xgboost

Collecting optuna
  Downloading optuna-2.10.0-py3-none-any.whl (308 kB)
[K     |████████████████████████████████| 308 kB 5.3 MB/s 
Collecting alembic
  Downloading alembic-1.7.5-py3-none-any.whl (209 kB)
[K     |████████████████████████████████| 209 kB 47.8 MB/s 
[?25hCollecting colorlog
  Downloading colorlog-6.6.0-py2.py3-none-any.whl (11 kB)
Collecting cmaes>=0.8.2
  Downloading cmaes-0.8.2-py3-none-any.whl (15 kB)
Collecting cliff
  Downloading cliff-3.9.0-py3-none-any.whl (80 kB)
[K     |████████████████████████████████| 80 kB 8.7 MB/s 
Collecting Mako
  Downloading Mako-1.1.6-py2.py3-none-any.whl (75 kB)
[K     |████████████████████████████████| 75 kB 4.0 MB/s 
Collecting autopage>=0.4.0
  Downloading autopage-0.4.0-py3-none-any.whl (20 kB)
Collecting pbr!=2.1.0,>=2.0.0
  Downloading pbr-5.8.0-py2.py3-none-any.whl (112 kB)
[K     |████████████████████████████████| 112 kB 72.3 MB/s 
[?25hCollecting stevedore>=2.0.1
  Downloading stevedore-3.5.0-py3-none-any.whl (49 kB)
[K 

In [None]:
import numpy as np
import pandas as pd 
import optuna
import xgboost as xgb 

In [None]:
from sklearn.model_selection import train_test_split

In [None]:
df = pd.read_csv('wafer_16012020_051629.csv')

In [None]:
x= df.drop(columns=['Unnamed: 0' , 'Good/Bad'])

In [None]:
for (columnName, columnData) in x.iteritems():
    x[columnName]= x[columnName].fillna(x[columnName].mean())

In [None]:
y = df['Good/Bad']
y

0     1
1     1
2     1
3     1
4     1
     ..
95   -1
96   -1
97   -1
98   -1
99   -1
Name: Good/Bad, Length: 100, dtype: int64

In [None]:
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split

In [None]:
sta_sca = StandardScaler()
x = sta_sca.fit_transform(x)

In [None]:
train_x , test_x , train_y , test_y= train_test_split(x , y , test_size = .20 )

In [None]:
def objective_classification(trial) :
    train_x , test_x , train_y , test_y= train_test_split(x , y , test_size = .20 )
    param  = {
        #'tree_method' :'approx',
        
        'sketch_eps' :trial.suggest_float('sketch_eps',0.01,1.0), ## Only used for 'approx' tree method, consider setting low number for accuracy 
        
        'verbosity':0, ##print more info on tree buliding process
        
        'validate_parameters':'True', ## performs validation to check whether parameter exists or not.

        'n_estimators' : 20000,
        
        #'nthread' : 10, ## No. of parallel threads to run Xgboost
        
        'max_depth' :trial.suggest_int('max_depth',1,20), ## Higher the depth of value more complex and ovefitting could occur
        
        'booster':trial.suggest_categorical('booster',['dart','gbtree','gblinear']), ## Dart and gbree for tree based booster method. gblinear used for linear function.
        
        'base_score' :trial.suggest_float('base_score' , .1 , .9), ## initial prediction score of all instances
        
        'eval_metric' :'logloss', ## Added as objective used as evaluation  metrics for validation data, multiple metrics can be passed in a list for evaluation.
        
        'seed' :5, ## Random number seed
        
        'objective' : 'binary:logistic' , ## Used to pass learning objective to be applied on model to get desired result
        
        'lambda' : trial.suggest_float('lambda' , 1e-4 , 1.0), ## Model will become more conservative with increase in value. L2 regularization term on weights
        
        'alpha' :trial.suggest_float('alpha' , 1e-4 , 1.0), ## Model will become more conservative with increase in value. L1 regularization term on weights
        
        'subsample' : trial.suggest_float('subsample' , .1,.5), ## Occurs once in every bossting itration. Tuning value to 0.5 means that XGBoost would randomly sample half of the training data
        
        'colsample_bytree' : trial.suggest_float('colsample_bytree' , .5 ,1.0), ## Subsample ratio of columns when constructing each tree
        
        'min_child_weight' :trial.suggest_int('min_child_weight',1,10), ## Minimum sum of weights of all observations required in a child.Too high values can lead to under-fitting hence, it should be tuned using CV
        
        'max_delta_step' :trial.suggest_int('max_delta_step',1,10), ## Used when in logistic regression when class is extremely imbalanced. Tuning value between 1-10 help control the update.
        
        'sampling_method':trial.suggest_categorical('sampling_method',['uniform','gradient_based']), ## used to sample the training instances. set subsample >= 0.5 for good results.
        
        #'updater':'grow_colmaker', ## An advanced parameter that is usually set automatically, depending on some other parameters, used to run sequence of tree updaters 
        
        
    }
   
    if param['booster'] in ['dart','gbtree']:
      
      param['gamma'] : trail.suggest_float('gamma',1e-3,4.0) ## Gamma specifies the minimum loss reduction required to make a split. The values can vary depending on the loss function and should be tuned.
      
      param['eta'] : trail.suggest_float('eta',.01,0.2) ##Makes the model more robust by shrinking the weights on each step. Typical final values to be used: 0.01-0.2
      

    if param['booster'] == 'dart':
      param['sample_type']:trail.suggest_categorical('sample_type',['uniform','weighted']) ## uniform: dropped trees are selected uniformly.weighted: dropped trees are selected in proportion to weight.

      param['normalize_type']:trail.suggest_categorical('normalize_type',['tree','forest']) ## tree: new trees have the same weight of each of dropped trees. forest: new trees have the same weight of sum of dropped trees (forest).

      param['rate_drop'] :trail.suggest_float('rate_drop',0.0,1.0) ## Dropout rate and has range from 0.0 to 1.0.

      param['one_drop'] :0 ## Least one tree is always dropped during the dropout if this enabled

      param['skip_drop'] :trail.suggest_float('skip_drop',0.0,1.0) ## Probability of skipping the dropout procedure and has range from 0.0 to 1.0.

    if param['booster'] == 'gblinear':
      param['updater']:trail.suggest_categorical('updater',['shotgun','coord_descent']) ## Defines various algo to fit linear based model.
      param['feature_selector']:trail.suggest_categorical('feature_selector',['cyclic','shuffle','random','greedy','thrifty']) ## Defines various feature selection and ordering method.
        
    xgb_classification = xgb.XGBClassifier(**param)
    xgb_classification.fit(train_x , train_y,eval_set  = [(test_x, test_y)],eval_metric='logloss',verbose=True)
 
    pred = xgb_classification.predict(test_x)
    accuracy  = xgb_classification.score(test_x,test_y) 
    #
    return accuracy
        
        
    

In [None]:
find_param = optuna.create_study(direction='minimize')

[32m[I 2021-11-22 17:14:37,520][0m A new study created in memory with name: no-name-827a8cc7-9a32-4d1e-86c6-64dc9f0c28ea[0m


In [None]:
find_param.optimize(objective_classification  , n_trials=10)

[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[15000]	validation_0-logloss:0.533755
[15001]	validation_0-logloss:0.533755
[15002]	validation_0-logloss:0.533755
[15003]	validation_0-logloss:0.533755
[15004]	validation_0-logloss:0.533755
[15005]	validation_0-logloss:0.533755
[15006]	validation_0-logloss:0.533755
[15007]	validation_0-logloss:0.533755
[15008]	validation_0-logloss:0.533755
[15009]	validation_0-logloss:0.533755
[15010]	validation_0-logloss:0.533755
[15011]	validation_0-logloss:0.533755
[15012]	validation_0-logloss:0.533755
[15013]	validation_0-logloss:0.533755
[15014]	validation_0-logloss:0.533755
[15015]	validation_0-logloss:0.533755
[15016]	validation_0-logloss:0.533755
[15017]	validation_0-logloss:0.533755
[15018]	validation_0-logloss:0.533755
[15019]	validation_0-logloss:0.533755
[15020]	validation_0-logloss:0.533755
[15021]	validation_0-logloss:0.533755
[15022]	validation_0-logloss:0.533755
[15023]	validation_0-logloss:0.533755
[15024]	validation_0-lo

[32m[I 2021-11-22 17:17:09,810][0m Trial 0 finished with value: 0.7 and parameters: {'sketch_eps': 0.589973573588527, 'max_depth': 17, 'booster': 'gblinear', 'base_score': 0.24901955138128928, 'lambda': 0.40246051256855986, 'alpha': 0.3751384171852157, 'subsample': 0.292654291749962, 'colsample_bytree': 0.8002511874931637, 'min_child_weight': 9, 'max_delta_step': 8, 'sampling_method': 'gradient_based'}. Best is trial 0 with value: 0.7.[0m


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[15000]	validation_0-logloss:0.491195
[15001]	validation_0-logloss:0.491195
[15002]	validation_0-logloss:0.491195
[15003]	validation_0-logloss:0.491195
[15004]	validation_0-logloss:0.491195
[15005]	validation_0-logloss:0.491195
[15006]	validation_0-logloss:0.491195
[15007]	validation_0-logloss:0.491195
[15008]	validation_0-logloss:0.491195
[15009]	validation_0-logloss:0.491195
[15010]	validation_0-logloss:0.491195
[15011]	validation_0-logloss:0.491195
[15012]	validation_0-logloss:0.491195
[15013]	validation_0-logloss:0.491195
[15014]	validation_0-logloss:0.491195
[15015]	validation_0-logloss:0.491195
[15016]	validation_0-logloss:0.491195
[15017]	validation_0-logloss:0.491195
[15018]	validation_0-logloss:0.491195
[15019]	validation_0-logloss:0.491195
[15020]	validation_0-logloss:0.491195
[15021]	validation_0-logloss:0.491195
[15022]	validation_0-logloss:0.491195
[15023]	validation_0-logloss:0.491195
[15024]	validation_0-lo

[32m[I 2021-11-22 17:19:54,429][0m Trial 1 finished with value: 0.6 and parameters: {'sketch_eps': 0.09426341820735383, 'max_depth': 15, 'booster': 'gblinear', 'base_score': 0.49110007578994497, 'lambda': 0.03893374170900119, 'alpha': 0.11459848092774408, 'subsample': 0.12735049303811566, 'colsample_bytree': 0.646446674260736, 'min_child_weight': 7, 'max_delta_step': 3, 'sampling_method': 'gradient_based'}. Best is trial 1 with value: 0.6.[0m


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[15000]	validation_0-logloss:0.585077
[15001]	validation_0-logloss:0.582227
[15002]	validation_0-logloss:0.581158
[15003]	validation_0-logloss:0.582751
[15004]	validation_0-logloss:0.581129
[15005]	validation_0-logloss:0.579798
[15006]	validation_0-logloss:0.580926
[15007]	validation_0-logloss:0.579793
[15008]	validation_0-logloss:0.581211
[15009]	validation_0-logloss:0.58
[15010]	validation_0-logloss:0.58173
[15011]	validation_0-logloss:0.584624
[15012]	validation_0-logloss:0.585666
[15013]	validation_0-logloss:0.583991
[15014]	validation_0-logloss:0.584799
[15015]	validation_0-logloss:0.582421
[15016]	validation_0-logloss:0.582779
[15017]	validation_0-logloss:0.582804
[15018]	validation_0-logloss:0.58249
[15019]	validation_0-logloss:0.581566
[15020]	validation_0-logloss:0.584622
[15021]	validation_0-logloss:0.585937
[15022]	validation_0-logloss:0.587065
[15023]	validation_0-logloss:0.587479
[15024]	validation_0-logloss:

[32m[I 2021-11-22 17:27:19,575][0m Trial 2 finished with value: 0.65 and parameters: {'sketch_eps': 0.49385902462149206, 'max_depth': 3, 'booster': 'dart', 'base_score': 0.24174509964938942, 'lambda': 0.8888898300768819, 'alpha': 0.9427014441585629, 'subsample': 0.44133018957660064, 'colsample_bytree': 0.6186744863860604, 'min_child_weight': 2, 'max_delta_step': 4, 'sampling_method': 'uniform'}. Best is trial 1 with value: 0.6.[0m


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[15000]	validation_0-logloss:0.613093
[15001]	validation_0-logloss:0.6127
[15002]	validation_0-logloss:0.611568
[15003]	validation_0-logloss:0.611603
[15004]	validation_0-logloss:0.611506
[15005]	validation_0-logloss:0.611841
[15006]	validation_0-logloss:0.610957
[15007]	validation_0-logloss:0.610942
[15008]	validation_0-logloss:0.61225
[15009]	validation_0-logloss:0.611788
[15010]	validation_0-logloss:0.611667
[15011]	validation_0-logloss:0.612974
[15012]	validation_0-logloss:0.612955
[15013]	validation_0-logloss:0.61151
[15014]	validation_0-logloss:0.612037
[15015]	validation_0-logloss:0.611137
[15016]	validation_0-logloss:0.610939
[15017]	validation_0-logloss:0.611034
[15018]	validation_0-logloss:0.610902
[15019]	validation_0-logloss:0.610897
[15020]	validation_0-logloss:0.610918
[15021]	validation_0-logloss:0.61093
[15022]	validation_0-logloss:0.611366
[15023]	validation_0-logloss:0.61156
[15024]	validation_0-logloss:

[32m[I 2021-11-22 17:34:29,926][0m Trial 3 finished with value: 0.7 and parameters: {'sketch_eps': 0.2533163076898254, 'max_depth': 16, 'booster': 'dart', 'base_score': 0.8263512822713862, 'lambda': 0.27346423506070106, 'alpha': 0.19006305004285917, 'subsample': 0.14983316289411153, 'colsample_bytree': 0.8130208265591499, 'min_child_weight': 10, 'max_delta_step': 4, 'sampling_method': 'uniform'}. Best is trial 1 with value: 0.6.[0m


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[15000]	validation_0-logloss:0.661812
[15001]	validation_0-logloss:0.660797
[15002]	validation_0-logloss:0.662437
[15003]	validation_0-logloss:0.660374
[15004]	validation_0-logloss:0.660681
[15005]	validation_0-logloss:0.658564
[15006]	validation_0-logloss:0.662253
[15007]	validation_0-logloss:0.662982
[15008]	validation_0-logloss:0.657462
[15009]	validation_0-logloss:0.655228
[15010]	validation_0-logloss:0.65328
[15011]	validation_0-logloss:0.652881
[15012]	validation_0-logloss:0.652066
[15013]	validation_0-logloss:0.65412
[15014]	validation_0-logloss:0.654099
[15015]	validation_0-logloss:0.655155
[15016]	validation_0-logloss:0.657221
[15017]	validation_0-logloss:0.656007
[15018]	validation_0-logloss:0.656716
[15019]	validation_0-logloss:0.655007
[15020]	validation_0-logloss:0.656023
[15021]	validation_0-logloss:0.656723
[15022]	validation_0-logloss:0.65664
[15023]	validation_0-logloss:0.658401
[15024]	validation_0-loglo

[32m[I 2021-11-22 17:37:05,517][0m Trial 4 finished with value: 0.65 and parameters: {'sketch_eps': 0.21378157515589225, 'max_depth': 1, 'booster': 'gbtree', 'base_score': 0.4610125322742812, 'lambda': 0.8471625999953278, 'alpha': 0.3929306265210404, 'subsample': 0.37273636311703107, 'colsample_bytree': 0.8094600324222392, 'min_child_weight': 7, 'max_delta_step': 2, 'sampling_method': 'gradient_based'}. Best is trial 1 with value: 0.6.[0m


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[15000]	validation_0-logloss:0.486093
[15001]	validation_0-logloss:0.484288
[15002]	validation_0-logloss:0.488239
[15003]	validation_0-logloss:0.489098
[15004]	validation_0-logloss:0.486281
[15005]	validation_0-logloss:0.490967
[15006]	validation_0-logloss:0.492368
[15007]	validation_0-logloss:0.487886
[15008]	validation_0-logloss:0.49624
[15009]	validation_0-logloss:0.500057
[15010]	validation_0-logloss:0.507137
[15011]	validation_0-logloss:0.502938
[15012]	validation_0-logloss:0.504361
[15013]	validation_0-logloss:0.501189
[15014]	validation_0-logloss:0.493485
[15015]	validation_0-logloss:0.485528
[15016]	validation_0-logloss:0.481149
[15017]	validation_0-logloss:0.48527
[15018]	validation_0-logloss:0.493494
[15019]	validation_0-logloss:0.494714
[15020]	validation_0-logloss:0.490004
[15021]	validation_0-logloss:0.49883
[15022]	validation_0-logloss:0.510759
[15023]	validation_0-logloss:0.503452
[15024]	validation_0-loglo

[32m[I 2021-11-22 17:44:13,522][0m Trial 5 finished with value: 0.85 and parameters: {'sketch_eps': 0.40313227509564437, 'max_depth': 11, 'booster': 'dart', 'base_score': 0.2938227630555881, 'lambda': 0.9037656575854023, 'alpha': 0.599389415519816, 'subsample': 0.26076333497518933, 'colsample_bytree': 0.6526069784558832, 'min_child_weight': 9, 'max_delta_step': 10, 'sampling_method': 'gradient_based'}. Best is trial 1 with value: 0.6.[0m


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[15000]	validation_0-logloss:0.783026
[15001]	validation_0-logloss:0.795416
[15002]	validation_0-logloss:0.792982
[15003]	validation_0-logloss:0.805318
[15004]	validation_0-logloss:0.799382
[15005]	validation_0-logloss:0.811658
[15006]	validation_0-logloss:0.813703
[15007]	validation_0-logloss:0.806154
[15008]	validation_0-logloss:0.802991
[15009]	validation_0-logloss:0.796534
[15010]	validation_0-logloss:0.803502
[15011]	validation_0-logloss:0.816177
[15012]	validation_0-logloss:0.801596
[15013]	validation_0-logloss:0.792853
[15014]	validation_0-logloss:0.78326
[15015]	validation_0-logloss:0.791669
[15016]	validation_0-logloss:0.775585
[15017]	validation_0-logloss:0.770205
[15018]	validation_0-logloss:0.757861
[15019]	validation_0-logloss:0.765476
[15020]	validation_0-logloss:0.766426
[15021]	validation_0-logloss:0.762725
[15022]	validation_0-logloss:0.762921
[15023]	validation_0-logloss:0.757119
[15024]	validation_0-log

[32m[I 2021-11-22 17:46:47,714][0m Trial 6 finished with value: 0.55 and parameters: {'sketch_eps': 0.09739787748673387, 'max_depth': 18, 'booster': 'gbtree', 'base_score': 0.8032138788332793, 'lambda': 0.6845072442297875, 'alpha': 0.18545302115796997, 'subsample': 0.18854532336250285, 'colsample_bytree': 0.7206580950463729, 'min_child_weight': 10, 'max_delta_step': 10, 'sampling_method': 'gradient_based'}. Best is trial 6 with value: 0.55.[0m


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[15000]	validation_0-logloss:0.562587
[15001]	validation_0-logloss:0.562704
[15002]	validation_0-logloss:0.562726
[15003]	validation_0-logloss:0.563293
[15004]	validation_0-logloss:0.564215
[15005]	validation_0-logloss:0.564068
[15006]	validation_0-logloss:0.564283
[15007]	validation_0-logloss:0.563247
[15008]	validation_0-logloss:0.563973
[15009]	validation_0-logloss:0.563946
[15010]	validation_0-logloss:0.563218
[15011]	validation_0-logloss:0.566016
[15012]	validation_0-logloss:0.565288
[15013]	validation_0-logloss:0.56507
[15014]	validation_0-logloss:0.565107
[15015]	validation_0-logloss:0.564909
[15016]	validation_0-logloss:0.567323
[15017]	validation_0-logloss:0.571002
[15018]	validation_0-logloss:0.568108
[15019]	validation_0-logloss:0.568435
[15020]	validation_0-logloss:0.565872
[15021]	validation_0-logloss:0.566587
[15022]	validation_0-logloss:0.568676
[15023]	validation_0-logloss:0.571451
[15024]	validation_0-log

[32m[I 2021-11-22 17:49:21,122][0m Trial 7 finished with value: 0.75 and parameters: {'sketch_eps': 0.736236413770203, 'max_depth': 3, 'booster': 'gbtree', 'base_score': 0.44603685911607516, 'lambda': 0.8222280568259852, 'alpha': 0.5544048924373948, 'subsample': 0.2073163894923863, 'colsample_bytree': 0.7993374789772383, 'min_child_weight': 8, 'max_delta_step': 4, 'sampling_method': 'uniform'}. Best is trial 6 with value: 0.55.[0m


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[15000]	validation_0-logloss:0.60878
[15001]	validation_0-logloss:0.609463
[15002]	validation_0-logloss:0.60861
[15003]	validation_0-logloss:0.606695
[15004]	validation_0-logloss:0.608769
[15005]	validation_0-logloss:0.604571
[15006]	validation_0-logloss:0.608219
[15007]	validation_0-logloss:0.60825
[15008]	validation_0-logloss:0.605587
[15009]	validation_0-logloss:0.603847
[15010]	validation_0-logloss:0.604758
[15011]	validation_0-logloss:0.603163
[15012]	validation_0-logloss:0.607726
[15013]	validation_0-logloss:0.609903
[15014]	validation_0-logloss:0.609343
[15015]	validation_0-logloss:0.607275
[15016]	validation_0-logloss:0.605456
[15017]	validation_0-logloss:0.604742
[15018]	validation_0-logloss:0.606421
[15019]	validation_0-logloss:0.60528
[15020]	validation_0-logloss:0.605136
[15021]	validation_0-logloss:0.605966
[15022]	validation_0-logloss:0.606558
[15023]	validation_0-logloss:0.60759
[15024]	validation_0-logloss

[32m[I 2021-11-22 17:52:00,436][0m Trial 8 finished with value: 0.65 and parameters: {'sketch_eps': 0.7336263363050943, 'max_depth': 10, 'booster': 'gbtree', 'base_score': 0.809399814640657, 'lambda': 0.23922621488654902, 'alpha': 0.006120677605206578, 'subsample': 0.48730805081184303, 'colsample_bytree': 0.9146555803161849, 'min_child_weight': 5, 'max_delta_step': 8, 'sampling_method': 'gradient_based'}. Best is trial 6 with value: 0.55.[0m


[1;30;43mStreaming output truncated to the last 5000 lines.[0m
[15000]	validation_0-logloss:0.705946
[15001]	validation_0-logloss:0.70525
[15002]	validation_0-logloss:0.701666
[15003]	validation_0-logloss:0.701991
[15004]	validation_0-logloss:0.705703
[15005]	validation_0-logloss:0.706431
[15006]	validation_0-logloss:0.705045
[15007]	validation_0-logloss:0.701997
[15008]	validation_0-logloss:0.700291
[15009]	validation_0-logloss:0.707322
[15010]	validation_0-logloss:0.709255
[15011]	validation_0-logloss:0.716246
[15012]	validation_0-logloss:0.717123
[15013]	validation_0-logloss:0.712375
[15014]	validation_0-logloss:0.712376
[15015]	validation_0-logloss:0.710162
[15016]	validation_0-logloss:0.706942
[15017]	validation_0-logloss:0.708923
[15018]	validation_0-logloss:0.70601
[15019]	validation_0-logloss:0.710937
[15020]	validation_0-logloss:0.710537
[15021]	validation_0-logloss:0.709431
[15022]	validation_0-logloss:0.71613
[15023]	validation_0-logloss:0.719606
[15024]	validation_0-loglo

[32m[I 2021-11-22 17:54:36,746][0m Trial 9 finished with value: 0.6 and parameters: {'sketch_eps': 0.690647757266719, 'max_depth': 7, 'booster': 'gbtree', 'base_score': 0.246174360049217, 'lambda': 0.5874900260553897, 'alpha': 0.6546187115152561, 'subsample': 0.38851820392707603, 'colsample_bytree': 0.7899246829373207, 'min_child_weight': 8, 'max_delta_step': 6, 'sampling_method': 'gradient_based'}. Best is trial 6 with value: 0.55.[0m


In [None]:
xg_claf=xgb.XGBClassifier(**best_fit)
xg_claf.fit(train_x,train_y)

XGBClassifier(alpha=0.18545302115796997, base_score=0.8032138788332793,
              colsample_bytree=0.7206580950463729, lambda=0.6845072442297875,
              max_delta_step=10, max_depth=18, min_child_weight=10,
              sampling_method='gradient_based', sketch_eps=0.09739787748673387,
              subsample=0.18854532336250285)

In [None]:
print("Accuracy:")
xg_claf.score(test_x,test_y)

Accuracy:


0.6