In [1]:
import numpy as np
from xgboost import XGBClassifier
from bayes_opt import BayesianOptimization
from sklearn.model_selection import train_test_split
import xgboost as xgb
from pathlib import Path
import os

In [2]:
def xgb_classifier(n_estimators, max_depth, reg_alpha, reg_lambda, min_child_weight, num_boost_round, gamma):
    params = {"booster": 'gbtree',
              "objective": 'multi:softmax',
              "eval_metric": "auc",
#               "is_unbalance": True,
              "n_estimators": int(n_estimators),
              "max_depth": int(max_depth),
              "reg_alpha": reg_alpha,
              "reg_lambda": reg_lambda,
              "gamma": gamma,
              "num_class": 3, 
              "min_child_weight": int(min_child_weight),
              "learning_rate": 0.01,
              "subsample_freq": 5,
              "verbosity": 0,
              "num_boost_round": int(num_boost_round)}
    cv_result = xgb.cv(params,
                       train_data,
                       1000,
                       early_stopping_rounds=100,
                       stratified=True,
                       nfold=3)
    return cv_result['test-auc-mean'].iloc[-1]


In [3]:
target_address = os.path.join(Path(os.getcwd()).parent,'Window_capture\\Data\\command_keys.npy')
screenshot_address = os.path.join(Path(os.getcwd()).parent,'Window_capture\\Data\\screenshots.npy')

labels = np.load(target_address)
images = np.load(screenshot_address, allow_pickle = True)

In [4]:
labels = labels[1500:2000]
images = images[1500:2000,:]
print("Dimensions for Targets: ", np.unique(labels, return_counts=True), "Dimensions for Images", images.shape)

Dimensions for Targets:  (array([0, 1, 2]), array([366,  35,  99], dtype=int64)) Dimensions for Images (500, 129600)


In [5]:
# X_train, X_test, y_train, y_test = make_dataset(10000, z=100)
X_train, X_test, y_train, y_test = train_test_split(images, labels, test_size = 0.25)

train_data = xgb.DMatrix(X_train, y_train)

In [6]:
xgb_bo = BayesianOptimization(xgb_classifier, {"n_estimators": (10, 100),
                                              'max_depth': (5, 40),
                                              'reg_alpha': (0.0, 0.1),
                                              'reg_lambda': (0.0, 0.1),
                                              'min_child_weight': (1, 10),
                                              'num_boost_round': (100, 1000),
                                              "gamma": (0, 10)
                                              })
xgb_bo.maximize(n_iter=15, init_points=2)

|   iter    |  target   |   gamma   | max_depth | min_ch... | n_esti... | num_bo... | reg_alpha | reg_la... |
-------------------------------------------------------------------------------------------------------------
| [0m 1       [0m | [0m 0.9854  [0m | [0m 7.345   [0m | [0m 24.7    [0m | [0m 1.83    [0m | [0m 63.53   [0m | [0m 405.6   [0m | [0m 0.003643[0m | [0m 0.01105 [0m |
| [0m 2       [0m | [0m 0.9854  [0m | [0m 6.718   [0m | [0m 19.69   [0m | [0m 6.454   [0m | [0m 64.04   [0m | [0m 958.8   [0m | [0m 0.0471  [0m | [0m 0.05868 [0m |
| [95m 3       [0m | [95m 0.9859  [0m | [95m 1.416   [0m | [95m 5.716   [0m | [95m 4.78    [0m | [95m 13.84   [0m | [95m 103.3   [0m | [95m 0.08341 [0m | [95m 0.0634  [0m |
| [95m 4       [0m | [95m 0.9868  [0m | [95m 1.053   [0m | [95m 8.374   [0m | [95m 3.059   [0m | [95m 15.56   [0m | [95m 107.3   [0m | [95m 0.008024[0m | [95m 0.09695 [0m |
| [95m 5       [0m | [95m 0.988

In [7]:
#Extracting the best parameters
params = xgb_bo.max['params']
print(params)



{'gamma': 3.588573520537012, 'max_depth': 6.575482672783876, 'min_child_weight': 1.392918508283413, 'n_estimators': 11.563881716355775, 'num_boost_round': 114.12223157924846, 'reg_alpha': 0.07833114170487387, 'reg_lambda': 0.09867149140295799}


In [9]:
#Converting the max_depth and n_estimator values from float to int

params['gamma']= int(params['gamma'])
params['max_depth']= int(params['max_depth'])
params['n_estimators']= int(params['n_estimators'])
params['num_boost_round']= int(params['num_boost_round'])

print(params)

# #Initialize an XGBClassifier with the tuned parameters and fit the training data
# from xgboost import XGBClassifier
# classifier2 = XGBClassifier(**params).fit(text_tfidf, clean_data_train['author'])

# #predicting for training set
# train_p2 = classifier2.predict(text_tfidf)

# #Looking at the classification report
# print(classification_report(train_p2, clean_data_train['author']))

{'gamma': 3, 'max_depth': 6, 'min_child_weight': 1.392918508283413, 'n_estimators': 11, 'num_boost_round': 114, 'reg_alpha': 0.07833114170487387, 'reg_lambda': 0.09867149140295799}
