In [1]:
import xgboost as xgb

from sklearn.metrics import roc_auc_score

from hpsklearn import HyperoptEstimator, svc, xgboost_classification, any_classifier, any_preprocessing

In [2]:

from hyperopt import STATUS_OK, Trials, fmin, hp, tpe


In [3]:
from sklearn.model_selection import RandomizedSearchCV, GridSearchCV

import matplotlib.pyplot as plt
import pandas as pd

from sklearn.linear_model import LogisticRegression, LinearRegression
from sklearn.metrics import accuracy_score, confusion_matrix, mean_absolute_error
from sklearn.model_selection import train_test_split 
from sklearn.neural_network import MLPClassifier


In [4]:
merged_df = pd.read_csv('processed_data/prepared_featured_balanced_sample_5.csv').drop('Unnamed: 0',axis=1)

In [5]:
x = merged_df.drop(['event_target'],axis=1)
y = merged_df['event_target']

X_train, X_test, y_train, y_test = train_test_split(x, y, test_size=0.2, random_state=42)

In [6]:
def fit_and_result(clf):
    clf.fit(X_train, y_train)

    clf_train_pred = clf.predict(X_train)
    clf_test_pred = clf.predict(X_test)

    print('Точность на тренировочной выборке')
    print(accuracy_score(y_train, clf_train_pred))
    print('Точность на тестовой выборке')
    print(accuracy_score(y_test, clf_test_pred))
    
    
    score = roc_auc_score(y_test, clf.predict_proba(X_test)[:,1])
    print(f"\n\nROC AUC TEST: {score:.4f}")

In [7]:
import warnings
warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)
warnings.simplefilter("ignore")

In [8]:
from hyperopt import hp, STATUS_OK, Trials, fmin, tpe
from sklearn.model_selection import cross_val_score


def objective(space):

    warnings.filterwarnings(action='ignore', category=DeprecationWarning)
    classifier = xgb.XGBClassifier(n_estimators = space['n_estimators'],
                            max_depth = int(space['max_depth']),
                            learning_rate = space['learning_rate'],
                            gamma = space['gamma'],
                            min_child_weight = space['min_child_weight'],
                            subsample = space['subsample'],
                            colsample_bytree = space['colsample_bytree'],
                            booster = 'gbtree'
                            )
    
    classifier.fit(X_train, y_train)

    # Applying k-Fold Cross Validation
    from sklearn.model_selection import cross_val_score
    accuracies = cross_val_score(estimator = classifier, scoring='roc_auc', X = X_test, y = y_test, cv = 10)
    CrossValMean = accuracies.mean()
    
    print('---------')
    print("CrossValMean AUC SCORE:", CrossValMean)


    return{'loss':1-CrossValMean, 'status': STATUS_OK }

space = {
     'reg_alpha' : hp.quniform('reg_alpha', 40,180,1),
        'reg_lambda' : hp.uniform('reg_lambda', 0,1),
    'colsample_bytree' : hp.uniform('colsample_bytree', 0.5,1),
    
    'max_depth' : hp.choice('max_depth', range(5, 30, 1)),
    'learning_rate' : hp.quniform('learning_rate', 0.01, 0.5, 0.01),
    'n_estimators' : hp.choice('n_estimators', range(20, 800, 50)),
    'gamma' : hp.quniform('gamma', 0, 0.50, 0.01),
    'min_child_weight' : hp.quniform('min_child_weight', 1, 10, 1),
    'subsample' : hp.quniform('subsample', 0.1, 1, 0.01),
    'colsample_bytree' : hp.quniform('colsample_bytree', 0.1, 1.0, 0.01),
    'eta' : hp.quniform('eta', 0.1, 1 , 0.1),
  #  'tree_method' : hp.choice('tree_method', ('auto', 'exact', 'approx', 'hist'))
}

trials = Trials()
best = fmin(fn=objective,
            space=space,
            algo=tpe.suggest,
            max_evals=200,
            trials=trials)

print("Best: ", best)


# Fitting XGBoost to the Training set
from xgboost import XGBClassifier
classifier = XGBClassifier(n_estimators = best['n_estimators'],
                            max_depth = best['max_depth'],
                            learning_rate = best['learning_rate'],
                            gamma = best['gamma'],
                            min_child_weight = best['min_child_weight'],
                            subsample = best['subsample'],
                            colsample_bytree = best['colsample_bytree'],
                            eta = best['eta'],
                            tree_method = best['tree_method']
                            )

classifier.fit(X_train, y_train)


---------                                                                                                              
CrossValMean AUC SCORE:                                                                                                
0.65855237148217                                                                                                       
---------                                                                                                              
CrossValMean AUC SCORE:                                                                                                
0.6250307376983804                                                                                                     
---------                                                                                                              
CrossValMean AUC SCORE:                                                                                                
0.6485965424451797                      

0.6649034081299089                                                                                                     
---------                                                                                                              
CrossValMean AUC SCORE:                                                                                                
0.6315916152653418                                                                                                     
---------                                                                                                              
CrossValMean AUC SCORE:                                                                                                
0.6517832223952026                                                                                                     
---------                                                                                                              
CrossValMean AUC SCORE:                 

CrossValMean AUC SCORE:                                                                                                
0.6325466969711098                                                                                                     
---------                                                                                                              
CrossValMean AUC SCORE:                                                                                                
0.6153999687019999                                                                                                     
---------                                                                                                              
CrossValMean AUC SCORE:                                                                                                
0.6010177914175225                                                                                                     
---------                               

---------                                                                                                              
CrossValMean AUC SCORE:                                                                                                
0.6624943577346329                                                                                                     
---------                                                                                                              
CrossValMean AUC SCORE:                                                                                                
0.6308572404390012                                                                                                     
---------                                                                                                              
CrossValMean AUC SCORE:                                                                                                
0.6259410187564632                      

0.6071081119139119                                                                                                     
---------                                                                                                              
CrossValMean AUC SCORE:                                                                                                
0.6162382078905562                                                                                                     
---------                                                                                                              
CrossValMean AUC SCORE:                                                                                                
0.6506172981474274                                                                                                     
---------                                                                                                              
CrossValMean AUC SCORE:                 

CrossValMean AUC SCORE:                                                                                                
0.6467116667070307                                                                                                     
---------                                                                                                              
CrossValMean AUC SCORE:                                                                                                
0.6501300557209776                                                                                                     
---------                                                                                                              
CrossValMean AUC SCORE:                                                                                                
0.6318981723182249                                                                                                     
---------                               

---------                                                                                                              
CrossValMean AUC SCORE:                                                                                                
0.6419759092473691                                                                                                     
---------                                                                                                              
CrossValMean AUC SCORE:                                                                                                
0.6250340498375635                                                                                                     
---------                                                                                                              
CrossValMean AUC SCORE:                                                                                                
0.6473504101189989                      

0.6592269553186119                                                                                                     
---------                                                                                                              
CrossValMean AUC SCORE:                                                                                                
0.6558028370184041                                                                                                     
---------                                                                                                              
CrossValMean AUC SCORE:                                                                                                
0.663430209119013                                                                                                      
---------                                                                                                              
CrossValMean AUC SCORE:                 

CrossValMean AUC SCORE:                                                                                                
0.6510882544473983                                                                                                     
---------                                                                                                              
CrossValMean AUC SCORE:                                                                                                
0.6229800923036237                                                                                                     
---------                                                                                                              
CrossValMean AUC SCORE:                                                                                                
0.6046323801273452                                                                                                     
---------                               

XGBoostError: Invalid Input: '1', valid values are: {'approx', 'auto', 'exact', 'gpu_hist', 'hist'}

In [26]:
best_params = {'colsample_bytree': 0.5, 'eta': 0.8, 'gamma': 0.24, 'learning_rate': 0.04, 'max_depth': 5, 'min_child_weight': 4.0, 'n_estimators': 27, 'subsample': 0.75}


best_2 = {'colsample_bytree': 0.41000000000000003, 'eta': 1.0, 'gamma': 0.02, 'learning_rate': 0.03, 'max_depth': 0, 'min_child_weight': 1.0, 'n_estimators': 7, 'reg_alpha': 169.0, 'reg_lambda': 0.11659690697151562, 'subsample': 0.31, 'tree_method': 'hist'}

In [27]:
xgb_final_clf = XGBClassifier(**best_params)

In [28]:
fit_and_result(xgb_final_clf)

Точность на тренировочной выборке
0.601772899957788
Точность на тестовой выборке
0.5995812791247384


ROC AUC TEST: 0.6589


In [23]:
cross_val_score(estimator = xgb_final_clf, scoring='roc_auc', X = X_test, y = y_test, cv = 10)

array([0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5, 0.5])