In [2]:
import pandas as pd
import numpy as np
from sklearn.preprocessing import StandardScaler
from sklearn.model_selection import train_test_split, GridSearchCV
from xgboost import XGBClassifier
from sklearn.metrics import confusion_matrix, accuracy_score

In [3]:
# --- Data load ---
# Read the 'AIN' sheet, separate the 'ocena' target column from the remaining
# columns (used as features), and hold out 20% of the rows as a test split
# with a fixed random seed.
df = pd.read_excel('Final_data_binary.xlsx', sheet_name='AIN')
y = df['ocena'].to_numpy(copy=True)
X = df.drop(columns='ocena')
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=101,
)

In [4]:
# --- Scaling ---
# Fit the scaler on the training split only, then apply that same fitted
# transformation to both splits so that no test-set statistics are used.
sc = StandardScaler().fit(X_train)
X_train, X_test = sc.transform(X_train), sc.transform(X_test)

In [7]:
# --- Grid search for the best hyper-parameters ---
# Every combination in the grid below is evaluated with 10-fold
# cross-validation on the training split, scored by accuracy, with the search
# parallelised over all available cores (n_jobs=-1).
xgb = XGBClassifier()

parameters = [
    {
        'max_depth': [3, 4, 5, 6, 8, 10, 12, 15],
        'learning_rate': [0.01, 0.05, 0.1],
        'n_estimators': [100, 500, 1000],
    }
]

grid_search = GridSearchCV(
    estimator=xgb,
    param_grid=parameters,
    scoring='accuracy',
    cv=10,
    n_jobs=-1,
).fit(X_train, y_train)

# Mean cross-validated accuracy of the winning combination, and the
# combination itself.
best_accuracy, best_parameters = grid_search.best_score_, grid_search.best_params_
print(f'Best accuracy: {best_accuracy*100}')
print(f'Best parameters: {best_parameters}')

Best accuracy: 85.15625312456254
Best parameters: {'learning_rate': 0.01, 'max_depth': 10, 'n_estimators': 500}


In [13]:
# --- XGBoost with the best parameters ---
# Build the final model from the hyper-parameters selected by the grid search
# instead of re-typing the printed values: hand-copied numbers silently go
# stale whenever the grid, the data or the split changes.
# Requires the grid-search cell to have been run first (`grid_search` fitted).
xgb = XGBClassifier(**grid_search.best_params_)
xgb.fit(X_train, y_train)

# Predicted labels for the held-out test split.
predictions = xgb.predict(X_test)

In [12]:
# --- Evaluation on the held-out test split ---
# Accuracy on its own is misleading when the classes are imbalanced, so it is
# reported together with the majority-class baseline and the per-class recall.
cm = confusion_matrix(y_test, predictions)
ac_s = accuracy_score(y_test, predictions)
print('Confusion matrix:\n', cm)
print('Accuracy score: ', ac_s)

# Accuracy of a trivial model that always predicts the most frequent test
# label; the classifier is only useful if it clearly beats this number.
_, class_counts = np.unique(y_test, return_counts=True)
baseline_accuracy = class_counts.max() / class_counts.sum()
print('Majority-class baseline accuracy: ', baseline_accuracy)

# Rows of the confusion matrix are the true labels (sorted union of the labels
# seen in y_test and predictions), so diagonal / row total is each class's
# recall. A class that never occurs in y_test gets NaN rather than triggering
# a division-by-zero warning.
class_labels = np.union1d(y_test, predictions)
true_totals = cm.sum(axis=1)
recall_per_class = np.divide(
    cm.diagonal(),
    true_totals,
    out=np.full(len(true_totals), np.nan),
    where=true_totals > 0,
)
for label, recall in zip(class_labels, recall_per_class):
    print(f'Recall for class {label}: {recall}')

Confusion matrix:
 [[  14  233]
 [  14 1321]]
Accuracy score:  0.8438685208596713
