## Brain Tumour Detection model using XGBoost Classifier and Hyperparameter Optimization using Scikit-Optimize:

In [1]:
# !pip install scikit-optimize

In [2]:
# Importing Scikit-Optimize
import skopt
print('skopt %s' % skopt.__version__)

skopt 0.9.0


In [4]:
# Importing required Libraries:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, precision_score, recall_score, f1_score
from sklearn.model_selection import cross_val_score
from skopt.space import Real, Integer
from skopt.utils import use_named_args
from skopt import gp_minimize
from skopt.plots import plot_convergence

In [5]:
# Reading the dataset:
df = pd.read_csv('Zernike_Moments_YN_250.csv', header=None)

In [6]:
# Shuffling the whole dataset:
df = df.sample(frac=1, random_state=0).reset_index(drop=True)

In [7]:
# Converting categorical values of Target feature into numerical:
df[289].replace(['YES','NO'], [1,0], inplace=True)

In [8]:
# Independent and Dependent features:
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

In [9]:
# Train-Test spilt
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0) 

### Scikit Optimize

In [10]:
# Listing Parameters Range:
space = [
         Integer(100, 1000, name = 'n_estimators'),
         Real(10**-5, 10**0, "log-uniform", name = 'learning_rate'),
         Integer(1, 10, name = 'max_depth'),
         Integer(1, 10, name = 'min_child_weight')
        ]

xgb = XGBClassifier()

# scikit-learn estimator parameters
@use_named_args(space)
def objective(**params):
    
    xgb.set_params(**params)
    return -np.mean(cross_val_score(xgb, 
                                    X, y, 
                                    cv=5, 
                                    n_jobs=-1,
                                    scoring="neg_mean_absolute_error"))

In [11]:
res_gp = gp_minimize(objective, space, n_calls=50, random_state=0)

"Best score=%.2f" % res_gp.fun

'Best score=0.18'

In [12]:
print(
      """Best parameters:
      - n_estimators = %d
      - learning_rate = %.6f
      - max_depth = %d
      - min_child_weight = %d
      """ 
      % (res_gp.x[0], res_gp.x[1], res_gp.x[2], res_gp.x[3])
      )

Best parameters:
      - n_estimators = 1000
      - learning_rate = 0.020285
      - max_depth = 10
      - min_child_weight = 1
      


In [13]:
# Using Best parameters found from HyperParameter Optimization done using Scikit-Optimize:
xgb = XGBClassifier(
                    n_estimators=1000,
                    learning_rate = 0.0202,
                    max_depth = 10,
                    min_child_weight = 1
                   )

In [15]:
# Fitting the Training data:
xgb.fit(x_train, y_train)



XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
              gamma=0, gpu_id=-1, importance_type=None,
              interaction_constraints='', learning_rate=0.0202,
              max_delta_step=0, max_depth=10, min_child_weight=1, missing=nan,
              monotone_constraints='()', n_estimators=1000, n_jobs=12,
              num_parallel_tree=1, predictor='auto', random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
              tree_method='exact', validate_parameters=1, verbosity=None)

In [16]:
# Predicting the test data:
y_pred = xgb.predict(x_test)

### Results:

In [17]:
print("Accuracy :", round(accuracy_score(y_test, y_pred), 2)*100, "%")

Accuracy : 86.0 %


In [18]:
print("Precision :", round(precision_score(y_test, y_pred), 2)*100, "%")

Precision : 85.0 %


In [19]:
print("Recall :", round(recall_score(y_test, y_pred), 2)*100, "%")

Recall : 94.0 %


In [20]:
print("f1_Score :", round(f1_score(y_test, y_pred), 2)*100, "%")

f1_Score : 89.0 %


In [21]:
print("Confusion Matrix :")
print(confusion_matrix(y_test, y_pred))

Confusion Matrix :
[[14  5]
 [ 2 29]]


In [22]:
print("Classification Report :")
print(classification_report(y_test, y_pred))

Classification Report :
              precision    recall  f1-score   support

           0       0.88      0.74      0.80        19
           1       0.85      0.94      0.89        31

    accuracy                           0.86        50
   macro avg       0.86      0.84      0.85        50
weighted avg       0.86      0.86      0.86        50

