## Brain Tumour Detection model using XGBoost Classifier and Hyperparameter Optimization using Scikit-Optimize:

In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [1]:
# !pip install scikit-optimize

In [13]:
# Importing Scikit-Optimize
import skopt
print('skopt %s' % skopt.__version__)

skopt 0.9.0


In [14]:
# Importing required Libraries:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, precision_score, recall_score, f1_score
from sklearn.model_selection import cross_val_score
from skopt.space import Real, Integer
from skopt.utils import use_named_args
from skopt import gp_minimize
from skopt.plots import plot_convergence

In [15]:
# Reading the dataset:
df = pd.read_csv('/content/drive/MyDrive/Colab Notebooks/HPO_Scikit_Optimize_3000/Zernike_Moments_YN_3000.csv', header=None)

In [16]:
# Shuffling the whole dataset:
df = df.sample(frac=1, random_state=0).reset_index(drop=True)

In [17]:
# Converting categorical values of Target feature into numerical:
df[289].replace(['YES','NO'], [1,0], inplace=True)

In [18]:
# Independent and Dependent features:
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

In [19]:
# Train-Test spilt
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0) 

### Scikit Optimize

In [20]:
# Listing Parameters Range:
space = [
         Integer(100, 1000, name = 'n_estimators'),
         Real(10**-5, 10**0, "log-uniform", name = 'learning_rate'),
         Integer(1, 10, name = 'max_depth'),
         Integer(1, 10, name = 'min_child_weight')
        ]

xgb = XGBClassifier()

# scikit-learn estimator parameters
@use_named_args(space)
def objective(**params):
    
    xgb.set_params(**params)
    return -np.mean(cross_val_score(xgb, 
                                    X, y, 
                                    cv=5, 
                                    n_jobs=-1,
                                    scoring="neg_mean_absolute_error"))

In [21]:
res_gp = gp_minimize(objective, space, n_calls=50, random_state=0)

"Best score=%.2f" % res_gp.fun

'Best score=0.03'

In [22]:
print(
      """Best parameters:
      - n_estimators = %d
      - learning_rate = %.6f
      - max_depth = %d
      - min_child_weight = %d
      """ 
      % (res_gp.x[0], res_gp.x[1], res_gp.x[2], res_gp.x[3])
      )

Best parameters:
      - n_estimators = 1000
      - learning_rate = 0.036110
      - max_depth = 6
      - min_child_weight = 1
      


In [24]:
# Using Best parameters found from HyperParameter Optimization done using Scikit-Optimize:
xgb = XGBClassifier(
                    n_estimators = 1000,
                    learning_rate = 0.0361,
                    max_depth = 6,
                    min_child_weight = 1
                   )

In [25]:
# Fitting the Training data:
xgb.fit(x_train, y_train)

XGBClassifier(learning_rate=0.0361, max_depth=6, n_estimators=1000)

In [26]:
# Predicting the test data:
y_pred = xgb.predict(x_test)

### Results:

In [27]:
print("Accuracy :", round(accuracy_score(y_test, y_pred), 2)*100, "%")

Accuracy : 96.0 %


In [28]:
print("Precision :", round(precision_score(y_test, y_pred), 2)*100, "%")

Precision : 97.0 %


In [29]:
print("Recall :", round(recall_score(y_test, y_pred), 2)*100, "%")

Recall : 97.0 %


In [30]:
print("f1_Score :", round(f1_score(y_test, y_pred), 2)*100, "%")

f1_Score : 97.0 %


In [31]:
print("Confusion Matrix :")
print(confusion_matrix(y_test, y_pred))

Confusion Matrix :
[[196  11]
 [ 11 334]]


In [32]:
print("Classification Report :")
print(classification_report(y_test, y_pred))

Classification Report :
              precision    recall  f1-score   support

           0       0.95      0.95      0.95       207
           1       0.97      0.97      0.97       345

    accuracy                           0.96       552
   macro avg       0.96      0.96      0.96       552
weighted avg       0.96      0.96      0.96       552



In [33]:
# Creating a pickle file for the classifier
import pickle
filename = 'HPO_ScikitOptimize_3000.pkl'
pickle.dump(xgb, open(filename, 'wb'))