## Brain Tumour Detection model using XGBoost Classifier and Hyperparameter Optimization using Optuna:

In [22]:
# !pip install optuna

In [23]:
# Importing Optuna
import optuna
print('optuna %s' % optuna.__version__)

optuna 2.10.0


In [24]:
# Importing required Libraries:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, precision_score, recall_score, f1_score

In [25]:
# Creating a Objective function:
def objective(trial):
    
    # Reading the dataset:
    df = pd.read_csv('Zernike_Moments_YN_250.csv', header=None)
    
    # Shuffling the whole dataset:
    df = df.sample(frac=1, random_state=0).reset_index(drop=True)
    
    # Converting categorical values of Target feature into numerical:
    df[289].replace(['YES','NO'], [1,0], inplace=True)
    
    # Independent and Dependent features:
    X = df.iloc[:, :-1]
    y = df.iloc[:, -1]
    
    # Train-Test spilt:
    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0) 

    # Listing Parameters Range:
    param = {
            "learning_rate": trial.suggest_float("learning_rate", 1e-5, 1.0),
            "n_estimators": trial.suggest_int("n_estimators", 100,1000),
            "max_depth": trial.suggest_int("max_depth", 1,10),
            "min_child_weight": trial.suggest_int("min_child_weight", 1,10)
            }
    
    # Using XGBoost Classifier:
    xgb = XGBClassifier()
    xgb.fit(x_train, y_train)
    y_pred = xgb.predict(x_test)
    
    return accuracy_score(y_test, y_pred)

if __name__ == "__main__":
    
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=100, timeout=600)

    print("Number of finished trials: ", len(study.trials))
    print("Best trial:")
    trial = study.best_trial

    print("  Value: {}".format(trial.value))
    print("  Params: ")
    
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))

[32m[I 2022-04-08 12:19:19,083][0m A new study created in memory with name: no-name-0ac3cce3-e4cd-4674-82e5-ff4949aafead[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:19:19,414][0m Trial 0 finished with value: 0.86 and parameters: {'learning_rate': 0.29347467516879233, 'n_estimators': 146, 'max_depth': 1, 'min_child_weight': 6}. Best is trial 0 with value: 0.86.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:19:19,703][0m Trial 1 finished with value: 0.86 and parameters: {'learning_rate': 0.5893455703084653, 'n_estimators': 759, 'max_depth': 4, 'min_child_weight': 6}. Best is trial 0 with value: 0.86.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:19:19,997][0m Trial 2 finished with value: 0.86 and parameters: {'learning_rate': 0.04443289835592699, 'n_estimators': 374, 'max_depth': 9, 'min_child_weight': 9}. Best is trial 0 with value: 0.86.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:19:20,308][0m Trial 3 finished with value: 0.86 and parameters: {'learning_rate': 0.48000315011140976, 'n_estimators': 186, 'max_depth': 8, 'min_child_weight': 2}. Best is trial 0 with value: 0.86.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:19:20,613][0m Trial 4 finished with value: 0.86 and parameters: {'learning_rate': 0.8678003493067186, 'n_estimators': 602, 'max_depth': 9, 'min_child_weight': 10}. Best is trial 0 with value: 0.86.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:19:20,917][0m Trial 5 finished with value: 0.86 and parameters: {'learning_rate': 0.5171499969261061, 'n_estimators': 287, 'max_depth': 6, 'min_child_weight': 7}. Best is trial 0 with value: 0.86.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:19:21,205][0m Trial 6 finished with value: 0.86 and parameters: {'learning_rate': 0.7014855946754609, 'n_estimators': 486, 'max_depth': 3, 'min_child_weight': 8}. Best is trial 0 with value: 0.86.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:19:21,487][0m Trial 7 finished with value: 0.86 and parameters: {'learning_rate': 0.9463933244489542, 'n_estimators': 945, 'max_depth': 3, 'min_child_weight': 5}. Best is trial 0 with value: 0.86.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:19:21,773][0m Trial 8 finished with value: 0.86 and parameters: {'learning_rate': 0.7862913909735595, 'n_estimators': 744, 'max_depth': 1, 'min_child_weight': 3}. Best is trial 0 with value: 0.86.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:19:22,088][0m Trial 9 finished with value: 0.86 and parameters: {'learning_rate': 0.5023388773543327, 'n_estimators': 914, 'max_depth': 5, 'min_child_weight': 5}. Best is trial 0 with value: 0.86.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:19:22,411][0m Trial 10 finished with value: 0.86 and parameters: {'learning_rate': 0.20178024244749238, 'n_estimators': 105, 'max_depth': 1, 'min_child_weight': 4}. Best is trial 0 with value: 0.86.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:19:22,728][0m Trial 11 finished with value: 0.86 and parameters: {'learning_rate': 0.27365233918107773, 'n_estimators': 752, 'max_depth': 3, 'min_child_weight': 7}. Best is trial 0 with value: 0.86.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:19:23,026][0m Trial 12 finished with value: 0.86 and parameters: {'learning_rate': 0.38325496101323386, 'n_estimators': 633, 'max_depth': 5, 'min_child_weight': 6}. Best is trial 0 with value: 0.86.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:19:23,335][0m Trial 13 finished with value: 0.86 and parameters: {'learning_rate': 0.619543877349982, 'n_estimators': 787, 'max_depth': 2, 'min_child_weight': 6}. Best is trial 0 with value: 0.86.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:19:23,635][0m Trial 14 finished with value: 0.86 and parameters: {'learning_rate': 0.24520328459009566, 'n_estimators': 426, 'max_depth': 7, 'min_child_weight': 4}. Best is trial 0 with value: 0.86.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:19:23,933][0m Trial 15 finished with value: 0.86 and parameters: {'learning_rate': 0.3658284168894706, 'n_estimators': 275, 'max_depth': 4, 'min_child_weight': 1}. Best is trial 0 with value: 0.86.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:19:24,234][0m Trial 16 finished with value: 0.86 and parameters: {'learning_rate': 0.004764323383459279, 'n_estimators': 642, 'max_depth': 1, 'min_child_weight': 7}. Best is trial 0 with value: 0.86.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:19:24,538][0m Trial 17 finished with value: 0.86 and parameters: {'learning_rate': 0.16067234801320984, 'n_estimators': 445, 'max_depth': 7, 'min_child_weight': 4}. Best is trial 0 with value: 0.86.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:24,834][0m Trial 18 finished with value: 0.86 and parameters: {'learning_rate': 0.3575370975335453, 'n_estimators': 275, 'max_depth': 4, 'min_child_weight': 1}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:19:25,129][0m Trial 19 finished with value: 0.86 and parameters: {'learning_rate': 0.015526167485018588, 'n_estimators': 563, 'max_depth': 1, 'min_child_weight': 8}. Best is trial 0 with value: 0.86.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:19:25,425][0m Trial 20 finished with value: 0.86 and parameters: {'learning_rate': 0.1369480505475802, 'n_estimators': 108, 'max_depth': 10, 'min_child_weight': 3}. Best is trial 0 with value: 0.86.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:19:25,728][0m Trial 21 finished with value: 0.86 and parameters: {'learning_rate': 0.3318364240460348, 'n_estimators': 271, 'max_depth': 7, 'min_child_weight': 1}. Best is trial 0 with value: 0.86.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:19:26,019][0m Trial 22 finished with value: 0.86 and parameters: {'learning_rate': 0.07216699615645403, 'n_estimators': 536, 'max_depth': 2, 'min_child_weight': 8}. Best is trial 0 with value: 0.86.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:19:26,323][0m Trial 23 finished with value: 0.86 and parameters: {'learning_rate': 0.12464525149649494, 'n_estimators': 102, 'max_depth': 10, 'min_child_weight': 9}. Best is trial 0 with value: 0.86.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:19:26,627][0m Trial 24 finished with value: 0.86 and parameters: {'learning_rate': 0.2762263374606288, 'n_estimators': 190, 'max_depth': 10, 'min_child_weight': 2}. Best is trial 0 with value: 0.86.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:19:26,915][0m Trial 25 finished with value: 0.86 and parameters: {'learning_rate': 0.08974790533682898, 'n_estimators': 356, 'max_depth': 2, 'min_child_weight': 8}. Best is trial 0 with value: 0.86.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:19:27,213][0m Trial 26 finished with value: 0.86 and parameters: {'learning_rate': 0.10477040418810779, 'n_estimators': 178, 'max_depth': 2, 'min_child_weight': 10}. Best is trial 0 with value: 0.86.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:19:27,503][0m Trial 27 finished with value: 0.86 and parameters: {'learning_rate': 0.26742521669375485, 'n_estimators': 193, 'max_depth': 10, 'min_child_weight': 9}. Best is trial 0 with value: 0.86.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:19:27,803][0m Trial 28 finished with value: 0.86 and parameters: {'learning_rate': 0.4210216579209556, 'n_estimators': 358, 'max_depth': 6, 'min_child_weight': 5}. Best is trial 0 with value: 0.86.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:19:28,093][0m Trial 29 finished with value: 0.86 and parameters: {'learning_rate': 0.10225043764741618, 'n_estimators': 350, 'max_depth': 2, 'min_child_weight': 10}. Best is trial 0 with value: 0.86.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:19:28,383][0m Trial 30 finished with value: 0.86 and parameters: {'learning_rate': 0.20508083609853217, 'n_estimators': 192, 'max_depth': 3, 'min_child_weight': 9}. Best is trial 0 with value: 0.86.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:28,638][0m Trial 31 finished with value: 0.86 and parameters: {'learning_rate': 0.4350324549326654, 'n_estimators': 208, 'max_depth': 6, 'min_child_weight': 10}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:28,866][0m Trial 32 finished with value: 0.86 and parameters: {'learning_rate': 0.566053520688933, 'n_estimators': 348, 'max_depth': 9, 'min_child_weight': 9}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:29,082][0m Trial 33 finished with value: 0.86 and parameters: {'learning_rate': 0.22059561508768533, 'n_estimators': 370, 'max_depth': 3, 'min_child_weight': 5}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:19:29,363][0m Trial 34 finished with value: 0.86 and parameters: {'learning_rate': 0.4271675458866001, 'n_estimators': 220, 'max_depth': 5, 'min_child_weight': 10}. Best is trial 0 with value: 0.86.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:29,573][0m Trial 35 finished with value: 0.86 and parameters: {'learning_rate': 0.005046948261855042, 'n_estimators': 132, 'max_depth': 8, 'min_child_weight': 2}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:29,778][0m Trial 36 finished with value: 0.86 and parameters: {'learning_rate': 0.3313813134980822, 'n_estimators': 276, 'max_depth': 8, 'min_child_weight': 3}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:29,986][0m Trial 37 finished with value: 0.86 and parameters: {'learning_rate': 0.17241668116971406, 'n_estimators': 140, 'max_depth': 7, 'min_child_weight': 3}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:30,197][0m Trial 38 finished with value: 0.86 and parameters: {'learning_rate': 0.30272298098687844, 'n_estimators': 503, 'max_depth': 9, 'min_child_weight': 8}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:30,405][0m Trial 39 finished with value: 0.86 and parameters: {'learning_rate': 0.04817458972706741, 'n_estimators': 238, 'max_depth': 8, 'min_child_weight': 7}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:30,613][0m Trial 40 finished with value: 0.86 and parameters: {'learning_rate': 0.13082025502177208, 'n_estimators': 849, 'max_depth': 10, 'min_child_weight': 6}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:30,823][0m Trial 41 finished with value: 0.86 and parameters: {'learning_rate': 0.05681852656714355, 'n_estimators': 165, 'max_depth': 2, 'min_child_weight': 8}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:31,032][0m Trial 42 finished with value: 0.86 and parameters: {'learning_rate': 0.09339251179031187, 'n_estimators': 151, 'max_depth': 1, 'min_child_weight': 9}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:31,243][0m Trial 43 finished with value: 0.86 and parameters: {'learning_rate': 0.17493526667327036, 'n_estimators': 324, 'max_depth': 2, 'min_child_weight': 10}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:31,447][0m Trial 44 finished with value: 0.86 and parameters: {'learning_rate': 0.21418437657528389, 'n_estimators': 180, 'max_depth': 9, 'min_child_weight': 9}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:31,662][0m Trial 45 finished with value: 0.86 and parameters: {'learning_rate': 0.2759680599419845, 'n_estimators': 411, 'max_depth': 4, 'min_child_weight': 8}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:31,868][0m Trial 46 finished with value: 0.86 and parameters: {'learning_rate': 0.39763998507218346, 'n_estimators': 239, 'max_depth': 3, 'min_child_weight': 5}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:32,073][0m Trial 47 finished with value: 0.86 and parameters: {'learning_rate': 0.4709459200341251, 'n_estimators': 315, 'max_depth': 6, 'min_child_weight': 6}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:32,286][0m Trial 48 finished with value: 0.86 and parameters: {'learning_rate': 0.7587006705492914, 'n_estimators': 400, 'max_depth': 2, 'min_child_weight': 10}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:32,496][0m Trial 49 finished with value: 0.86 and parameters: {'learning_rate': 0.6288039441736311, 'n_estimators': 317, 'max_depth': 1, 'min_child_weight': 10}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:32,720][0m Trial 50 finished with value: 0.86 and parameters: {'learning_rate': 0.5369323242712212, 'n_estimators': 472, 'max_depth': 3, 'min_child_weight': 9}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:32,933][0m Trial 51 finished with value: 0.86 and parameters: {'learning_rate': 0.4448216018092824, 'n_estimators': 216, 'max_depth': 6, 'min_child_weight': 9}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:33,144][0m Trial 52 finished with value: 0.86 and parameters: {'learning_rate': 0.5607630558981158, 'n_estimators': 361, 'max_depth': 6, 'min_child_weight': 10}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:33,352][0m Trial 53 finished with value: 0.86 and parameters: {'learning_rate': 0.5936650360795677, 'n_estimators': 322, 'max_depth': 5, 'min_child_weight': 7}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:33,568][0m Trial 54 finished with value: 0.86 and parameters: {'learning_rate': 0.21756750873417782, 'n_estimators': 253, 'max_depth': 4, 'min_child_weight': 4}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:33,776][0m Trial 55 finished with value: 0.86 and parameters: {'learning_rate': 0.23847066835052383, 'n_estimators': 391, 'max_depth': 3, 'min_child_weight': 9}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:33,988][0m Trial 56 finished with value: 0.86 and parameters: {'learning_rate': 0.657759336739426, 'n_estimators': 207, 'max_depth': 5, 'min_child_weight': 10}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:34,200][0m Trial 57 finished with value: 0.86 and parameters: {'learning_rate': 0.49323591632610336, 'n_estimators': 242, 'max_depth': 4, 'min_child_weight': 10}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:34,408][0m Trial 58 finished with value: 0.86 and parameters: {'learning_rate': 0.4621818146122236, 'n_estimators': 132, 'max_depth': 8, 'min_child_weight': 4}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:34,616][0m Trial 59 finished with value: 0.86 and parameters: {'learning_rate': 0.5294996280964467, 'n_estimators': 103, 'max_depth': 9, 'min_child_weight': 2}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:34,824][0m Trial 60 finished with value: 0.86 and parameters: {'learning_rate': 0.3325180873218486, 'n_estimators': 281, 'max_depth': 8, 'min_child_weight': 2}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:35,043][0m Trial 61 finished with value: 0.86 and parameters: {'learning_rate': 0.33301865515200973, 'n_estimators': 149, 'max_depth': 7, 'min_child_weight': 3}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:35,256][0m Trial 62 finished with value: 0.86 and parameters: {'learning_rate': 0.3001725411454068, 'n_estimators': 702, 'max_depth': 9, 'min_child_weight': 3}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:35,464][0m Trial 63 finished with value: 0.86 and parameters: {'learning_rate': 0.1698458797249358, 'n_estimators': 507, 'max_depth': 8, 'min_child_weight': 3}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:35,683][0m Trial 64 finished with value: 0.86 and parameters: {'learning_rate': 0.02962490410419321, 'n_estimators': 143, 'max_depth': 8, 'min_child_weight': 7}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:35,907][0m Trial 65 finished with value: 0.86 and parameters: {'learning_rate': 0.3040724033917296, 'n_estimators': 227, 'max_depth': 7, 'min_child_weight': 7}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:36,136][0m Trial 66 finished with value: 0.86 and parameters: {'learning_rate': 0.13838879957256062, 'n_estimators': 586, 'max_depth': 8, 'min_child_weight': 6}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:36,360][0m Trial 67 finished with value: 0.86 and parameters: {'learning_rate': 0.0010474954271313258, 'n_estimators': 873, 'max_depth': 7, 'min_child_weight': 6}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:36,575][0m Trial 68 finished with value: 0.86 and parameters: {'learning_rate': 0.17260135945395302, 'n_estimators': 968, 'max_depth': 10, 'min_child_weight': 7}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:36,793][0m Trial 69 finished with value: 0.86 and parameters: {'learning_rate': 0.06168653078438684, 'n_estimators': 787, 'max_depth': 9, 'min_child_weight': 8}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:37,003][0m Trial 70 finished with value: 0.86 and parameters: {'learning_rate': 0.04853153786814962, 'n_estimators': 166, 'max_depth': 1, 'min_child_weight': 8}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:37,213][0m Trial 71 finished with value: 0.86 and parameters: {'learning_rate': 0.7595777218043155, 'n_estimators': 431, 'max_depth': 1, 'min_child_weight': 9}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:37,418][0m Trial 72 finished with value: 0.86 and parameters: {'learning_rate': 0.8858863168708822, 'n_estimators': 474, 'max_depth': 6, 'min_child_weight': 5}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:37,630][0m Trial 73 finished with value: 0.86 and parameters: {'learning_rate': 0.653949854032783, 'n_estimators': 308, 'max_depth': 6, 'min_child_weight': 10}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:37,838][0m Trial 74 finished with value: 0.86 and parameters: {'learning_rate': 0.5673062857763026, 'n_estimators': 319, 'max_depth': 5, 'min_child_weight': 10}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:38,046][0m Trial 75 finished with value: 0.86 and parameters: {'learning_rate': 0.6109294398183521, 'n_estimators': 392, 'max_depth': 5, 'min_child_weight': 6}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:38,263][0m Trial 76 finished with value: 0.86 and parameters: {'learning_rate': 0.7331561147671728, 'n_estimators': 449, 'max_depth': 3, 'min_child_weight': 10}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:38,473][0m Trial 77 finished with value: 0.86 and parameters: {'learning_rate': 0.8183802078631466, 'n_estimators': 394, 'max_depth': 4, 'min_child_weight': 9}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:38,694][0m Trial 78 finished with value: 0.86 and parameters: {'learning_rate': 0.5303557814678481, 'n_estimators': 297, 'max_depth': 4, 'min_child_weight': 9}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:38,903][0m Trial 79 finished with value: 0.86 and parameters: {'learning_rate': 0.6548547001016735, 'n_estimators': 256, 'max_depth': 5, 'min_child_weight': 9}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:39,114][0m Trial 80 finished with value: 0.86 and parameters: {'learning_rate': 0.5073865616568012, 'n_estimators': 214, 'max_depth': 5, 'min_child_weight': 10}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:39,323][0m Trial 81 finished with value: 0.86 and parameters: {'learning_rate': 0.48327620870532295, 'n_estimators': 202, 'max_depth': 4, 'min_child_weight': 4}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:39,533][0m Trial 82 finished with value: 0.86 and parameters: {'learning_rate': 0.5755988909733085, 'n_estimators': 113, 'max_depth': 4, 'min_child_weight': 4}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:39,745][0m Trial 83 finished with value: 0.86 and parameters: {'learning_rate': 0.6962789193989503, 'n_estimators': 250, 'max_depth': 4, 'min_child_weight': 4}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:39,953][0m Trial 84 finished with value: 0.86 and parameters: {'learning_rate': 0.2543725348065239, 'n_estimators': 126, 'max_depth': 5, 'min_child_weight': 2}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:40,163][0m Trial 85 finished with value: 0.86 and parameters: {'learning_rate': 0.3693331120516679, 'n_estimators': 272, 'max_depth': 4, 'min_child_weight': 1}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:40,376][0m Trial 86 finished with value: 0.86 and parameters: {'learning_rate': 0.23399508552928744, 'n_estimators': 101, 'max_depth': 8, 'min_child_weight': 2}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:40,589][0m Trial 87 finished with value: 0.86 and parameters: {'learning_rate': 0.34587372346174117, 'n_estimators': 162, 'max_depth': 9, 'min_child_weight': 3}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:40,803][0m Trial 88 finished with value: 0.86 and parameters: {'learning_rate': 0.40336314785051025, 'n_estimators': 613, 'max_depth': 8, 'min_child_weight': 3}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:41,012][0m Trial 89 finished with value: 0.86 and parameters: {'learning_rate': 0.3004963253172995, 'n_estimators': 667, 'max_depth': 9, 'min_child_weight': 2}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:41,224][0m Trial 90 finished with value: 0.86 and parameters: {'learning_rate': 0.4577531444658883, 'n_estimators': 148, 'max_depth': 9, 'min_child_weight': 3}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:41,438][0m Trial 91 finished with value: 0.86 and parameters: {'learning_rate': 0.29856547036203146, 'n_estimators': 133, 'max_depth': 7, 'min_child_weight': 2}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:41,643][0m Trial 92 finished with value: 0.86 and parameters: {'learning_rate': 0.31219701035255626, 'n_estimators': 561, 'max_depth': 8, 'min_child_weight': 7}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:41,856][0m Trial 93 finished with value: 0.86 and parameters: {'learning_rate': 0.1478535296301353, 'n_estimators': 679, 'max_depth': 7, 'min_child_weight': 2}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:42,072][0m Trial 94 finished with value: 0.86 and parameters: {'learning_rate': 0.011953915003484125, 'n_estimators': 720, 'max_depth': 7, 'min_child_weight': 6}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:42,283][0m Trial 95 finished with value: 0.86 and parameters: {'learning_rate': 0.18645317418517782, 'n_estimators': 993, 'max_depth': 8, 'min_child_weight': 7}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:42,495][0m Trial 96 finished with value: 0.86 and parameters: {'learning_rate': 0.11771122320495944, 'n_estimators': 856, 'max_depth': 10, 'min_child_weight': 6}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:42,708][0m Trial 97 finished with value: 0.86 and parameters: {'learning_rate': 0.027747586648472097, 'n_estimators': 812, 'max_depth': 7, 'min_child_weight': 7}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:42,920][0m Trial 98 finished with value: 0.86 and parameters: {'learning_rate': 0.07273268634703389, 'n_estimators': 885, 'max_depth': 10, 'min_child_weight': 7}. Best is trial 0 with value: 0.86.[0m




  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):
[32m[I 2022-04-08 12:19:43,137][0m Trial 99 finished with value: 0.86 and parameters: {'learning_rate': 0.035983847844053674, 'n_estimators': 956, 'max_depth': 1, 'min_child_weight': 8}. Best is trial 0 with value: 0.86.[0m


Number of finished trials:  100
Best trial:
  Value: 0.86
  Params: 
    learning_rate: 0.29347467516879233
    n_estimators: 146
    max_depth: 1
    min_child_weight: 6


In [26]:
# Reading the dataset:
df = pd.read_csv('Zernike_Moments_YN_250.csv', header=None)

In [27]:
# Shuffling the whole dataset:
df = df.sample(frac=1, random_state=0).reset_index(drop=True)

In [28]:
# Converting categorical values of Target feature into numerical:
df[289].replace(['YES','NO'], [1,0], inplace=True)

In [29]:
# Independent and Dependent features:
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

In [30]:
# Train-Test spilt
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0) 

In [31]:
# Using Best parameters found from HyperParameter Optimization done using Optuna:
xgb = XGBClassifier(
                    n_estimators = 146,
                    learning_rate = 0.2934,
                    max_depth = 1,
                    min_child_weight = 6
                   )

In [33]:
# Fitting the Training data:
xgb.fit(x_train, y_train)



XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
              gamma=0, gpu_id=-1, importance_type=None,
              interaction_constraints='', learning_rate=0.2934,
              max_delta_step=0, max_depth=1, min_child_weight=6, missing=nan,
              monotone_constraints='()', n_estimators=146, n_jobs=12,
              num_parallel_tree=1, predictor='auto', random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
              tree_method='exact', validate_parameters=1, verbosity=None)

In [34]:
# Predicting the test data:
y_pred = xgb.predict(x_test)

### Results:

In [35]:
print("Accuracy :", round(accuracy_score(y_test, y_pred), 2)*100, "%")

Accuracy : 86.0 %


In [36]:
print("Precision :", round(precision_score(y_test, y_pred), 2)*100, "%")

Precision : 88.0 %


In [37]:
print("Recall :", round(recall_score(y_test, y_pred), 2)*100, "%")

Recall : 90.0 %


In [38]:
print("f1_Score :", round(f1_score(y_test, y_pred), 2)*100, "%")

f1_Score : 89.0 %


In [39]:
print("Confusion Matrix :")
print(confusion_matrix(y_test, y_pred))

Confusion Matrix :
[[15  4]
 [ 3 28]]


In [40]:
print("Classification Report :")
print(classification_report(y_test, y_pred))

Classification Report :
              precision    recall  f1-score   support

           0       0.83      0.79      0.81        19
           1       0.88      0.90      0.89        31

    accuracy                           0.86        50
   macro avg       0.85      0.85      0.85        50
weighted avg       0.86      0.86      0.86        50

