## Brain Tumour Detection model using XGBoost Classifier and Hyperparameter Optimization using Optuna:

In [1]:
# !pip install optuna

In [3]:
# Importing Optuna
import optuna
print('optuna %s' % optuna.__version__)

optuna 2.10.0


In [5]:
# Importing required Libraries:
import numpy as np
import pandas as pd
from sklearn.model_selection import train_test_split
from xgboost import XGBClassifier
from sklearn.metrics import accuracy_score, confusion_matrix, classification_report, precision_score, recall_score, f1_score

In [6]:
# Creating a Objective function:
def objective(trial):
    
    # Reading the dataset:
    df = pd.read_csv('Zernike_Moments_YN_3000.csv', header=None)
    
    # Shuffling the whole dataset:
    df = df.sample(frac=1, random_state=0).reset_index(drop=True)
    
    # Converting categorical values of Target feature into numerical:
    df[289].replace(['YES','NO'], [1,0], inplace=True)
    
    # Independent and Dependent features:
    X = df.iloc[:, :-1]
    y = df.iloc[:, -1]
    
    # Train-Test spilt:
    x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0) 

    # Listing Parameters Range:
    param = {
            "learning_rate": trial.suggest_float("learning_rate", 1e-5, 1.0),
            "n_estimators": trial.suggest_int("n_estimators", 100,1000),
            "max_depth": trial.suggest_int("max_depth", 1,10),
            "min_child_weight": trial.suggest_int("min_child_weight", 1,10)
            }
    
    # Using XGBoost Classifier:
    xgb = XGBClassifier()
    xgb.fit(x_train, y_train)
    y_pred = xgb.predict(x_test)
    
    return accuracy_score(y_test, y_pred)

if __name__ == "__main__":
    
    study = optuna.create_study(direction="maximize")
    study.optimize(objective, n_trials=100, timeout=600)

    print("Number of finished trials: ", len(study.trials))
    print("Best trial:")
    trial = study.best_trial

    print("  Value: {}".format(trial.value))
    print("  Params: ")
    
    for key, value in trial.params.items():
        print("    {}: {}".format(key, value))

[32m[I 2022-04-08 12:24:05,004][0m A new study created in memory with name: no-name-5ef2f0e0-de81-4a08-8735-2d47be3d83a9[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:24:06,706][0m Trial 0 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.3718938953498642, 'n_estimators': 977, 'max_depth': 4, 'min_child_weight': 8}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:24:08,643][0m Trial 1 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.9526559767550806, 'n_estimators': 161, 'max_depth': 6, 'min_child_weight': 2}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:24:10,395][0m Trial 2 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.3224892631373386, 'n_estimators': 903, 'max_depth': 5, 'min_child_weight': 7}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:24:12,168][0m Trial 3 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.509912069894138, 'n_estimators': 245, 'max_depth': 3, 'min_child_weight': 5}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:24:13,943][0m Trial 4 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.5573275278865049, 'n_estimators': 300, 'max_depth': 1, 'min_child_weight': 5}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:24:15,412][0m Trial 5 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.55355967122621, 'n_estimators': 862, 'max_depth': 1, 'min_child_weight': 8}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:24:16,603][0m Trial 6 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.6686415676419918, 'n_estimators': 205, 'max_depth': 1, 'min_child_weight': 2}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:24:17,918][0m Trial 7 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.4303625128460491, 'n_estimators': 438, 'max_depth': 1, 'min_child_weight': 6}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:24:19,202][0m Trial 8 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.07152859472936492, 'n_estimators': 715, 'max_depth': 4, 'min_child_weight': 1}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:24:20,403][0m Trial 9 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.5109794632893258, 'n_estimators': 525, 'max_depth': 4, 'min_child_weight': 3}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:24:21,735][0m Trial 10 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.14309160656939718, 'n_estimators': 997, 'max_depth': 9, 'min_child_weight': 10}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:24:23,078][0m Trial 11 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.9697386978598737, 'n_estimators': 712, 'max_depth': 7, 'min_child_weight': 9}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:24:24,458][0m Trial 12 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.993958386919797, 'n_estimators': 103, 'max_depth': 7, 'min_child_weight': 4}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:24:25,764][0m Trial 13 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.7911638074102019, 'n_estimators': 398, 'max_depth': 7, 'min_child_weight': 7}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:24:27,064][0m Trial 14 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.2674724990275529, 'n_estimators': 680, 'max_depth': 10, 'min_child_weight': 3}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:24:28,423][0m Trial 15 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.7942822221204721, 'n_estimators': 589, 'max_depth': 6, 'min_child_weight': 1}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:24:29,774][0m Trial 16 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.2767556422402343, 'n_estimators': 849, 'max_depth': 5, 'min_child_weight': 10}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:24:31,094][0m Trial 17 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.22383491774432024, 'n_estimators': 688, 'max_depth': 10, 'min_child_weight': 8}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:24:32,450][0m Trial 18 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.7235002069643292, 'n_estimators': 596, 'max_depth': 3, 'min_child_weight': 6}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:24:33,797][0m Trial 19 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.0028290566471470546, 'n_estimators': 853, 'max_depth': 3, 'min_child_weight': 10}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:24:35,298][0m Trial 20 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.3766797618533564, 'n_estimators': 999, 'max_depth': 9, 'min_child_weight': 8}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:24:36,711][0m Trial 21 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.20208528760111322, 'n_estimators': 604, 'max_depth': 3, 'min_child_weight': 7}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:24:38,068][0m Trial 22 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.0546254474571895, 'n_estimators': 806, 'max_depth': 3, 'min_child_weight': 9}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:24:39,414][0m Trial 23 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.39871619212379983, 'n_estimators': 999, 'max_depth': 8, 'min_child_weight': 9}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:24:40,874][0m Trial 24 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.3733056250411784, 'n_estimators': 935, 'max_depth': 2, 'min_child_weight': 7}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:24:42,335][0m Trial 25 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.1746934828870154, 'n_estimators': 796, 'max_depth': 4, 'min_child_weight': 9}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:24:43,710][0m Trial 26 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.43159159438417166, 'n_estimators': 787, 'max_depth': 8, 'min_child_weight': 9}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:24:45,058][0m Trial 27 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.36291726528005647, 'n_estimators': 939, 'max_depth': 2, 'min_child_weight': 8}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:24:46,385][0m Trial 28 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.14764501314031603, 'n_estimators': 771, 'max_depth': 4, 'min_child_weight': 7}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:24:47,766][0m Trial 29 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.44541240790230696, 'n_estimators': 771, 'max_depth': 6, 'min_child_weight': 9}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:24:49,163][0m Trial 30 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.6124188824018815, 'n_estimators': 933, 'max_depth': 2, 'min_child_weight': 8}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:24:50,471][0m Trial 31 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.3123113457572619, 'n_estimators': 913, 'max_depth': 5, 'min_child_weight': 6}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:24:51,845][0m Trial 32 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.462395338956461, 'n_estimators': 744, 'max_depth': 6, 'min_child_weight': 8}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:24:53,240][0m Trial 33 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.6101536334291088, 'n_estimators': 898, 'max_depth': 5, 'min_child_weight': 7}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:24:54,527][0m Trial 34 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.3220408627772518, 'n_estimators': 938, 'max_depth': 5, 'min_child_weight': 5}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:24:55,815][0m Trial 35 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.03555273873898285, 'n_estimators': 850, 'max_depth': 3, 'min_child_weight': 10}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:24:57,185][0m Trial 36 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.09536285885712792, 'n_estimators': 497, 'max_depth': 3, 'min_child_weight': 8}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:24:58,477][0m Trial 37 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.22048559239489268, 'n_estimators': 992, 'max_depth': 2, 'min_child_weight': 7}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:24:59,786][0m Trial 38 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.09518839580989202, 'n_estimators': 638, 'max_depth': 4, 'min_child_weight': 9}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:01,076][0m Trial 39 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.22311844035531012, 'n_estimators': 377, 'max_depth': 3, 'min_child_weight': 9}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:02,371][0m Trial 40 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.5239442712041905, 'n_estimators': 883, 'max_depth': 2, 'min_child_weight': 6}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:03,673][0m Trial 41 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.38497115592222253, 'n_estimators': 801, 'max_depth': 4, 'min_child_weight': 9}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:04,976][0m Trial 42 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.4133385551512081, 'n_estimators': 973, 'max_depth': 8, 'min_child_weight': 9}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:06,258][0m Trial 43 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.472793653489921, 'n_estimators': 818, 'max_depth': 8, 'min_child_weight': 10}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:07,540][0m Trial 44 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.34926837738341754, 'n_estimators': 957, 'max_depth': 1, 'min_child_weight': 8}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:08,830][0m Trial 45 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.28138449511580227, 'n_estimators': 905, 'max_depth': 2, 'min_child_weight': 7}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:10,123][0m Trial 46 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.15830724981822578, 'n_estimators': 743, 'max_depth': 4, 'min_child_weight': 8}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:11,419][0m Trial 47 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.1597670020110208, 'n_estimators': 776, 'max_depth': 4, 'min_child_weight': 5}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:12,802][0m Trial 48 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.4530944054028286, 'n_estimators': 757, 'max_depth': 7, 'min_child_weight': 8}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:14,117][0m Trial 49 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.5515980500307324, 'n_estimators': 674, 'max_depth': 6, 'min_child_weight': 10}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:15,443][0m Trial 50 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.6451231970526862, 'n_estimators': 874, 'max_depth': 1, 'min_child_weight': 7}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:16,733][0m Trial 51 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.5988615584023026, 'n_estimators': 929, 'max_depth': 5, 'min_child_weight': 6}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:18,026][0m Trial 52 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.48366478712796557, 'n_estimators': 746, 'max_depth': 6, 'min_child_weight': 6}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:19,318][0m Trial 53 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.6941392267047133, 'n_estimators': 832, 'max_depth': 6, 'min_child_weight': 8}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:20,612][0m Trial 54 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.6124146547728353, 'n_estimators': 902, 'max_depth': 5, 'min_child_weight': 5}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:21,908][0m Trial 55 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.8903401943485005, 'n_estimators': 878, 'max_depth': 5, 'min_child_weight': 6}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:23,205][0m Trial 56 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.3175214051294765, 'n_estimators': 914, 'max_depth': 5, 'min_child_weight': 4}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:24,503][0m Trial 57 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.7351897809071255, 'n_estimators': 925, 'max_depth': 5, 'min_child_weight': 5}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:25,801][0m Trial 58 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.3091862993306435, 'n_estimators': 841, 'max_depth': 5, 'min_child_weight': 6}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:27,097][0m Trial 59 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.5316329066032901, 'n_estimators': 970, 'max_depth': 7, 'min_child_weight': 5}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:28,456][0m Trial 60 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.005842411307642445, 'n_estimators': 450, 'max_depth': 3, 'min_child_weight': 4}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:29,786][0m Trial 61 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.0962614735666561, 'n_estimators': 975, 'max_depth': 3, 'min_child_weight': 7}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:31,088][0m Trial 62 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.10143550013093479, 'n_estimators': 548, 'max_depth': 4, 'min_child_weight': 10}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:32,379][0m Trial 63 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.06289096862990656, 'n_estimators': 439, 'max_depth': 4, 'min_child_weight': 7}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:33,677][0m Trial 64 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.23691695618719258, 'n_estimators': 377, 'max_depth': 3, 'min_child_weight': 10}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:34,974][0m Trial 65 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.19949589576132692, 'n_estimators': 296, 'max_depth': 3, 'min_child_weight': 9}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:36,278][0m Trial 66 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.1127990182354097, 'n_estimators': 476, 'max_depth': 2, 'min_child_weight': 9}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:37,579][0m Trial 67 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.022827728329917717, 'n_estimators': 371, 'max_depth': 2, 'min_child_weight': 10}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:38,885][0m Trial 68 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.2381142780363324, 'n_estimators': 643, 'max_depth': 1, 'min_child_weight': 8}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:40,179][0m Trial 69 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.12704411949951364, 'n_estimators': 513, 'max_depth': 2, 'min_child_weight': 9}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:41,472][0m Trial 70 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.5149753280834932, 'n_estimators': 970, 'max_depth': 4, 'min_child_weight': 9}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:42,767][0m Trial 71 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.5761191791556772, 'n_estimators': 691, 'max_depth': 7, 'min_child_weight': 7}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:44,098][0m Trial 72 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.6550135502075132, 'n_estimators': 725, 'max_depth': 6, 'min_child_weight': 6}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:45,456][0m Trial 73 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.5654250044244655, 'n_estimators': 740, 'max_depth': 7, 'min_child_weight': 6}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:46,760][0m Trial 74 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.6795097339378642, 'n_estimators': 828, 'max_depth': 6, 'min_child_weight': 5}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:48,063][0m Trial 75 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.4859072504443032, 'n_estimators': 767, 'max_depth': 6, 'min_child_weight': 6}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:49,368][0m Trial 76 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.6082650711952062, 'n_estimators': 666, 'max_depth': 6, 'min_child_weight': 8}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:50,666][0m Trial 77 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.7023701330244534, 'n_estimators': 876, 'max_depth': 7, 'min_child_weight': 4}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:51,967][0m Trial 78 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.875654905236742, 'n_estimators': 872, 'max_depth': 5, 'min_child_weight': 6}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:53,292][0m Trial 79 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.8625880616738307, 'n_estimators': 890, 'max_depth': 5, 'min_child_weight': 3}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:54,589][0m Trial 80 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.7884693006033979, 'n_estimators': 927, 'max_depth': 5, 'min_child_weight': 4}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:55,886][0m Trial 81 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.7245313075700799, 'n_estimators': 836, 'max_depth': 5, 'min_child_weight': 5}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:57,179][0m Trial 82 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.7734034709114297, 'n_estimators': 848, 'max_depth': 5, 'min_child_weight': 4}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:58,479][0m Trial 83 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.8472920422434891, 'n_estimators': 951, 'max_depth': 6, 'min_child_weight': 5}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:25:59,794][0m Trial 84 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.9108957166299856, 'n_estimators': 907, 'max_depth': 5, 'min_child_weight': 4}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:26:01,194][0m Trial 85 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.7430772547678571, 'n_estimators': 919, 'max_depth': 5, 'min_child_weight': 2}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:26:02,497][0m Trial 86 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.34172472870889503, 'n_estimators': 980, 'max_depth': 4, 'min_child_weight': 5}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:26:03,808][0m Trial 87 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.9977210118465342, 'n_estimators': 557, 'max_depth': 4, 'min_child_weight': 3}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:26:05,101][0m Trial 88 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.002218571866986356, 'n_estimators': 455, 'max_depth': 3, 'min_child_weight': 3}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:26:06,402][0m Trial 89 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.08189271882833037, 'n_estimators': 407, 'max_depth': 4, 'min_child_weight': 4}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:26:07,703][0m Trial 90 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.047713599800375414, 'n_estimators': 425, 'max_depth': 3, 'min_child_weight': 7}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:26:09,010][0m Trial 91 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.18881598679045794, 'n_estimators': 204, 'max_depth': 3, 'min_child_weight': 10}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:26:10,313][0m Trial 92 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.1297394061696447, 'n_estimators': 321, 'max_depth': 3, 'min_child_weight': 10}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:26:11,615][0m Trial 93 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.11886963785368838, 'n_estimators': 448, 'max_depth': 3, 'min_child_weight': 10}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:26:12,920][0m Trial 94 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.029163176107040032, 'n_estimators': 337, 'max_depth': 2, 'min_child_weight': 10}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:26:14,228][0m Trial 95 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.06635426306694965, 'n_estimators': 477, 'max_depth': 1, 'min_child_weight': 10}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:26:15,525][0m Trial 96 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.2481896083139875, 'n_estimators': 281, 'max_depth': 2, 'min_child_weight': 9}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:26:16,914][0m Trial 97 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.11019257238428914, 'n_estimators': 529, 'max_depth': 2, 'min_child_weight': 9}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:26:18,217][0m Trial 98 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.20617776311400873, 'n_estimators': 380, 'max_depth': 1, 'min_child_weight': 9}. Best is trial 0 with value: 0.9565217391304348.[0m
  elif isinstance(data.columns, (pd.Int64Index, pd.RangeIndex)):




[32m[I 2022-04-08 12:26:19,523][0m Trial 99 finished with value: 0.9565217391304348 and parameters: {'learning_rate': 0.176466773883667, 'n_estimators': 351, 'max_depth': 2, 'min_child_weight': 9}. Best is trial 0 with value: 0.9565217391304348.[0m


Number of finished trials:  100
Best trial:
  Value: 0.9565217391304348
  Params: 
    learning_rate: 0.3718938953498642
    n_estimators: 977
    max_depth: 4
    min_child_weight: 8


In [7]:
# Reading the dataset:
df = pd.read_csv('Zernike_Moments_YN_3000.csv', header=None)

In [8]:
# Shuffling the whole dataset:
df = df.sample(frac=1, random_state=0).reset_index(drop=True)

In [9]:
# Converting categorical values of Target feature into numerical:
df[289].replace(['YES','NO'], [1,0], inplace=True)

In [10]:
# Independent and Dependent features:
X = df.iloc[:, :-1]
y = df.iloc[:, -1]

In [11]:
# Train-Test spilt
x_train, x_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=0) 

In [12]:
# Using Best parameters found from HyperParameter Optimization done using Optuna:
xgb = XGBClassifier(
                    n_estimators = 977,
                    learning_rate = 0.3718,
                    max_depth = 4,
                    min_child_weight = 8
                   )

In [24]:
# Fitting the Training data:
xgb.fit(x_train, y_train)



XGBClassifier(base_score=0.5, booster='gbtree', colsample_bylevel=1,
              colsample_bynode=1, colsample_bytree=1, enable_categorical=False,
              gamma=0, gpu_id=-1, importance_type=None,
              interaction_constraints='', learning_rate=0.3718,
              max_delta_step=0, max_depth=4, min_child_weight=8, missing=nan,
              monotone_constraints='()', n_estimators=977, n_jobs=12,
              num_parallel_tree=1, predictor='auto', random_state=0,
              reg_alpha=0, reg_lambda=1, scale_pos_weight=1, subsample=1,
              tree_method='exact', validate_parameters=1, verbosity=None)

In [25]:
# Predicting the test data:
y_pred = xgb.predict(x_test)

### Results:

In [26]:
print("Accuracy :", round(accuracy_score(y_test, y_pred), 2)*100, "%")

Accuracy : 96.0 %


In [27]:
print("Precision :", round(precision_score(y_test, y_pred), 2)*100, "%")

Precision : 97.0 %


In [28]:
print("Recall :", round(recall_score(y_test, y_pred), 2)*100, "%")

Recall : 97.0 %


In [29]:
print("f1_Score :", round(f1_score(y_test, y_pred), 2)*100, "%")

f1_Score : 97.0 %


In [30]:
print("Confusion Matrix :")
print(confusion_matrix(y_test, y_pred))

Confusion Matrix :
[[196  11]
 [ 12 333]]


In [31]:
print("Classification Report :")
print(classification_report(y_test, y_pred))

Classification Report :
              precision    recall  f1-score   support

           0       0.94      0.95      0.94       207
           1       0.97      0.97      0.97       345

    accuracy                           0.96       552
   macro avg       0.96      0.96      0.96       552
weighted avg       0.96      0.96      0.96       552

