In [1]:
import os
import pandas as pd
from xgboost import XGBClassifier
# import lightgbm as lgb
from lightgbm import LGBMClassifier
from sklearn.model_selection import KFold, StratifiedKFold, train_test_split, GridSearchCV, cross_val_score
from sklearn.metrics import accuracy_score
# sklearn.metrics.accuracy_score
import xgboost as xgb
import numpy as np
import itertools 
import tqdm
from time import sleep
import random

In [2]:
import warnings 
warnings.filterwarnings("ignore")

In [3]:
# mannually generate the search grid
def makeGrid(pars_dict):  
    keys=pars_dict.keys()
    combinations=itertools.product(*pars_dict.values())
    ds=[dict(zip(keys,cc)) for cc in combinations]
    return ds

# Dataset Preparation

In [4]:
df_raw = pd.read_excel("../Dataset/MergedDataset_231207_ForElly_Excel.xlsx")

# Remove "." in the feature column
row_num, column_num = df_raw.shape
for rowID in range(row_num):
    for columnID in range(column_num):
        if "." == df_raw.iloc[rowID, columnID]:
            df_raw.iloc[rowID, columnID] = np.nan

# prepare the feature list
featureList = []
for item in df_raw.columns:
    if item.startswith("Item"):
        featureList.append(item)

In [5]:
df = df_raw[df_raw["EndDesc"] == df_raw["EndDesc"]]
df.reset_index().drop("index", axis=1)

print(df["EndDesc"].unique())
print(df.shape)

labelRangeList = df["EndDesc"].unique().tolist()
print(labelRangeList)
# labelContent = []
# for idx, row in df.iterrows():
#     index = labelRangeList.index(row["EndDesc"])
#     label = [0 for i in range(len(labelRangeList))]
#     label[index] = 1
#     labelContent.append(label)
# # df["label"] = labelList

# labelContent = np.array(labelContent)

# labelList = ["label_{}".format(i) for i in range(len(labelRangeList))]
# print(labelList)
# for idx, item in enumerate(labelList):
#     df[item] = labelContent[:, idx]

labelList = []
for idx, row in df.iterrows():
    label = labelRangeList.index(row["EndDesc"])
    labelList.append(label)
df["label"] = labelList

labelList = ["label"]

['Mutually agreed completion of treatment'
 'Termition of treatment earlier than Care Professiol planned'
 'Not suitable for IAPT service - no action taken or directed back to referrer'
 'Referred to another therapy service by mutual agreement']
(570, 279)
['Mutually agreed completion of treatment', 'Termition of treatment earlier than Care Professiol planned', 'Not suitable for IAPT service - no action taken or directed back to referrer', 'Referred to another therapy service by mutual agreement']


# Use RecoveryDesc as the label

In [None]:
df = df_raw[df_raw["RecoveryDesc"] == df_raw["RecoveryDesc"]]
df = df[(df["RecoveryDesc"].isin(["At recovery", "Not at recovery"]))]
df.reset_index().drop("index", axis=1)
print(df.shape)

labelList = []
for idx, row in df.iterrows():
    if (row["ReliableChangeDesc"] == "Reliable improvement") & (row["ReliableRecoveryDesc"] == "Reliable recovery") & (row["RecoveryDesc"] == "At recovery"):
        labelList.append(1)
    else:
        labelList.append(0)
df["label"] = labelList
labelList = ["label"]

## Train Test Split

In [6]:
# Keep 10% data as the test data
train, test = train_test_split(df, test_size=0.1)

In [7]:
# KFold Split
skf = StratifiedKFold(n_splits=5)
print(skf)

StratifiedKFold(n_splits=5, random_state=None, shuffle=False)


In [8]:
FoldList = []
for i, (train_index, val_index) in enumerate(skf.split(train[featureList], train[labelList])):
    # print(f"Fold {i}:")
    # print(f"  Train: index={train_index}")
    # print(f"  val:  index={val_index}")
    foldTrain = xgb.DMatrix(train.iloc[train_index][featureList].values, train.iloc[train_index][labelList].values.squeeze())
    foldVal = xgb.DMatrix(train.iloc[val_index][featureList].values, train.iloc[val_index][labelList].values.squeeze())
    dic = {}
    dic["train"] = foldTrain
    dic["val"] = foldVal
    FoldList.append(dic)

In [9]:
xgtrain = xgb.DMatrix(train[featureList].values, train[labelList].values.squeeze())
xgtest = xgb.DMatrix(test[featureList].values, test[labelList].values.squeeze())

## XGboost single train

In [9]:
# # DT 0.55
# param_dict = {
#     'max_depth': 4, 
#     'learning_rate': 0.1, 
#     'objective': 'multi:softmax', 
#     'num_class': 4, 
#     'booster': 'gbtree', 
#     'n_jobs': 5, 
#     'subsample': 1.0, 
#     'colsample_bytree': 1.0, 
#     'colsample_bylevel': 0.1, 
#     'colsample_bynode': 0.7, 
#     'num_parallel_tree': 1,
# }

# Random Forest 0.6316
param_dict = {
    'max_depth': 10, 
    'learning_rate': 0.1, 
    'objective': 'multi:softmax', 
    'num_class': 4, 
    'booster': 'gbtree', 
    'n_jobs': 5, 
    'subsample': 1.0, 
    'colsample_bytree': 1.0, 
    'colsample_bylevel': 0.1, 
    'colsample_bynode': 1.0, 
    'num_parallel_tree': 8
}

In [10]:
# CV
scores = []
for fold in FoldList:
    bst = xgb.train(param_dict, fold["train"], num_boost_round=1)
    preds = bst.predict(fold["val"])
    labels = fold["val"].get_label()
    acc = accuracy_score(labels, preds)
    scores.append(acc)
print(scores)
print("Accuracy Mean: {}, accuracy std: {}".format(np.mean(scores), np.std(scores)))

[0.5631067961165048, 0.5533980582524272, 0.6116504854368932, 0.5686274509803921, 0.6078431372549019]
Accuracy Mean: 0.5809251856082238, accuracy std: 0.02406280196457573


In [11]:
# watchlist  = [(xgtest,'eval'), (xgtrain,'train')]
bst = xgb.train(param_dict, xgtrain, num_boost_round=1)
preds = bst.predict(xgtest)
labels = xgtest.get_label()
acc = accuracy_score(labels, preds)
print("Test Accuracy: {:.4f}".format(acc))

Test Accuracy: 0.6316


# Grid Search for Single Tree Xgboost(Theoritically it is equal to DT)

In [9]:
param_dict = {
    # "n_estimators": [1],
    "max_depth": [4,5,6,7,8,10,12,16,24,32],
    "learning_rate": [0.1, 0.15, 0.2, 0.5, 0.7, 0.9, 1],
    "objective": ["multi:softmax"],
    'num_class': [4],
    "booster": ["gbtree"],
    "n_jobs": [5],
    "subsample": [1.0],
    'colsample_bytree': [1.0],
    'colsample_bylevel': [0.1, 0.3, 0.5, 0.7, 0.9, 1.0],
    'colsample_bynode': [0.1, 0.3, 0.5, 0.7, 0.9, 1.0], 
    'num_parallel_tree': [1],
}

In [9]:
def MyGridSearch(param_dict, FoldList):
    searchSpace = makeGrid(param_dict)
    print("Search Space Size:" + len(searchSpace).__str__())
    resultList = []
    for param in tqdm.tqdm(searchSpace):
        #sleep(1)
        scores = []
        for fold in FoldList:
            bst = xgb.train(param, fold["train"], num_boost_round=1)
            preds = bst.predict(fold["val"])
            labels = fold["val"].get_label()
            acc = accuracy_score(labels, preds)
            scores.append(acc)
        result = {}
        result["acc_mean"] = np.mean(scores)
        result["acc_std"] = np.std(scores)
        result["acc"] = scores
        result["param"] = param
        resultList.append(result)
    sortedResult = sorted(resultList, key=lambda x: x["acc_mean"], reverse=True)
    print(sortedResult[:5])
    return sortedResult

In [11]:
results = MyGridSearch(param_dict, FoldList)

Search Space Size:2520


100%|██████████| 2520/2520 [01:49<00:00, 22.98it/s]

[{'acc_mean': 0.6161050828098229, 'acc_std': 0.0302653887786102, 'acc': [0.6213592233009708, 0.5922330097087378, 0.5728155339805825, 0.6372549019607843, 0.6568627450980392], 'param': {'max_depth': 4, 'learning_rate': 0.1, 'objective': 'multi:softmax', 'num_class': 4, 'booster': 'gbtree', 'n_jobs': 5, 'subsample': 1.0, 'colsample_bytree': 1.0, 'colsample_bylevel': 0.1, 'colsample_bynode': 0.7, 'num_parallel_tree': 1}}, {'acc_mean': 0.6161050828098229, 'acc_std': 0.0302653887786102, 'acc': [0.6213592233009708, 0.5922330097087378, 0.5728155339805825, 0.6372549019607843, 0.6568627450980392], 'param': {'max_depth': 4, 'learning_rate': 0.15, 'objective': 'multi:softmax', 'num_class': 4, 'booster': 'gbtree', 'n_jobs': 5, 'subsample': 1.0, 'colsample_bytree': 1.0, 'colsample_bylevel': 0.1, 'colsample_bynode': 0.7, 'num_parallel_tree': 1}}, {'acc_mean': 0.6161050828098229, 'acc_std': 0.0302653887786102, 'acc': [0.6213592233009708, 0.5922330097087378, 0.5728155339805825, 0.6372549019607843, 0.65




# Grid Search for multi Tree Xgboost(Theoritically it is equal to Randomforest)

In [10]:
param_dict = {
    # "n_estimators": [1],
    "max_depth": [4,6,8,10,16,32],
    "learning_rate": [0.1, 0.15, 0.2, 0.5, 0.7, 0.9, 1],
    "objective": ["multi:softmax"],
    'num_class': [4],
    "booster": ["gbtree"],
    "n_jobs": [5],
    "subsample": [1.0],
    'colsample_bytree': [1.0],
    'colsample_bylevel': [0.1, 0.3, 0.5, 0.7, 0.9, 1.0],
    'colsample_bynode': [0.1, 0.3, 0.5, 0.7, 0.9, 1.0], 
    'num_parallel_tree': [4, 8, 16, 64],
}

In [11]:
results = MyGridSearch(param_dict, FoldList)

Search Space Size:6048


100%|██████████| 6048/6048 [1:11:29<00:00,  1.41it/s]

[{'acc_mean': 0.6297353893013515, 'acc_std': 0.054568743935994236, 'acc': [0.6116504854368932, 0.6504854368932039, 0.5728155339805825, 0.7254901960784313, 0.5882352941176471], 'param': {'max_depth': 10, 'learning_rate': 0.1, 'objective': 'multi:softmax', 'num_class': 4, 'booster': 'gbtree', 'n_jobs': 5, 'subsample': 1.0, 'colsample_bytree': 1.0, 'colsample_bylevel': 0.1, 'colsample_bynode': 1.0, 'num_parallel_tree': 8}}, {'acc_mean': 0.6297353893013515, 'acc_std': 0.054568743935994236, 'acc': [0.6116504854368932, 0.6504854368932039, 0.5728155339805825, 0.7254901960784313, 0.5882352941176471], 'param': {'max_depth': 10, 'learning_rate': 0.15, 'objective': 'multi:softmax', 'num_class': 4, 'booster': 'gbtree', 'n_jobs': 5, 'subsample': 1.0, 'colsample_bytree': 1.0, 'colsample_bylevel': 0.1, 'colsample_bynode': 1.0, 'num_parallel_tree': 8}}, {'acc_mean': 0.6297353893013515, 'acc_std': 0.054568743935994236, 'acc': [0.6116504854368932, 0.6504854368932039, 0.5728155339805825, 0.72549019607843




## Grid Search for multi Full Xgboost

In [10]:
param_dict = {
    "n_estimators": [10, 50, 100, 150],
    "max_depth": [4,8,10,16],
    "learning_rate": [0.05, 0.1, 0.2, 0.3],
    "objective": ["multi:softmax"],
    'num_class': [4],
    "booster": ["gbtree"],
    "n_jobs": [5],
    "subsample": [0.5, 1.0],
    'colsample_bytree': [0.3, 0.5, 0.7, 1.0],
    'colsample_bylevel': [0.3, 0.5, 0.7, 1.0],
    'colsample_bynode': [0.3, 0.5, 0.7, 1.0], 
    'num_parallel_tree': [4, 8, 16],
}

In [11]:
def MyGridSearch(param_dict, FoldList):
    searchSpace = makeGrid(param_dict)
    print("Search Space Size:" + len(searchSpace).__str__())
    resultList = []
    for param in tqdm.tqdm(searchSpace):
        #sleep(1)
        scores = []
        for fold in FoldList:
            bst = xgb.train(param, fold["train"], num_boost_round=param["n_estimators"])
            preds = bst.predict(fold["val"])
            labels = fold["val"].get_label()
            acc = accuracy_score(labels, preds)
            scores.append(acc)
        result = {}
        result["acc_mean"] = np.mean(scores)
        result["acc_std"] = np.std(scores)
        result["acc"] = scores
        result["param"] = param
        resultList.append(result)
    sortedResult = sorted(resultList, key=lambda x: x["acc_mean"], reverse=True)
    print(sortedResult[:5])
    return sortedResult

In [12]:
results = MyGridSearch(param_dict, FoldList)

Search Space Size:24576


100%|██████████| 24576/24576 [32:14:31<00:00,  4.72s/it]   

[{'acc_mean': 0.6179516466780888, 'acc_std': 0.02393823050191032, 'acc': [0.6019417475728155, 0.6310679611650486, 0.6116504854368932, 0.6568627450980392, 0.5882352941176471], 'param': {'n_estimators': 10, 'max_depth': 10, 'learning_rate': 0.05, 'objective': 'multi:softmax', 'num_class': 4, 'booster': 'gbtree', 'n_jobs': 5, 'subsample': 0.5, 'colsample_bytree': 0.3, 'colsample_bylevel': 1.0, 'colsample_bynode': 1.0, 'num_parallel_tree': 4}}, {'acc_mean': 0.6179516466780888, 'acc_std': 0.02393823050191032, 'acc': [0.6019417475728155, 0.6310679611650486, 0.6116504854368932, 0.6568627450980392, 0.5882352941176471], 'param': {'n_estimators': 10, 'max_depth': 16, 'learning_rate': 0.05, 'objective': 'multi:softmax', 'num_class': 4, 'booster': 'gbtree', 'n_jobs': 5, 'subsample': 0.5, 'colsample_bytree': 0.3, 'colsample_bylevel': 1.0, 'colsample_bynode': 1.0, 'num_parallel_tree': 4}}, {'acc_mean': 0.6179326099371788, 'acc_std': 0.026517503611837186, 'acc': [0.6019417475728155, 0.631067961165048




In [7]:
param_dict = {
    'learning_rate': 0.1, 
    'max_depth': 5, 
    'colsample_bytree': 0.6, 
    'colsample_bylevel': 0.1, 
    'colsample_bynode': 1.0, 
    'num_parallel_tree': 1, 
    'objective': 'multi:softmax', 
    'num_class': 4
}
clf = xgb.XGBClassifier(**param_dict)

In [8]:
scores = cross_val_score(clf, train[featureList].values, train[labelList].values.squeeze(), cv=5, scoring="accuracy")
print(scores)
print("Accuracy mean: {:.4f}, accuracy std: {:.4f}".format(np.mean(scores), np.std(scores)))



[0.54368932 0.59223301 0.58252427 0.60784314 0.60784314]
Accuracy mean: 0.5868, accuracy std: 0.0236


In [17]:
clf = xgb.XGBClassifier(**param_dict)
clf = clf.fit(train[featureList].values,train[labelList].values.squeeze())
preds = clf.predict(test[featureList].values)
labels = test[labelList].values
acc = accuracy_score(labels, preds)
print(acc)

0.5964912280701754


In [24]:
watchlist  = [(xgtest,'eval'), (xgtrain,'train')]
bst = xgb.train(param_dict, xgtrain, num_boost_round=1, evals=watchlist, verbose_eval=False)
preds = bst.predict(xgtest)
labels = xgtest.get_label()
acc = accuracy_score(labels, preds)
print("Test Accuracy: {:.4f}".format(acc))

Test Accuracy: 0.5614


In [32]:
from xgboost import cv

In [46]:
xgb_cv = cv(
    dtrain=xgtrain, 
    params=param_dict, 
    nfold=10,
    num_boost_round=1, 
    metrics="merror", 
    as_pandas=True, 
)

In [47]:
xgb_cv

Unnamed: 0,train-merror-mean,train-merror-std,test-merror-mean,test-merror-std
0,0.340916,0.010395,0.413462,0.074677


In [26]:
kf = KFold(n_splits=5)

print(kf)


KFold(n_splits=5, random_state=None, shuffle=False)


In [27]:
for i, (train_index, test_index) in enumerate(kf.split(train)):
    print(f"Fold {i}:")
    print(f"  Train: index={train_index}")
    print(f"  Test:  index={test_index}")

Fold 0:
  Train: index=[103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120
 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138
 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156
 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174
 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192
 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210
 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228
 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246
 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264
 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282
 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 298 299 300
 301 302 303 304 305 306 307 308 309 310 311 312 313 314 315 316 317 318
 319 320 321 322 323 324 325 326 327 328 329 330 331 332 333 334 335 336
 337 338 339 340 341 342 343

In [29]:
train.loc[train_index]

KeyError: '[1, 29, 44, 49, 55, 58, 72, 73, 74, 86, 91, 101, 125, 128, 140, 145, 156, 175, 179, 184, 214, 215, 220, 221, 222, 224, 232, 250, 259, 266, 268, 271, 281, 286, 303, 308, 314, 322, 342, 356, 360, 363, 365, 366, 373, 378, 382, 389, 396, 401, 402, 403, 405, 410] not in index'

In [30]:
train

Unnamed: 0.1,Unnamed: 0,IAPTus_Num,Referral Date,Age_ReferralRequest_ReceivedDate,EthnicDescGroupCode,EthnicCategoryGroupShortCode,GenderIdentity,SexualOrientationDesc,EndDesc,EndDescGroupShort,...,Item217,Item218,Item219,Item220,Item221,Item222,Item223,Item224,Item225,label
583,584,26184,2022-09-09,4.472136,1.0,1.0,2,,Mutually agreed completion of treatment,Seen and treated,...,0,1,0,0,0,0,0,0,0,0
136,137,24864,2020-06-01,4.358899,1.0,1.0,2,,Termition of treatment earlier than Care Profe...,Seen and treated,...,0,0,1,0,0,0,0,0,0,1
243,244,25164,2020-12-23,4.582576,,,2,,Mutually agreed completion of treatment,Seen and treated,...,,,,,,,,,,0
47,48,24569,2019-10-27,4.472136,1.0,1.0,2,,Mutually agreed completion of treatment,Seen and treated,...,,,,,,,,,,0
290,291,25290,2021-04-29,4.795832,9.0,4.0,1,,Termition of treatment earlier than Care Profe...,Seen and treated,...,0,,1,0,0,0,1,0,0,1
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
352,353,25447,2021-10-27,5.291503,1.0,1.0,1,,Mutually agreed completion of treatment,Seen and treated,...,0,1,1,1,1,0,0,0,0,0
571,572,26151,2022-08-28,4.582576,,,1,,Termition of treatment earlier than Care Profe...,Seen and treated,...,,,,,,,,,,1
110,111,24792,2020-05-06,4.472136,1.0,1.0,2,,Mutually agreed completion of treatment,Seen and treated,...,0,1,1,1,0,0,0,0,0,0
536,537,26049,2022-07-24,4.472136,1.0,1.0,2,,Termition of treatment earlier than Care Profe...,Seen and treated,...,0,0,0,0,1,0,0,0,0,1


In [31]:
xgtrain.

<xgboost.core.DMatrix at 0x7e43dc182330>

In [None]:
param_dict = {
  "learning_rate": [0.1, 0.15, 0.2, 0.5, 0.7, 0.9, 1],
  "max_depth": [4,5,6,7,8,10,12,16,24,32],
  'colsample_bytree': [0.1, 0.3, 0.5, 0.7, 0.9, 1.0],
  'colsample_bylevel': [0.1, 0.3, 0.5, 0.7, 0.9, 1.0],
  'colsample_bynode': [0.1, 0.3, 0.5, 0.7, 0.9, 1.0],  
  "num_parallel_tree": [1],
  "objective": ["multi:softmax"],
  'num_class': [4]
}

In [None]:
param_dict = {
    "n_estimators": [1],
    "max_depth": [4,5,6,7,8,10,12,16,24,32],
    "learning_rate": [0.1, 0.15, 0.2, 0.5, 0.7, 0.9, 1],
    "objective": ["multi:softmax"],
    "booster": ["gbtree"],
    "n_jobs": [5],
    "subsample": [1.0],
    'colsample_bytree': [1.0],
    'colsample_bylevel': [0.1, 0.3, 0.5, 0.7, 0.9, 1.0],
    'colsample_bynode': [0.1, 0.3, 0.5, 0.7, 0.9, 1.0],     
}

In [None]:
    n_estimators : Optional[int]
        Number of boosting rounds.

    max_depth :  typing.Optional[int]
        Maximum tree depth for base learners.

    max_leaves : typing.Optional[int]
        Maximum number of leaves; 0 indicates no limit.

    max_bin : typing.Optional[int]
        If using histogram-based algorithm, maximum number of bins per feature

    grow_policy : typing.Optional[str]
        Tree growing policy.
        - depthwise: Favors splitting at nodes closest to the node,
        - lossguide: Favors splitting at nodes with highest loss change.

    learning_rate : typing.Optional[float]
        Boosting learning rate (xgb's "eta")

    verbosity : typing.Optional[int]
        The degree of verbosity. Valid values are 0 (silent) - 3 (debug).

    objective : typing.Union[str, xgboost.sklearn._SklObjWProto, typing.Callable[[typing.Any, typing.Any], typing.Tuple[numpy.ndarray, numpy.ndarray]], NoneType]
        Specify the learning task and the corresponding learning objective or a custom
        objective function to be used.

        For custom objective, see :doc:`/tutorials/custom_metric_obj` and
        :ref:`custom-obj-metric` for more information, along with the end note for
        function signatures.

    booster: typing.Optional[str]
        Specify which booster to use: ``gbtree``, ``gblinear`` or ``dart``.

    tree_method : typing.Optional[str]
        Specify which tree method to use.  Default to auto.  If this parameter is set to
        default, XGBoost will choose the most conservative option available.  It's
        recommended to study this option from the parameters document :doc:`tree method
        </treemethod>`

    n_jobs : typing.Optional[int]
        Number of parallel threads used to run xgboost.  When used with other
        Scikit-Learn algorithms like grid search, you may choose which algorithm to
        parallelize and balance the threads.  Creating thread contention will
        significantly slow down both algorithms.

    gamma : typing.Optional[float]
        (min_split_loss) Minimum loss reduction required to make a further partition on
        a leaf node of the tree.

    min_child_weight : typing.Optional[float]
        Minimum sum of instance weight(hessian) needed in a child.

    max_delta_step : typing.Optional[float]
        Maximum delta step we allow each tree's weight estimation to be.

    subsample : typing.Optional[float]
        Subsample ratio of the training instance.

    sampling_method : typing.Optional[str]
        Sampling method. Used only by the GPU version of ``hist`` tree method.
        - ``uniform``: Select random training instances uniformly.
        - ``gradient_based``: Select random training instances with higher probability
            when the gradient and hessian are larger. (cf. CatBoost)

    colsample_bytree : typing.Optional[float]
        Subsample ratio of columns when constructing each tree.

    colsample_bylevel : typing.Optional[float]
        Subsample ratio of columns for each level.

    colsample_bynode : typing.Optional[float]
        Subsample ratio of columns for each split.

    reg_alpha : typing.Optional[float]
        L1 regularization term on weights (xgb's alpha).

    reg_lambda : typing.Optional[float]
        L2 regularization term on weights (xgb's lambda).

    scale_pos_weight : typing.Optional[float]
        Balancing of positive and negative weights.

    base_score : typing.Optional[float]
        The initial prediction score of all instances, global bias.

    random_state : typing.Union[numpy.random.mtrand.RandomState, numpy.random._generator.Generator, int, NoneType]
        Random number seed.
        .. note::
           Using gblinear booster with shotgun updater is nondeterministic as
           it uses Hogwild algorithm.

    missing : float
        Value in the data which needs to be present as a missing value. Default to
        :py:data:`numpy.nan`.

    num_parallel_tree: typing.Optional[int]
        Used for boosting random forest.

    monotone_constraints : typing.Union[typing.Dict[str, int], str, NoneType]
        Constraint of variable monotonicity.  See :doc:`tutorial </tutorials/monotonic>`
        for more information.

    interaction_constraints : typing.Union[str, typing.List[typing.Tuple[str]], NoneType]
        Constraints for interaction representing permitted interactions.  The
        constraints must be specified in the form of a nested list, e.g. ``[[0, 1], [2,
        3, 4]]``, where each inner list is a group of indices of features that are
        allowed to interact with each other.  See :doc:`tutorial
        </tutorials/feature_interaction_constraint>` for more information

    importance_type: typing.Optional[str]
        The feature importance type for the feature_importances\_ property:
        * For tree model, it's either "gain", "weight", "cover", "total_gain" or
          "total_cover".
        * For linear model, only "weight" is defined and it's the normalized
          coefficients without bias.

    device : typing.Optional[str]
        .. versionadded:: 2.0.0
        Device ordinal, available options are `cpu`, `cuda`, and `gpu`.

    validate_parameters : typing.Optional[bool]
        Give warnings for unknown parameter.

    enable_categorical : bool
        See the same parameter of :py:class:`DMatrix` for details.

    feature_types : typing.Optional[typing.Sequence[str]]
        .. versionadded:: 1.7.0
        Used for specifying feature types without constructing a dataframe. See
        :py:class:`DMatrix` for details.

    max_cat_to_onehot : typing.Optional[int]
        .. versionadded:: 1.6.0
        .. note:: This parameter is experimental
        A threshold for deciding whether XGBoost should use one-hot encoding based split
        for categorical data.  When number of categories is lesser than the threshold
        then one-hot encoding is chosen, otherwise the categories will be partitioned
        into children nodes. Also, `enable_categorical` needs to be set to have
        categorical feature support. See :doc:`Categorical Data
        </tutorials/categorical>` and :ref:`cat-param` for details.

    max_cat_threshold : typing.Optional[int]
        .. versionadded:: 1.7.0
        .. note:: This parameter is experimental
        Maximum number of categories considered for each split. Used only by
        partition-based splits for preventing over-fitting. Also, `enable_categorical`
        needs to be set to have categorical feature support. See :doc:`Categorical Data
        </tutorials/categorical>` and :ref:`cat-param` for details.

    multi_strategy : typing.Optional[str]
        .. versionadded:: 2.0.0
        .. note:: This parameter is working-in-progress.
        The strategy used for training multi-target models, including multi-target
        regression and multi-class classification. See :doc:`/tutorials/multioutput` for
        more information.
        - ``one_output_per_tree``: One model for each target.
        - ``multi_output_tree``:  Use multi-target trees.

    eval_metric : typing.Union[str, typing.List[str], typing.Callable, NoneType]
        .. versionadded:: 1.6.0
        Metric used for monitoring the training result and early stopping.  It can be a
        string or list of strings as names of predefined metric in XGBoost (See
        doc/parameter.rst), one of the metrics in :py:mod:`sklearn.metrics`, or any
        other user defined metric that looks like `sklearn.metrics`.

        If custom objective is also provided, then custom metric should implement the
        corresponding reverse link function.

        Unlike the `scoring` parameter commonly used in scikit-learn, when a callable
        object is provided, it's assumed to be a cost function and by default XGBoost
        will minimize the result during early stopping.

        For advanced usage on Early stopping like directly choosing to maximize instead
        of minimize, see :py:obj:`xgboost.callback.EarlyStopping`.

        See :doc:`/tutorials/custom_metric_obj` and :ref:`custom-obj-metric` for more
        information.
        .. code-block:: python
            from sklearn.datasets import load_diabetes
            from sklearn.metrics import mean_absolute_error
            X, y = load_diabetes(return_X_y=True)
            reg = xgb.XGBRegressor(
                tree_method="hist",
                eval_metric=mean_absolute_error,
            )
            reg.fit(X, y, eval_set=[(X, y)])

    early_stopping_rounds : typing.Optional[int]
        .. versionadded:: 1.6.0
        - Activates early stopping. Validation metric needs to improve at least once in
          every **early_stopping_rounds** round(s) to continue training.  Requires at
          least one item in **eval_set** in :py:meth:`fit`.

        - If early stopping occurs, the model will have two additional attributes:
          :py:attr:`best_score` and :py:attr:`best_iteration`. These are used by the
          :py:meth:`predict` and :py:meth:`apply` methods to determine the optimal
          number of trees during inference. If users want to access the full model
          (including trees built after early stopping), they can specify the
          `iteration_range` in these inference methods. In addition, other utilities
          like model plotting can also use the entire model.

        - If you prefer to discard the trees after `best_iteration`, consider using the
          callback function :py:class:`xgboost.callback.EarlyStopping`.

        - If there's more than one item in **eval_set**, the last entry will be used for
          early stopping.  If there's more than one metric in **eval_metric**, the last
          metric will be used for early stopping.

    callbacks : typing.Optional[typing.List[xgboost.callback.TrainingCallback]]
        List of callback functions that are applied at end of each iteration.
        It is possible to use predefined callbacks by using
        :ref:`Callback API <callback_api>`.
        .. note::
           States in callback are not preserved during training, which means callback
           objects can not be reused for multiple training sessions without
           reinitialization or deepcopy.

        .. code-block:: python
            for params in parameters_grid:
                # be sure to (re)initialize the callbacks before each run
                callbacks = [xgb.callback.LearningRateScheduler(custom_rates)]
                reg = xgboost.XGBRegressor(**params, callbacks=callbacks)
                reg.fit(X, y)

    kwargs : typing.Optional[typing.Any]
        Keyword arguments for XGBoost Booster object.  Full documentation of parameters
        can be found :doc:`here </parameter>`.
        Attempting to set a parameter via the constructor args and \*\*kwargs
        dict simultaneously will result in a TypeError.
        .. note:: \*\*kwargs unsupported by scikit-learn
            \*\*kwargs is unsupported by scikit-learn.  We do not guarantee
            that parameters passed via this argument will interact properly
            with scikit-learn.

        .. note::  Custom objective function
            A custom objective function can be provided for the ``objective``
            parameter. In this case, it should have the signature ``objective(y_true,
            y_pred) -> [grad, hess]`` or ``objective(y_true, y_pred, *, sample_weight)
            -> [grad, hess]``:
            y_true: array_like of shape [n_samples]
                The target values
            y_pred: array_like of shape [n_samples]
                The predicted values
            sample_weight :
                Optional sample weights.

            grad: array_like of shape [n_samples]
                The value of the gradient for each sample point.
            hess: array_like of shape [n_samples]

In [None]:
param_dict = {
    "n"
}

In [None]:
clf = xgb.XGBClassifier()

## Grid Search for Single Tree Xgboost(Theoritically it is equal to DT)

In [None]:
param_dict = {
  "learning_rate": [0.1, 0.15, 0.2, 0.5, 0.7, 0.9, 1],
  "max_depth": [4,5,6,7,8,10,12,16,24,32],
  'colsample_bytree': [0.1, 0.3, 0.5, 0.7, 0.9, 1.0],
  'colsample_bylevel': [0.1, 0.3, 0.5, 0.7, 0.9, 1.0],
  'colsample_bynode': [0.1, 0.3, 0.5, 0.7, 0.9, 1.0],  
  "num_parallel_tree": [1],
  "objective": ["multi:softmax"],
  'num_class': [4]
}