# Basic utilities

In [5]:
import random


random.seed(0)
SEED = random.randint(0, 10**6)

In [6]:
from sklearn.metrics import accuracy_score


def score(y_test, y_pred):
    print(f"Accuracy: {accuracy_score(y_test, y_pred)}")

# Loading iris

In [7]:
from sklearn.datasets import load_iris
from sklearn.model_selection import train_test_split


data = load_iris()
X, y = data['data'], data['target']
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=.33, random_state=SEED)

# Basic usage of XGBClassifier

In [8]:
from xgboost import XGBClassifier

model = XGBClassifier(objective='binary:logistic', seed=SEED, seed_per_iteration=True)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
score(y_test, y_pred)

Accuracy: 1.0


# XGBClassifier with better hyper parameters

In [9]:
from xgboost import XGBClassifier
from math import sqrt, floor


m = len(X_train[0])
model = XGBClassifier(
    learning_rate = 0.2, # 0.0 - 1.0, log, not sure if each forest can have its own, it might lead to overfitting anyway

    max_depth = 20, # 2 - 50, 6 - 20 is another more conservative option
    subsample = 0.63, # 0.0 - 1.0
    # u can set only one of the colsample_by*
    # colsample_bytree: Optional[float] = None,
    # colsample_bylevel: Optional[float] = None,
    colsample_bynode = floor(sqrt(m))/m, # m..num_of_features, 0 - m, log?
    n_estimators = 100, # 100 - 500 = number of random forests in booster
    num_parallel_tree = 100, # 100 - 500 = number of trees in each random forest
    reg_lambda = 0, # -10. - 10.0, log = prunning of trees, higher value -> more prunning, not sure if negative values do anything
    min_child_weight = 2, # 0.0 - 10.0, log, higher value -> less options to choose from when selecting new nodes in trees

    objective = 'binary:logistic', # list at https://xgboost.readthedocs.io/en/stable/parameter.html, search for objective
    seed=SEED, 
    seed_per_iteration=True

)
model.fit(X_train, y_train)
y_pred = model.predict(X_test)
score(y_test, y_pred)

KeyboardInterrupt: 