# XGBoost Classification

In [1]:
from sklearn.datasets import  load_iris

In [2]:
iris = load_iris()
data =  iris.data
target = iris.target

In [3]:
iris

{'data': array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1],
  

In [6]:
from sklearn.model_selection import train_test_split
X_train, X_test, y_train, y_test = train_test_split(data, target, test_size = 0.3)

In [8]:
from xgboost import XGBClassifier

In [11]:
model = XGBClassifier()

In [13]:
model.fit(X_train, y_train)

In [15]:
predictions = model.predict(X_test)

In [17]:
from sklearn.metrics import accuracy_score

In [19]:
accuracy = accuracy_score(y_test, predictions)

In [21]:
print(f"Accuracy: {accuracy}")

Accuracy: 1.0


# Hyperparameter Tuning using GridSearchCV

In [24]:
from sklearn.model_selection import GridSearchCV

In [26]:
param_grid = {
    'n_estimators': [50, 100, 200],
    'max_depth': [3, 5, 7],
    'learning_rate': [0.01, 0.1, 0.2],
    'subsample': [0.7, 0.8, 1.0],
    'colsample_bytree': [0.7, 0.8, 1.0]
}

In [28]:
grid_search = GridSearchCV(model, param_grid, cv=2)

In [30]:
grid_search.fit(X_train, y_train)

In [32]:
grid_search.best_params_

{'colsample_bytree': 0.7,
 'learning_rate': 0.01,
 'max_depth': 3,
 'n_estimators': 50,
 'subsample': 0.8}

In [34]:
best_model = grid_search.best_estimator_

In [36]:
best_model.fit(X_train, y_train)

In [38]:
predictions = best_model.predict(X_test)

In [40]:
accuracy = accuracy_score(y_test, predictions)

In [42]:
print(f"Accuracy: {accuracy}")

Accuracy: 0.9777777777777777


# Hyperparameter Tuning using Optuna

In [45]:
import optuna

In [51]:
# Define objective function for Optuna
def objective(trial):
    params = {
        'n_estimators': trial.suggest_int('n_estimators', 50, 300, step=50),
        'max_depth': trial.suggest_int('max_depth', 3, 10),
        'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
        'subsample': trial.suggest_float('subsample', 0.6, 1.0),
        'colsample_bytree': trial.suggest_float('colsample_bytree', 0.6, 1.0)
    }

    model = XGBClassifier(**params, use_label_encoder=False, eval_metric='mlogloss', random_state=42)
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    Accuracy = accuracy_score(y_test, y_pred)
    return Accuracy

In [53]:
# Run Optuna optimization
study = optuna.create_study(direction='maximize')
study.optimize(objective, n_trials=50)

[I 2025-03-01 15:12:03,495] A new study created in memory with name: no-name-cf08a4b7-cda5-47d7-bd3e-8c43093180a4
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
Parameters: { "use_label_encoder" } are not used.

[I 2025-03-01 15:12:03,704] Trial 0 finished with value: 0.9777777777777777 and parameters: {'n_estimators': 150, 'max_depth': 3, 'learning_rate': 0.0477083940338331, 'subsample': 0.9322958194800808, 'colsample_bytree': 0.7322751976590864}. Best is trial 0 with value: 0.9777777777777777.
  'learning_rate': trial.suggest_loguniform('learning_rate', 0.01, 0.3),
Parameters: { "use_label_encoder" } are not used.

[I 2025-03-01 15:12:03,819] Trial 1 finished with value: 0.9777777777777777 and parameters: {'n_estimators': 100, 'max_depth': 7, 'learning_rate': 0.013688729356519708, 'subsample': 0.9945166361567707, 'colsample_bytree': 0.7433605096922616}. Best is trial 0 with value: 0.9777777777777777.
  'learning_rate': trial.suggest_loguniform('learning_rate

In [55]:
study.best_params

{'n_estimators': 200,
 'max_depth': 3,
 'learning_rate': 0.1394561301242773,
 'subsample': 0.8066321083958784,
 'colsample_bytree': 0.7648990603947278}

In [57]:
study.best_value

1.0

In [59]:
best_params = study.best_params

In [61]:
best_model1 = XGBClassifier(**best_params)

In [63]:
best_model1.fit(X_train, y_train)

In [65]:
predictions = best_model1.predict(X_test)

In [67]:
accuracy = accuracy_score(y_test, predictions)

In [69]:
print(f"Accuracy: {accuracy}")

Accuracy: 1.0
