# CatBoost Classification

In [2]:
from sklearn.datasets import  load_iris

In [4]:
# Load the iris dataset, then split the returned object into the feature
# matrix (`data`) and the label vector (`target`) used by the rest of the notebook.
iris = load_iris()
data, target = iris.data, iris.target

In [6]:
# Display the full object returned by load_iris(); the output below starts with
# its 'data' array. NOTE(review): this prints the whole dataset - consider
# showing only a small slice to keep the notebook readable.
iris

{'data': array([[5.1, 3.5, 1.4, 0.2],
        [4.9, 3. , 1.4, 0.2],
        [4.7, 3.2, 1.3, 0.2],
        [4.6, 3.1, 1.5, 0.2],
        [5. , 3.6, 1.4, 0.2],
        [5.4, 3.9, 1.7, 0.4],
        [4.6, 3.4, 1.4, 0.3],
        [5. , 3.4, 1.5, 0.2],
        [4.4, 2.9, 1.4, 0.2],
        [4.9, 3.1, 1.5, 0.1],
        [5.4, 3.7, 1.5, 0.2],
        [4.8, 3.4, 1.6, 0.2],
        [4.8, 3. , 1.4, 0.1],
        [4.3, 3. , 1.1, 0.1],
        [5.8, 4. , 1.2, 0.2],
        [5.7, 4.4, 1.5, 0.4],
        [5.4, 3.9, 1.3, 0.4],
        [5.1, 3.5, 1.4, 0.3],
        [5.7, 3.8, 1.7, 0.3],
        [5.1, 3.8, 1.5, 0.3],
        [5.4, 3.4, 1.7, 0.2],
        [5.1, 3.7, 1.5, 0.4],
        [4.6, 3.6, 1. , 0.2],
        [5.1, 3.3, 1.7, 0.5],
        [4.8, 3.4, 1.9, 0.2],
        [5. , 3. , 1.6, 0.2],
        [5. , 3.4, 1.6, 0.4],
        [5.2, 3.5, 1.5, 0.2],
        [5.2, 3.4, 1.4, 0.2],
        [4.7, 3.2, 1.6, 0.2],
        [4.8, 3.1, 1.6, 0.2],
        [5.4, 3.4, 1.5, 0.4],
        [5.2, 4.1, 1.5, 0.1],
  

In [8]:
from sklearn.model_selection import train_test_split
# Hold out 30% of the samples for testing.
# random_state pins the shuffle so the split - and every accuracy reported
# further down - is identical after Restart Kernel -> Run All.
# stratify=target keeps the class proportions the same in both partitions.
X_train, X_test, y_train, y_test = train_test_split(
    data,
    target,
    test_size=0.3,
    random_state=42,
    stratify=target,
)

In [10]:
from catboost import CatBoostClassifier

In [12]:
# Baseline model: CatBoostClassifier constructed with no arguments, so every
# hyperparameter is left at the library default.
model = CatBoostClassifier()

In [14]:
# Train the baseline on the training split. The output below is CatBoost's
# per-iteration training log (learn loss, elapsed time, estimated remaining time).
model.fit(X_train, y_train)

Learning rate set to 0.070535
0:	learn: 1.0171919	total: 155ms	remaining: 2m 35s
1:	learn: 0.9526635	total: 157ms	remaining: 1m 18s
2:	learn: 0.8967760	total: 158ms	remaining: 52.4s
3:	learn: 0.8490184	total: 159ms	remaining: 39.6s
4:	learn: 0.7970933	total: 160ms	remaining: 31.8s
5:	learn: 0.7446618	total: 161ms	remaining: 26.6s
6:	learn: 0.7121799	total: 161ms	remaining: 22.8s
7:	learn: 0.6748181	total: 162ms	remaining: 20.1s
8:	learn: 0.6404995	total: 163ms	remaining: 17.9s
9:	learn: 0.6064331	total: 163ms	remaining: 16.1s
10:	learn: 0.5743833	total: 164ms	remaining: 14.7s
11:	learn: 0.5481030	total: 165ms	remaining: 13.6s
12:	learn: 0.5245620	total: 165ms	remaining: 12.6s
13:	learn: 0.5009668	total: 166ms	remaining: 11.7s
14:	learn: 0.4771534	total: 167ms	remaining: 10.9s
15:	learn: 0.4570051	total: 167ms	remaining: 10.3s
16:	learn: 0.4357691	total: 168ms	remaining: 9.7s
17:	learn: 0.4180201	total: 168ms	remaining: 9.19s
18:	learn: 0.4056636	total: 169ms	remaining: 8.73s
19:	learn:

<catboost.core.CatBoostClassifier at 0x234ddddb0e0>

In [16]:
# Predict on the held-out test split with the baseline model.
predictions = model.predict(X_test)

In [18]:
from sklearn.metrics import accuracy_score

In [20]:
# Compare the baseline predictions with the true test labels.
accuracy = accuracy_score(y_test, predictions)

In [22]:
# Report the baseline accuracy on the test split.
print(f"Accuracy: {accuracy}")

Accuracy: 0.9555555555555556


# Hyperparameter Tuning using GridSearchCV

In [None]:
from sklearn.model_selection import GridSearchCV

In [None]:
# Candidate values for each tuned CatBoost hyperparameter.
# Three values for each of five parameters gives 3**5 = 243 combinations.
param_grid = dict(
    iterations=[100, 300, 500],
    learning_rate=[0.01, 0.05, 0.1],
    depth=[4, 6, 8],
    l2_leaf_reg=[1, 3, 5],
    border_count=[32, 64, 128],
)

In [None]:
# Exhaustive 2-fold cross-validated search over param_grid.
# A fresh estimator with verbose=0 is tuned instead of the default-verbosity
# `model`: GridSearchCV clones its estimator for every fit, so the search is
# otherwise the same, but the per-iteration training log of each of the
# hundreds of fits is no longer printed.
# scoring is spelled out so the selection metric is explicit.
grid_search = GridSearchCV(
    CatBoostClassifier(verbose=0),
    param_grid,
    cv=2,
    scoring="accuracy",
)

In [None]:
# Run the search using only the training split; the test split is not touched here.
grid_search.fit(X_train, y_train)

In [None]:
# Show the parameter combination selected by the search.
grid_search.best_params_

In [None]:
# Keep a handle on the estimator selected by the search.
best_model = grid_search.best_estimator_

In [None]:
# No second fit here: GridSearchCV was created with its default refit=True, so
# best_estimator_ has already been retrained on all of X_train / y_train with
# the winning parameters. Calling fit again would only repeat that training.
# Display the already-fitted estimator instead.
best_model

In [None]:
# Predict on the test split with the grid-search model. This rebinds
# `predictions`, replacing the baseline predictions stored under the same name.
predictions = best_model.predict(X_test)

In [None]:
# Score the grid-search model's predictions; this rebinds `accuracy`, so the
# baseline value is no longer available under that name.
accuracy = accuracy_score(y_test, predictions)

In [None]:
# Report the test accuracy of the grid-search model.
print(f"Accuracy: {accuracy}")

# Hyperparameter Tuning using Optuna

In [None]:
import optuna

In [None]:
# Define the objective function for Optuna
def objective(trial):
    """Return the mean cross-validated accuracy of one sampled configuration.

    Parameters
    ----------
    trial : optuna.trial.Trial
        Trial object used to sample the CatBoost hyperparameters.

    Returns
    -------
    float
        Mean accuracy over 3 cross-validation folds of the training split.

    Notes
    -----
    Candidates are scored on folds of ``X_train`` / ``y_train`` and never on
    ``X_test``. Choosing hyperparameters by their test-split accuracy and then
    reporting accuracy on that same split yields an optimistically biased
    estimate, so the test split is reserved for the final evaluation only.
    """
    # Imported locally so this cell is self-contained regardless of which
    # import cells have been executed.
    from sklearn.model_selection import cross_val_score

    params = {
        'iterations': trial.suggest_int('iterations', 100, 500),
        # suggest_float(..., log=True) is the supported replacement for the
        # deprecated suggest_loguniform; the sampled distribution is the same.
        'learning_rate': trial.suggest_float('learning_rate', 0.01, 0.1, log=True),
        'depth': trial.suggest_int('depth', 4, 10),
        'l2_leaf_reg': trial.suggest_int('l2_leaf_reg', 1, 10),
        'border_count': trial.suggest_int('border_count', 32, 128),
        # Fixed (not sampled): silence CatBoost's per-iteration training log.
        'verbose': 0,
    }

    # Named `candidate` so the notebook-level `model` is not shadowed.
    candidate = CatBoostClassifier(**params)
    fold_scores = cross_val_score(
        candidate, X_train, y_train, cv=3, scoring='accuracy'
    )
    return float(fold_scores.mean())

In [None]:
# Run Optuna optimization.
# TPESampler is Optuna's default sampler; passing it explicitly with a seed makes
# the sequence of sampled hyperparameters repeatable from one run to the next.
study = optuna.create_study(
    direction='maximize',
    sampler=optuna.samplers.TPESampler(seed=42),
)
study.optimize(objective, n_trials=30)

In [None]:
# Show the hyperparameters of the best trial found by the study.
study.best_params

In [None]:
# Show the objective value achieved by the best trial.
study.best_value

In [None]:
# Keep the best trial's hyperparameters for building the final model.
best_params = study.best_params

In [None]:
# Rebuild the classifier from the best trial. study.best_params contains only
# the values sampled through trial.suggest_*, so the fixed verbose=0 setting
# used inside the objective is passed again here; otherwise the final model
# would print its full per-iteration training log, unlike the trials.
best_model1 = CatBoostClassifier(**best_params, verbose=0)

In [None]:
# Train the Optuna-selected configuration on the training split.
best_model1.fit(X_train, y_train)

In [None]:
# Predict on the test split with the Optuna-tuned model; this rebinds
# `predictions`, replacing the earlier predictions stored under the same name.
predictions = best_model1.predict(X_test)

In [None]:
# Score the Optuna-tuned model's predictions; this rebinds `accuracy`.
accuracy = accuracy_score(y_test, predictions)

In [None]:
# Report the test accuracy of the Optuna-tuned model.
print(f"Accuracy: {accuracy}")