In [None]:
# Import essential libraries
import numpy as np
import pandas as pd
from catboost import CatBoostClassifier, Pool, cv
from sklearn.datasets import make_classification
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Synthetic, imbalanced 3-class dataset used throughout the notebook.
# All generator settings live in one mapping so they are easy to scan and tweak.
dataset_config = dict(
    n_samples=5000,
    n_features=20,
    n_informative=10,
    n_redundant=5,
    n_classes=3,
    n_clusters_per_class=2,
    weights=[0.2, 0.5, 0.3],  # class proportions (sum to 1)
    flip_y=0.01,              # small amount of label noise
    random_state=42,          # fixed seed so the data is reproducible
)
X, y = make_classification(**dataset_config)

In [None]:
# Hold out 25% of the rows for testing; stratify on y so the class
# proportions are the same in both partitions. Seeded for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    stratify=y,
    test_size=0.25,
    random_state=42,
)

In [None]:
# Wrap each (features, labels) pair in a CatBoost Pool.
train_pool, test_pool = (
    Pool(data=features, label=labels)
    for features, labels in ((X_train, y_train), (X_test, y_test))
)

In [None]:
# Baseline multiclass CatBoost model. The same MultiClass objective is used
# both as the training loss and as the evaluation metric.
catboost_model = CatBoostClassifier(
    # objective / metric
    loss_function='MultiClass',
    eval_metric='MultiClass',
    # capacity and optimisation
    iterations=1000,
    depth=6,
    learning_rate=0.05,
    # stop once the eval metric has not improved for 50 rounds
    early_stopping_rounds=50,
    # reproducibility and logging
    random_seed=42,
    logging_level='Verbose'
)

In [None]:
# Early stopping and best-iteration selection (use_best_model=True) are driven
# by eval_set. Passing the test pool here would let the test data choose the
# final model, so the test metrics computed afterwards would be optimistically
# biased. Carve a dedicated validation split out of the training data instead
# and keep the test set untouched until evaluation.
X_fit, X_val, y_fit, y_val = train_test_split(
    X_train, y_train, test_size=0.2, random_state=42, stratify=y_train
)
fit_pool = Pool(data=X_fit, label=y_fit)
val_pool = Pool(data=X_val, label=y_val)

catboost_model.fit(
    fit_pool,
    eval_set=val_pool,     # validation data only; never the test set
    use_best_model=True,   # keep the iteration that scored best on val_pool
    verbose=100            # log every 100th iteration
)

In [None]:
# Per-class probabilities for the test rows; the predicted class is the
# column index with the highest probability.
y_pred_probs = catboost_model.predict_proba(X_test)
y_pred = y_pred_probs.argmax(axis=1)

In [None]:
# Overall fraction of test rows whose predicted class matches the true class.
accuracy = accuracy_score(y_true=y_test, y_pred=y_pred)
print(f"Test accuracy: {accuracy:.4f}")


In [None]:
# Per-class precision / recall / F1 on the test set.
print("\nClassification Report:")
report_text = classification_report(y_true=y_test, y_pred=y_pred)
print(report_text)

In [None]:
# Confusion matrix on the test set (rows = true class, columns = predicted).
# The label set is taken from the data rather than hard-coding three classes,
# and is passed explicitly so the tick labels always match the matrix order.
class_ids = np.unique(np.concatenate((y_test, y_pred)))
cm = confusion_matrix(y_test, y_pred, labels=class_ids)
class_labels = [f'Class {i}' for i in class_ids]

# Draw, annotate and show the figure in a single cell: the inline backend
# renders and closes the current figure when a cell finishes, so axis labels
# and a title set in a following cell would end up on a new, empty figure.
plt.figure(figsize=(8,6))
sns.heatmap(cm, annot=True, fmt='d', cmap='Blues',
            xticklabels=class_labels,
            yticklabels=class_labels)
plt.xlabel("Predicted")
plt.ylabel("True")
plt.title("Confusion Matrix")
plt.show()

In [None]:
# The synthetic data has no column names, so generate positional ones
# (feature_0, feature_1, ...) to label the importance scores.
n_features = X.shape[1]
feature_names = [f'feature_{i}' for i in range(n_features)]
feature_importances = catboost_model.get_feature_importance(data=train_pool)

In [None]:
# Pair every feature name with its importance score, most important first.
importance_table = pd.DataFrame(
    {'Feature': feature_names, 'Importance': feature_importances}
)
feat_imp_df = importance_table.sort_values(by='Importance', ascending=False)


In [None]:
# Horizontal bar chart of the importances; bars follow the row order of
# feat_imp_df (already sorted in descending order).
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=feat_imp_df, x='Importance', y='Feature', ax=ax)
ax.set_title("Feature Importances from CatBoost")
fig.tight_layout()
plt.show()

In [None]:
# Hyper-parameter search space: 3 depths x 2 learning rates x 2 iteration
# budgets = 12 candidate configurations.
param_grid = dict(
    depth=[4, 6, 8],
    learning_rate=[0.01, 0.05],
    iterations=[200, 500],
)

In [None]:
# Template estimator for the grid search. Only the settings shared by every
# candidate are fixed here; depth, learning_rate and iterations come from the
# grid. Logging is silenced because many models are fitted.
catboost_for_grid = CatBoostClassifier(
    logging_level='Silent',
    random_seed=42,
    loss_function='MultiClass'
)

In [None]:
# Exhaustive search over param_grid, scored by accuracy with 3-fold CV.
# NOTE(review): n_jobs=-1 runs candidate fits in parallel while each CatBoost
# fit may itself use multiple threads; if the search is slow, consider
# limiting one of the two (e.g. thread_count on the estimator) and confirm by
# timing.
grid_search = GridSearchCV(
    catboost_for_grid,
    param_grid,
    cv=3,
    scoring='accuracy',
    n_jobs=-1,
    verbose=2
)


In [None]:
# Run the search on the training partition only; the test set is not used.
grid_search.fit(X=X_train, y=y_train)

In [None]:
# Report the winning configuration and its mean cross-validated accuracy.
print(
    f"Best parameters: {grid_search.best_params_}",
    f"Best cross-validation accuracy: {grid_search.best_score_:.4f}",
    sep="\n",
)

In [None]:
# Build the final (still unfitted) model from the best grid-search settings.
best_params = grid_search.best_params_
tuned_settings = {
    name: best_params[name]
    for name in ('iterations', 'depth', 'learning_rate')
}
# NOTE(review): early_stopping_rounds is expected to matter only if the later
# fit call supplies an eval_set; that call is not visible here, so confirm.
final_model = CatBoostClassifier(
    **tuned_settings,
    loss_function='MultiClass',
    random_seed=42,
    logging_level='Verbose',
    early_stopping_rounds=50
)