### Метод CatBoost для прогнозирования оттока клиентов банка

Подключим необходимые модули

In [None]:
from google.colab import drive
import pandas as pd
import sklearn

# Attach Google Drive to the Colab file system; the dataset is read from under this mount point.
drive.mount(mountpoint='/content/drive')

Mounted at /content/drive


Загрузим заранее предобработанный датасет

In [None]:
# Read the preprocessed table from the mounted Drive folder.
df = pd.read_csv(
    filepath_or_buffer="/content/drive/MyDrive/bank_churners_preprocessed.csv",
)
# Bare trailing expression: the notebook renders the frame as a table.
df

Unnamed: 0.1,Unnamed: 0,Attrition_Flag,Customer_Age,Dependent_count,Education_Level,Income_Category,Card_Category,Months_on_book,Total_Relationship_Count,Months_Inactive_12_mon,...,Total_Trans_Amt,Total_Trans_Ct,Total_Ct_Chng_Q4_Q1,Avg_Utilization_Ratio,c_F,c_M,c_Divorced,c_Married,c_Single,c_Unknown
0,0,0,45,3,2,3,0,39,5,1,...,1144,42,1.625,0.061,0,1,0,1,0,0
1,1,0,49,5,5,1,0,44,6,1,...,1291,33,3.714,0.105,1,0,0,0,1,0
2,2,0,51,3,5,4,0,36,4,1,...,1887,20,2.333,0.000,0,1,0,1,0,0
3,3,0,40,4,2,1,0,34,3,4,...,1171,20,2.333,0.760,1,0,0,0,0,1
4,4,0,40,3,1,3,0,21,5,1,...,816,28,2.500,0.000,0,1,0,1,0,0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
10122,10122,0,50,2,5,2,0,40,3,2,...,15476,117,0.857,0.462,0,1,0,0,1,0
10123,10123,1,41,2,0,2,0,25,4,2,...,8764,69,0.683,0.511,0,1,1,0,0,0
10124,10124,1,44,1,2,1,0,36,5,3,...,10291,60,0.818,0.000,1,0,0,1,0,0
10125,10125,1,30,2,5,2,0,36,4,3,...,8395,62,0.722,0.000,0,1,0,0,0,1


Удалим ненужную колонку

In [None]:
# The loaded frame carries two row-number columns, "Unnamed: 0" and "Unnamed: 0.1"
# (both simply repeat the row index in the preview above). Drop both: leaving
# "Unnamed: 0.1" in place would feed the row number to the model as a feature.
# errors="ignore" and re-assignment (instead of inplace=True) keep the cell re-runnable:
# a second execution no longer raises KeyError for an already removed column.
df = df.drop(columns=["Unnamed: 0", "Unnamed: 0.1"], errors="ignore")

Подключим модули для обучения модели

In [None]:
pip install catboost

In [None]:
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.preprocessing import StandardScaler
from sklearn.metrics import accuracy_score, classification_report, confusion_matrix, r2_score
from catboost import CatBoostClassifier

Подготовим данные для обучения

In [None]:
# Target column vs. feature matrix (every remaining column of df).
y = df['Attrition_Flag']
X = df.drop(columns=['Attrition_Flag'])

# 80/20 hold-out split, stratified on the target so both parts keep the same class ratio.
# random_state is fixed so that Restart & Run All reproduces the same split and,
# consequently, comparable metrics between runs.
X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    stratify=y,
    random_state=42,
)

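Обучим модель: подберём гиперпараметры CatBoost перебором по сетке с 5-кратной кросс-валидацией и оценим лучшую модель на тестовой выборке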

In [None]:
# Standardise the features. The scaler is fitted on the training split only and the
# same fitted transform is then applied to both splits.
# NOTE(review): the scaler sees the whole training split before cross-validation, so
# CV folds share its statistics; gradient-boosted trees usually do not need scaling
# at all - confirm whether this step is required.
scaler = StandardScaler()
scaler.fit(X_train)
X_train_scaled = scaler.transform(X_train)
X_test_scaled = scaler.transform(X_test)

# Hyper-parameter grid: 2 x 2 x 2 x 1 = 8 candidate configurations.
# NOTE(review): 'MultiClass' is used although the report below lists two classes - confirm intent.
param_grid = dict(
    iterations=[100, 200],
    learning_rate=[0.01, 0.1],
    depth=[3, 6],
    loss_function=['MultiClass'],
)

# Exhaustive search over the grid with cv=5; n_jobs=-1 asks for all available cores.
# No `scoring` is passed, so the estimator's default score is used to rank candidates.
grid_search = GridSearchCV(
    CatBoostClassifier(verbose=False),
    param_grid,
    cv=5,
    n_jobs=-1,
)
grid_search.fit(X_train_scaled, y_train)

# Winning configuration and the estimator GridSearchCV exposes for it.
best_model = grid_search.best_estimator_
best_params = grid_search.best_params_

# Evaluate on the held-out split, scaled with the training-fitted scaler.
y_pred = best_model.predict(X_test_scaled)
print(classification_report(y_test, y_pred, zero_division=1))


              precision    recall  f1-score   support

           0       0.98      0.99      0.98      1701
           1       0.94      0.90      0.92       325

    accuracy                           0.97      2026
   macro avg       0.96      0.94      0.95      2026
weighted avg       0.97      0.97      0.97      2026



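CatBoost показывает очень хорошие результаты на отложенной выборке: accuracy 0.97, а для более редкого класса 1 — precision 0.94, recall 0.90 и F1 0.92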