# CatBoost Model for Predicting Surgical Complications

In [7]:
import pandas as pd
from catboost import CatBoostClassifier
from sklearn.model_selection import train_test_split, GridSearchCV
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix, classification_report

## Load and Prepare Data

In [8]:
# Load the data
surgical_data = pd.read_csv('Surgical.csv')

# Prepare the data
X = surgical_data.drop('complication', axis=1)
y = surgical_data['complication']

# Split the data into training and test sets
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Identify categorical features
categorical_features_indices = [i for i, col in enumerate(X_train.columns) if X_train[col].dtype == 'object']

## Train CatBoost Model

In [9]:
# Train the CatBoost model
model = CatBoostClassifier(loss_function='Logloss', cat_features=categorical_features_indices, verbose=0)
param_grid = {
    'iterations': [100, 500, 1000],
    'depth': [4, 6, 8],
    'learning_rate': [0.01, 0.1, 0.2]
}
model.fit(X_train, y_train)

# GridSearchCV
grid_search = GridSearchCV(estimator=model, param_grid=param_grid, cv=3, scoring='roc_auc', verbose=10)
grid_search.fit(X_train, y_train)

Fitting 3 folds for each of 27 candidates, totalling 81 fits
[CV 1/3; 1/27] START depth=4, iterations=100, learning_rate=0.01................
[CV 1/3; 1/27] END depth=4, iterations=100, learning_rate=0.01;, score=0.859 total time=   0.2s
[CV 2/3; 1/27] START depth=4, iterations=100, learning_rate=0.01................
[CV 2/3; 1/27] END depth=4, iterations=100, learning_rate=0.01;, score=0.875 total time=   0.2s
[CV 3/3; 1/27] START depth=4, iterations=100, learning_rate=0.01................
[CV 3/3; 1/27] END depth=4, iterations=100, learning_rate=0.01;, score=0.875 total time=   0.2s
[CV 1/3; 2/27] START depth=4, iterations=100, learning_rate=0.1.................
[CV 1/3; 2/27] END depth=4, iterations=100, learning_rate=0.1;, score=0.928 total time=   0.2s
[CV 2/3; 2/27] START depth=4, iterations=100, learning_rate=0.1.................
[CV 2/3; 2/27] END depth=4, iterations=100, learning_rate=0.1;, score=0.930 total time=   0.2s
[CV 3/3; 2/27] START depth=4, iterations=100, learning_r

## Evaluate Model

In [12]:
# Лучшие параметры
best_params = grid_search.best_params_
print("Best parameters found: ", best_params)

# Обучение модели с лучшими параметрами
best_model = grid_search.best_estimator_
best_model.fit(X_train, y_train)

# Оценка модели
y_pred = best_model.predict(X_test)
y_pred_proba = best_model.predict_proba(X_test)[:, 1]

print("Accuracy:", accuracy_score(y_test, y_pred))
print("ROC AUC Score:", roc_auc_score(y_test, y_pred_proba))
print("Confusion Matrix:\n", confusion_matrix(y_test, y_pred))
print("Classification Report:\n", classification_report(y_test, y_pred))

# Сохранение модели
best_model.save_model('catboost_surgical_model_best.cbm')

Best parameters found:  {'depth': 6, 'iterations': 1000, 'learning_rate': 0.01}
Accuracy: 0.911855141783396
ROC AUC Score: 0.9332815040847927
Confusion Matrix:
 [[2197   40]
 [ 218  472]]
Classification Report:
               precision    recall  f1-score   support

           0       0.91      0.98      0.94      2237
           1       0.92      0.68      0.79       690

    accuracy                           0.91      2927
   macro avg       0.92      0.83      0.86      2927
weighted avg       0.91      0.91      0.91      2927

