# CatBoost Model for Predicting Surgical Complications

In [1]:
import pandas as pd
from catboost import CatBoostClassifier, Pool
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix, classification_report

## Load and Prepare Data

In [3]:

# Load the surgical dataset from a CSV file in the working directory
surgical_data = pd.read_csv('Surgical.csv')

# Separate the predictors from the 'complication' target column
X = surgical_data.drop('complication', axis=1)
y = surgical_data['complication']

# Hold out 20% of the rows for testing; the fixed random_state keeps the split reproducible
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

# Identify categorical features by column position (the form passed to
# CatBoost's cat_features). Besides plain 'object' columns this also picks up
# pandas 'category' and dedicated string dtypes, which a bare
# `dtype == 'object'` comparison would silently skip.
categorical_columns = set(
    X_train.select_dtypes(include=['object', 'category', 'string']).columns
)
categorical_features_indices = [
    i for i, col in enumerate(X_train.columns) if col in categorical_columns
]


## Train CatBoost Model

In [4]:

# Configure a depth-6 CatBoost classifier optimising log-loss and fit it on
# the training split; verbose=100 logs progress every 100 iterations.
model = CatBoostClassifier(
    iterations=1000,
    depth=6,
    learning_rate=0.1,
    loss_function='Logloss',
    cat_features=categorical_features_indices,
    verbose=100,
)
model.fit(X_train, y_train)


0:	learn: 0.6153301	total: 64.1ms	remaining: 1m 4s
100:	learn: 0.2174967	total: 456ms	remaining: 4.06s
200:	learn: 0.1787684	total: 824ms	remaining: 3.27s
300:	learn: 0.1526681	total: 1.21s	remaining: 2.82s
400:	learn: 0.1317899	total: 1.6s	remaining: 2.39s
500:	learn: 0.1151831	total: 1.95s	remaining: 1.95s
600:	learn: 0.1002335	total: 2.29s	remaining: 1.52s
700:	learn: 0.0880422	total: 2.63s	remaining: 1.12s
800:	learn: 0.0771337	total: 2.97s	remaining: 737ms
900:	learn: 0.0681763	total: 3.3s	remaining: 363ms
999:	learn: 0.0608821	total: 3.64s	remaining: 0us


<catboost.core.CatBoostClassifier at 0x16bd5fd00>

## Evaluate Model

In [5]:

# Predict hard class labels and the positive-class probability for the test set
y_pred = model.predict(X_test)
y_pred_proba = model.predict_proba(X_test)[:, 1]

# Collect each metric with its label, then print them in a fixed order.
# ROC AUC is computed from probabilities; the other metrics use the hard labels.
evaluation_summary = [
    ("Accuracy:", accuracy_score(y_test, y_pred)),
    ("ROC AUC Score:", roc_auc_score(y_test, y_pred_proba)),
    ("Confusion Matrix:\n", confusion_matrix(y_test, y_pred)),
    ("Classification Report:\n", classification_report(y_test, y_pred)),
]
for label, value in evaluation_summary:
    print(label, value)

# Persist the trained model to disk in CatBoost's native format
model.save_model('catboost_surgical_model.cbm')


Accuracy: 0.9135633754697643
ROC AUC Score: 0.9200494969323563
Confusion Matrix:
 [[2188   49]
 [ 204  486]]
Classification Report:
               precision    recall  f1-score   support

           0       0.91      0.98      0.95      2237
           1       0.91      0.70      0.79       690

    accuracy                           0.91      2927
   macro avg       0.91      0.84      0.87      2927
weighted avg       0.91      0.91      0.91      2927

