# Bariatric Surgery Outcome Prediction using ML
This notebook trains and compares multiple machine learning models to predict outcomes (e.g., complications) for bariatric surgery patients.

In [1]:
# 📦 Step 1: Load and Prepare the Data
# Produces X / y (all labelled rows) and X_train / X_test / y_train / y_test
# (stratified 80/20 split, medians imputed from the training rows only).
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

TARGET = 'postop_complication'

# Load your dataset
data = pd.read_csv('bariatric_dataset.csv')

# Display info. DataFrame.info() prints its own report and returns None,
# so wrapping it in print() would emit a stray "None" line.
data.info()

# Rows without a label cannot be used for training or for a stratified split;
# drop them instead of inventing a label via median imputation.
data = data.dropna(subset=[TARGET])

# Separate features and target BEFORE encoding, so the target column is never
# one-hot encoded (which would rename it and break the lookup by name).
y = data[TARGET]
features = data.drop(columns=TARGET)

# Convert categorical variables. The final cast turns dummy/bool columns into
# floats so that X.values is a plain numeric matrix for every downstream model.
X = pd.get_dummies(features).astype(float)

# Train-test split
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, stratify=y, random_state=42)

# Fill missing values with medians learned from the training rows only, so no
# statistic of the held-out rows leaks into preprocessing.
# NOTE: X itself is left un-imputed; use X_train / X_test for modelling.
train_medians = X_train.median()
X_train = X_train.fillna(train_medians)
X_test = X_test.fillna(train_medians)


FileNotFoundError: [Errno 2] No such file or directory: 'bariatric_dataset.csv'

In [None]:
# 🌲 Step 2: Train Random Forest Model
# Fits a seeded 100-tree forest on the training split and reports accuracy and
# ROC AUC on the held-out split.
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import accuracy_score, roc_auc_score, confusion_matrix

rf_model = RandomForestClassifier(n_estimators=100, random_state=42)
rf_model.fit(X_train, y_train)

# Hard labels feed the accuracy score; the AUC needs a continuous score, taken
# here from the second column of predict_proba (presumably the positive /
# complication class — confirm via rf_model.classes_).
rf_pred = rf_model.predict(X_test)
rf_class_proba = rf_model.predict_proba(X_test)
rf_prob = rf_class_proba[:, 1]

rf_accuracy = accuracy_score(y_test, rf_pred)
print("Random Forest Accuracy:", rf_accuracy)

rf_auc = roc_auc_score(y_test, rf_prob)
print("Random Forest AUC:", rf_auc)


In [None]:
# 🔍 SHAP for Random Forest
# Explains the random forest on the test split: a global summary plot followed
# by a force plot for the first test row.
import shap
shap.initjs()

rf_explainer = shap.TreeExplainer(rf_model)
rf_shap_values = rf_explainer.shap_values(X_test)

# The return format of shap_values depends on the SHAP release:
#   - older releases: a list with one (n_samples, n_features) array per class
#   - newer releases (reported for 0.45+; confirm against the installed
#     version): a single (n_samples, n_features, n_classes) array
# Indexing the new format with [1] would silently pick the second SAMPLE, so
# select the class-1 slice explicitly for whichever format came back.
if isinstance(rf_shap_values, list):
    rf_shap_class1 = rf_shap_values[1]
elif getattr(rf_shap_values, "ndim", 2) == 3:
    rf_shap_class1 = rf_shap_values[:, :, 1]
else:
    # Already a single (n_samples, n_features) matrix — use it unchanged.
    rf_shap_class1 = rf_shap_values

shap.summary_plot(rf_shap_class1, X_test)

# Keep force_plot as the last expression so the notebook renders its output.
shap.force_plot(rf_explainer.expected_value[1], rf_shap_class1[0], X_test.iloc[0])


In [None]:
# 🌟 Train XGBoost Model
# Fits a seeded XGBoost classifier on the training split and reports accuracy
# and ROC AUC on the held-out split.
from xgboost import XGBClassifier

# Constructor settings gathered in one place so they are easy to tune.
xgb_params = {
    'use_label_encoder': False,
    'eval_metric': 'logloss',
    'random_state': 42,
}
xgb_model = XGBClassifier(**xgb_params)
xgb_model.fit(X_train, y_train)

# Hard labels for accuracy; second predict_proba column as the AUC score.
xgb_pred = xgb_model.predict(X_test)
xgb_class_proba = xgb_model.predict_proba(X_test)
xgb_prob = xgb_class_proba[:, 1]

xgb_accuracy = accuracy_score(y_test, xgb_pred)
print("XGBoost Accuracy:", xgb_accuracy)

xgb_auc = roc_auc_score(y_test, xgb_prob)
print("XGBoost AUC:", xgb_auc)


In [None]:
# 🔍 SHAP for XGBoost
# Explains the XGBoost model on the test split: a summary plot over all test
# rows, then a force plot for the first test row.
xgb_explainer = shap.TreeExplainer(xgb_model)
xgb_shap_values = xgb_explainer.shap_values(X_test)

# Global view across every test row.
shap.summary_plot(xgb_shap_values, X_test)

# Local view: pair the first row's SHAP vector with that row's feature values.
xgb_base_value = xgb_explainer.expected_value
first_row_shap = xgb_shap_values[0]
first_row_features = X_test.iloc[0]
shap.force_plot(xgb_base_value, first_row_shap, first_row_features)


In [None]:
# 🧠 Train Neural Network using Keras
# Trains a small fully connected binary classifier (100 -> 50 -> 1 sigmoid) on
# the training split and reports accuracy and ROC AUC on the held-out split.
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense
from tensorflow.keras.optimizers import Adam

# Seed TensorFlow's global generator so the run is repeatable in the same way
# the tree models are (they pin random_state=42). Bit-exact determinism may
# additionally depend on hardware / op settings.
tf.random.set_seed(42)

# Keras needs a homogeneous numeric tensor. A DataFrame mixing bool / float
# columns converts to an object array and fails, so cast explicitly.
X_train_nn = X_train.to_numpy(dtype='float32')
X_test_nn = X_test.to_numpy(dtype='float32')
y_train_nn = y_train.to_numpy(dtype='float32')

# NOTE(review): the inputs are not standardised. Scaling usually helps dense
# networks, but it must then be applied identically wherever nn_model is fed
# data, so it is deliberately not introduced in this cell alone.

# Define model
nn_model = Sequential([
    Dense(100, activation='relu', input_shape=(X_train_nn.shape[1],)),
    Dense(50, activation='relu'),
    Dense(1, activation='sigmoid')
])

# Compile
nn_model.compile(optimizer=Adam(learning_rate=0.001), loss='binary_crossentropy', metrics=['accuracy'])

# Train (10% of the training rows are held out by Keras for validation)
nn_model.fit(X_train_nn, y_train_nn, epochs=20, batch_size=32, verbose=1, validation_split=0.1)

# Predict: flatten the (n, 1) sigmoid output, then threshold at 0.5 for labels
nn_prob = nn_model.predict(X_test_nn).flatten()
nn_pred = (nn_prob >= 0.5).astype(int)

print("Neural Network Accuracy:", accuracy_score(y_test, nn_pred))
print("Neural Network AUC:", roc_auc_score(y_test, nn_prob))


In [None]:
# 🔍 SHAP for Keras Neural Network using DeepExplainer
# Explains the Keras model on a sample of test rows, using a sample of training
# rows as the background distribution.
import numpy as np
import shap

# Sample background for efficiency. DataFrame.sample(n) raises when n exceeds
# the number of rows, so cap the request at what each split actually holds.
background = X_train.sample(min(100, len(X_train)), random_state=42)
test_sample = X_test.sample(min(100, len(X_test)), random_state=42)

# DeepExplainer — feed homogeneous float32 arrays; a frame mixing bool / float
# columns would otherwise convert to an object array.
nn_explainer = shap.DeepExplainer(nn_model, background.to_numpy(dtype='float32'))
nn_shap_values = nn_explainer.shap_values(test_sample.to_numpy(dtype='float32'))

# The return format depends on the SHAP release: older releases give a list
# with one (n_samples, n_features) array per model output; newer ones
# (reported for 0.45+; confirm against the installed version) give a single
# (n_samples, n_features, n_outputs) array. Reduce both to one 2-D matrix for
# the model's single sigmoid output.
if isinstance(nn_shap_values, list):
    nn_shap_matrix = np.asarray(nn_shap_values[0])
else:
    nn_shap_matrix = np.asarray(nn_shap_values)
if nn_shap_matrix.ndim == 3:
    nn_shap_matrix = nn_shap_matrix[:, :, 0]

# expected_value may be an array or a tensor; reduce it to a plain float.
nn_base_value = float(np.ravel(nn_explainer.expected_value)[0])

# Plot (force_plot stays last so the notebook renders its output)
shap.summary_plot(nn_shap_matrix, test_sample)
shap.force_plot(nn_base_value, nn_shap_matrix[0], test_sample.iloc[0])


In [None]:
# 📊 Final Comparison
# Collects test-set accuracy and ROC AUC for the three models into one table,
# prints it, and draws a grouped bar chart.

# (display name, hard predictions, probability scores) for each model
model_outputs = [
    ('Random Forest', rf_pred, rf_prob),
    ('XGBoost', xgb_pred, xgb_prob),
    ('Neural Network', nn_pred, nn_prob),
]

results = pd.DataFrame({
    'Model': [label for label, _pred, _prob in model_outputs],
    'Accuracy': [accuracy_score(y_test, pred) for _label, pred, _prob in model_outputs],
    'AUC': [roc_auc_score(y_test, prob) for _label, _pred, prob in model_outputs],
})

print(results)

import matplotlib.pyplot as plt

# Both metrics fall in [0, 1], so fix the y-range to keep the bars comparable.
comparison_ax = results.plot(
    x='Model',
    y=['Accuracy', 'AUC'],
    kind='bar',
    title='Model Comparison',
    ylim=(0, 1),
    rot=0,
)
comparison_ax.set_ylabel('Score')
comparison_ax.grid(True)
plt.show()
