# Post-Operative Data Analysis
Complete preprocessing and machine learning pipeline for surgical data analysis.

In [None]:
# Import libraries
import pandas as pd
import numpy as np
import sys
sys.path.append('../src')

from surgisense.utils import run_full_pipeline, split_data
from sklearn.ensemble import RandomForestClassifier
from sklearn.metrics import classification_report, confusion_matrix
import matplotlib.pyplot as plt
import seaborn as sns

In [None]:
# Read the raw post-operative CSV into a DataFrame
df_raw = pd.read_csv('../data/post-operative-data.csv')

# Overall dimensions, followed by the column labels
print("Raw data shape:", df_raw.shape)
print("\nColumn names:", list(df_raw.columns))

# Per-column dtypes as inferred by pandas while parsing
print("\nData types:", df_raw.dtypes, sep="\n")

# head() stays the cell's final expression so the notebook renders the preview
print("\nSample data:")
df_raw.head()

In [None]:
# List the distinct values found in every column of the raw frame
print("Unique values per column:")
for col in df_raw:  # iterating a DataFrame yields its column labels
    distinct_values = df_raw[col].unique()
    print(f"{col}: {distinct_values}")

In [None]:
# Hand the raw frame to the project's preprocessing entry point
# NOTE(review): the individual steps live in surgisense.utils.run_full_pipeline
# and are not visible from this notebook.
df_processed = run_full_pipeline(df_raw)

# Report the resulting dimensions and per-column dtypes
print("Processed data shape:", df_processed.shape)
print("\nProcessed data types:", df_processed.dtypes, sep="\n")

# head() stays the cell's final expression so the notebook renders the preview
print("\nProcessed data sample:")
df_processed.head()

In [None]:
# Split the processed data and fit a RandomForest classifier
# NOTE(review): presumably the name of the target column after preprocessing;
# confirm against run_full_pipeline's output.
target_column = 'decision_adm-decs'

# Project helper returns the four train/test pieces in this order
X_train, X_test, y_train, y_test = split_data(df_processed, target_column)

# fit() returns the estimator itself, so construction and training can be chained;
# random_state is fixed so repeated runs build the same forest
model = RandomForestClassifier(
    n_estimators=100,
    random_state=42,
).fit(X_train, y_train)

# Predictions on the held-out features, evaluated in the next cell
y_pred = model.predict(X_test)

print("Model training completed!")

In [None]:
# Model evaluation: text report, raw confusion matrix, and a labelled heatmap
# Imported in this cell because it is only needed to name the matrix rows/columns.
from sklearn.utils.multiclass import unique_labels

# Per-class precision / recall / F1 on the held-out split
print("Classification Report:")
print(classification_report(y_test, y_pred))

# Sorted union of the labels occurring in y_test or y_pred. This is the same
# label set (and order) confusion_matrix uses when `labels` is omitted, so the
# printed matrix is unchanged; computing it explicitly lets the plot show the
# class names instead of positional indices 0..k-1.
class_labels = unique_labels(y_test, y_pred)

print("\nConfusion Matrix:")
cm = confusion_matrix(y_test, y_pred, labels=class_labels)
print(cm)

# Plot confusion matrix (rows = actual class, columns = predicted class)
tick_labels = list(class_labels)
plt.figure(figsize=(8, 6))
sns.heatmap(
    cm,
    annot=True,
    fmt='d',  # cells hold integer counts
    cmap='Blues',
    xticklabels=tick_labels,
    yticklabels=tick_labels,
)
plt.title('Confusion Matrix')
plt.ylabel('Actual')
plt.xlabel('Predicted')
plt.show()

In [None]:
# Rank the input features by the fitted forest's importance scores
feature_names = X_train.columns
importances = model.feature_importances_

# One row per feature, highest importance first
feature_importance_df = pd.DataFrame(
    {'feature': feature_names, 'importance': importances}
).sort_values(by='importance', ascending=False)

print("Feature Importance:", feature_importance_df, sep="\n")

# Horizontal bar chart of the ranking, drawn on an explicitly created axes
fig, ax = plt.subplots(figsize=(10, 6))
sns.barplot(data=feature_importance_df, x='importance', y='feature', ax=ax)
ax.set_title('Feature Importance in RandomForest Model')
ax.set_xlabel('Importance')
fig.tight_layout()
plt.show()