In [12]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
from sklearn.model_selection import train_test_split
from sklearn.ensemble import RandomForestClassifier
from sklearn.linear_model import LogisticRegression
from sklearn.preprocessing import StandardScaler, OneHotEncoder,LabelEncoder
from sklearn.metrics import classification_report, roc_curve, auc, confusion_matrix, accuracy_score
from sklearn.compose import ColumnTransformer
from sklearn.pipeline import Pipeline

In [2]:
df = pd.read_csv("patient_data.csv")
df

Unnamed: 0,PatientID,Age,Sex,BMI,Smoking,Diabetes,Hypertension,HeartDisease,SurgeryType,Procedure,AnesthesiaType,Duration (min),Complication
0,1,45,Male,27.3,No,Yes,Yes,No,General Surgery,Appendectomy,General,90,No
1,2,62,Female,30.1,Yes,Yes,Yes,Yes,Cardiovascular,Coronary Bypass Surgery,General,180,Yes
2,3,29,Male,24.7,No,No,No,No,Orthopedic,Knee Arthroscopy,Regional,60,No
3,4,51,Female,28.9,Yes,No,Yes,Yes,Gastrointestinal,Gallbladder Removal,General,120,Yes
4,5,38,Female,22.5,No,No,No,No,Obstetrics,Cesarean Section,Spinal,75,No
5,6,70,Male,32.8,Yes,Yes,Yes,Yes,Orthopedic,Hip Replacement,General,150,Yes
6,7,55,Female,29.4,No,No,Yes,No,Gynecological,Total Hysterectomy,General,140,No
7,8,40,Male,26.0,Yes,No,No,No,General Surgery,Hernia Repair,Local,50,No
8,9,47,Male,28.5,Yes,Yes,Yes,Yes,Urology,Prostate Surgery,Regional,90,No
9,10,63,Female,35.0,Yes,Yes,Yes,Yes,Neurological,Spinal Fusion,General,210,Yes


In [4]:
df.shape

(50, 13)

In [5]:
# preprocess the data
# encode categorical columns/ variables

categorical_columns = [
    "Sex",
    "Smoking",
    "Diabetes",
    "Hypertension",
    "HeartDisease",
    "SurgeryType",
    "Procedure",
    "AnesthesiaType",
    "Complication",
]

In [7]:
# label encoders
label_encoders = {}
for col in categorical_columns:
    le = LabelEncoder()
    df[col] = le.fit_transform(df[col])
    label_encoders[col] = le

In [18]:
# Exclude PatientID from features
X = df.drop(columns=["Complication", "PatientID"])
y = df["Complication"]

In [20]:
# normalize numerical features
scaler = StandardScaler()
X[["Age", "BMI", "Duration (min)"]] = scaler.fit_transform(
    X[["Age", "BMI", "Duration (min)"]]
)

In [21]:
# split the dataset
# Split the dataset
X_train, X_test, y_train, y_test = train_test_split(
    X, y, test_size=0.2, random_state=42
)

In [22]:
# Train the Model
model = LogisticRegression(random_state=42, max_iter=200)
model.fit(X_train, y_train)

In [13]:
# Evaluate the Model
y_pred = model.predict(X_test)
print("Accuracy:", accuracy_score(y_test, y_pred))
print("\nClassification Report:\n", classification_report(y_test, y_pred))
print("\nConfusion Matrix:\n", confusion_matrix(y_test, y_pred))

Accuracy: 0.1

Classification Report:
               precision    recall  f1-score   support

           0       0.00      0.00      0.00         7
           1       0.12      0.33      0.18         3

    accuracy                           0.10        10
   macro avg       0.06      0.17      0.09        10
weighted avg       0.04      0.10      0.05        10


Confusion Matrix:
 [[0 7]
 [2 1]]


In [23]:
# Make Predictions
sample_patient = {
    "Age": 45,
    "Sex": "Male",
    "BMI": 27.3,
    "Smoking": "No",
    "Diabetes": "Yes",
    "Hypertension": "Yes",
    "HeartDisease": "No",
    "SurgeryType": "General Surgery",
    "Procedure": "Appendectomy",
    "AnesthesiaType": "General",
    "Duration (min)": 90,
}

In [24]:
# Encode and scale the sample patient
sample_df = pd.DataFrame([sample_patient])
for col in categorical_columns:
    if col != "Complication":  # Skip target column
        sample_df[col] = label_encoders[col].transform(sample_df[col])
sample_df[["Age", "BMI", "Duration (min)"]] = scaler.transform(
    sample_df[["Age", "BMI", "Duration (min)"]]
)

In [25]:
# Predict the outcome
prediction = model.predict(sample_df)
predicted_label = label_encoders["Complication"].inverse_transform(prediction)
print("\nPrediction for the sample patient:", predicted_label[0])


Prediction for the sample patient: Yes
