<center>

# DSC680 Applied Data Science 

## Week 7

## Term Project - 2

## Smart Healthcare Operations: Leveraging AI for Efficient Staff Allocation and Patient Care

### Karthika Velingiri

### 03-February-2026

</center>

In [None]:
# =========================================
# Smart Healthcare Operations
# Predictive & Prescriptive Analytics
# =========================================

# ---------- Imports ----------
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns
from pathlib import Path

from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from sklearn.linear_model import LinearRegression, LogisticRegression
from sklearn.metrics import mean_absolute_error, accuracy_score, confusion_matrix

# ---------- Paths ----------
# All locations are resolved relative to the parent of the working directory.
BASE_DIR = Path("..")
DATA_DIR = BASE_DIR.joinpath("data")
FIG_DIR = BASE_DIR.joinpath("figures")
FIG_DIR.mkdir(exist_ok=True)  # no error if the figures folder already exists

# ---------- Load Data ----------
# Read the four CSV tables in a fixed order and bind them by unpacking.
patients, staff, services, schedule = (
    pd.read_csv(DATA_DIR.joinpath(csv_name))
    for csv_name in (
        "patients.csv",
        "staff.csv",
        "services_weekly.csv",
        "staff_schedule.csv",
    )
)

# ---------- Data Preparation ----------
# Patients: length of stay in whole days (departure minus arrival).
# Both date columns are parsed with day-first ordering.
patients["arrival_date"] = pd.to_datetime(patients["arrival_date"], dayfirst=True)
patients["departure_date"] = pd.to_datetime(patients["departure_date"], dayfirst=True)
patients["length_of_stay"] = (
    patients["departure_date"] - patients["arrival_date"]
).dt.days

# Services: rates
# A row with zero requests has no defined rate. Masking the zero denominator
# yields NaN (skipped by default in pandas aggregations) instead of inf,
# which would otherwise distort any mean computed over the rate columns.
request_counts = services["patients_request"].where(
    services["patients_request"] != 0
)
services["refusal_rate"] = services["patients_refused"] / request_counts
services["admission_rate"] = services["patients_admitted"] / request_counts

# Staff schedule: absenteeism flag (1 where present == 0, otherwise 0).
# Vectorised comparison; produces the same values as a per-row check.
schedule["absent"] = schedule["present"].eq(0).astype("int64")

# ---------- Visualizations ----------
# Each entry describes one figure: the seaborn plotting function, the column
# mapping passed to it, the title, an optional x-tick rotation (None = leave
# ticks untouched), and the output file name. Figures are produced in order.
figure_specs = [
    (  # Demand vs Beds
        sns.scatterplot,
        {"x": "available_beds", "y": "patients_request", "hue": "service"},
        "Patient Demand vs Available Beds",
        None,
        "demand_vs_beds.png",
    ),
    (  # Refusal rate by service
        sns.barplot,
        {"x": "service", "y": "refusal_rate"},
        "Patient Refusal Rate by Service",
        45,
        "refusal_rate_by_service.png",
    ),
    (  # Staff morale vs patient satisfaction
        sns.scatterplot,
        {"x": "staff_morale", "y": "patient_satisfaction", "hue": "service"},
        "Staff Morale vs Patient Satisfaction",
        None,
        "morale_vs_satisfaction.png",
    ),
]

for draw, mapping, title, tick_rotation, file_name in figure_specs:
    plt.figure(figsize=(8, 5))
    draw(data=services, **mapping)
    plt.title(title)
    if tick_rotation is not None:
        plt.xticks(rotation=tick_rotation)
    plt.tight_layout()
    plt.savefig(FIG_DIR / file_name)
    plt.close()  # close the current figure before starting the next one

# ---------- Predictive Model 1: Patient Demand (Regression) ----------
# Predictors and target for the demand model.
X_reg = services.loc[:, ["available_beds", "staff_morale", "month"]]
y_reg = services.loc[:, "patients_request"]

# Hold out 30% of the rows for evaluation; the fixed seed makes the split
# reproducible.
X_train, X_test, y_train, y_test = train_test_split(
    X_reg,
    y_reg,
    test_size=0.3,
    random_state=42,
)

# Fit a linear regression on the training rows.
reg_model = LinearRegression().fit(X_train, y_train)

# Score the held-out rows with mean absolute error.
y_pred = reg_model.predict(X_test)
mae = mean_absolute_error(y_true=y_test, y_pred=y_pred)

print("Patient Demand Forecast MAE:", mae)

# Feature importance: the fitted coefficient of each predictor.
reg_importance = pd.DataFrame(
    {"Feature": X_reg.columns, "Coefficient": reg_model.coef_}
)
print("\nRegression Feature Importance:", reg_importance, sep="\n")

# ---------- Predictive Model 2: Staff Absenteeism (Classification) ----------
# Left-join staff records onto the schedule so every schedule row is kept.
staff_weekly = schedule.merge(
    staff, on=["staff_id", "staff_name", "role", "service"], how="left"
)

# One encoder per categorical column so each fitted mapping stays available
# for decoding; refitting a single shared encoder discards the first mapping.
service_encoder = LabelEncoder()
role_encoder = LabelEncoder()
staff_weekly["service_enc"] = service_encoder.fit_transform(staff_weekly["service"])
staff_weekly["role_enc"] = role_encoder.fit_transform(staff_weekly["role"])
# Backward-compatible alias: `le` previously ended up fitted on "role".
le = role_encoder

X_clf = staff_weekly[["week", "service_enc", "role_enc"]]
y_clf = staff_weekly["absent"]

# Hold out 30% of the rows for evaluation with a fixed seed.
X_train, X_test, y_train, y_test = train_test_split(
    X_clf, y_clf, test_size=0.3, random_state=42
)

# Raise the iteration cap above the library default so the solver is less
# likely to stop before converging.
clf = LogisticRegression(max_iter=1000)
clf.fit(X_train, y_train)

y_pred = clf.predict(X_test)
accuracy = accuracy_score(y_test, y_pred)

print("\nStaff Absenteeism Prediction Accuracy:", accuracy)

# Confusion matrix
# Pin the label order so the matrix is always 2x2 (rows = actual,
# columns = predicted), even if one class is missing from the test split
# or from the predictions.
class_labels = [0, 1]
class_names = ["Not absent (0)", "Absent (1)"]
cm = confusion_matrix(y_test, y_pred, labels=class_labels)
plt.figure(figsize=(5, 4))
sns.heatmap(
    cm,
    annot=True,
    fmt="d",
    cmap="Blues",
    xticklabels=class_names,
    yticklabels=class_names,
)
plt.title("Staff Absenteeism Confusion Matrix")
plt.xlabel("Predicted")
plt.ylabel("Actual")
plt.tight_layout()
plt.savefig(FIG_DIR / "absenteeism_confusion_matrix.png")
plt.close()

# ---------- Prescriptive Scenario Simulation ----------
# Increase available beds by 10%
BED_INCREASE_FACTOR = 1.10
# Same columns, in the same order, that the regression model was fitted on.
SCENARIO_FEATURES = ["available_beds", "staff_morale", "month"]

services_sim = services.copy()

# Model prediction at current capacity. Comparing the scenario against this
# baseline (rather than against observed requests) isolates the effect of the
# capacity change from the model's own prediction error.
services_sim["baseline_predicted_demand"] = reg_model.predict(
    services_sim[SCENARIO_FEATURES]
)

# Apply the capacity increase and predict again.
services_sim["available_beds"] = (
    services_sim["available_beds"] * BED_INCREASE_FACTOR
)
services_sim["predicted_demand"] = reg_model.predict(
    services_sim[SCENARIO_FEATURES]
)
services_sim["demand_change"] = (
    services_sim["predicted_demand"] - services_sim["baseline_predicted_demand"]
)

print("\nPrescriptive Scenario: Increased Bed Capacity")
# NOTE: refusal_rate is the value copied from `services`, i.e. the observed
# historical rate; it is not re-estimated for the scenario.
print(services_sim[[
    "service",
    "patients_request",
    "baseline_predicted_demand",
    "predicted_demand",
    "demand_change",
    "refusal_rate"
]])
