In [None]:
# Dataset (Kaggle): https://www.kaggle.com/datasets/jaderz/hospital-beds-management/code

In [1]:
from sklearn.tree import DecisionTreeRegressor
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import OneHotEncoder, StandardScaler
from sklearn.ensemble import RandomForestRegressor
from sklearn.compose import ColumnTransformer
from sklearn.metrics import r2_score, mean_absolute_error, mean_squared_error
from sklearn.linear_model import LinearRegression, Lasso, Ridge, ElasticNet
import numpy as np

import kagglehub
import pandas as pd
import datetime
import seaborn as sns
import matplotlib.pyplot as plt

In [2]:
# Directory that holds the dataset's CSV files. Defaults to the Kaggle input
# path this notebook was written against; change this single constant to run
# the notebook against a copy stored elsewhere.
DATA_DIR = '/kaggle/input/hospital-beds-management'

# Load the four tables of the dataset, one DataFrame per CSV file.
services_weekly = pd.read_csv(f'{DATA_DIR}/services_weekly.csv')
staff = pd.read_csv(f'{DATA_DIR}/staff.csv')
staff_schedule = pd.read_csv(f'{DATA_DIR}/staff_schedule.csv')
patients = pd.read_csv(f'{DATA_DIR}/patients.csv')

**Feature Engineering**

In [3]:
def get_week_of_year(dt):
    """Return the ISO week number of a date-like object.

    Args:
        dt: Any object exposing ``isocalendar()`` whose result has a ``week``
            attribute (e.g. ``datetime.date`` or ``pandas.Timestamp``).

    Returns:
        The ``week`` component of ``dt.isocalendar()``.
    """
    # NOTE(review): ISO numbering can assign the first/last days of a calendar
    # year to week 52/53 or week 1 of the neighbouring year — confirm this
    # matches the numbering used by services_weekly['week'].
    iso_parts = dt.isocalendar()
    return iso_parts.week


# Extract services_weekly data for each patient

def get_week_data(patient, feature):
    """Look up one ``services_weekly`` value for a patient's arrival week and service.

    Reads the notebook-level ``services_weekly`` DataFrame.

    Args:
        patient: Row-like object providing ``'week_arrival'`` and ``'service'``.
        feature: Name of the ``services_weekly`` column to read.

    Returns:
        The value of ``feature`` from the first ``services_weekly`` row whose
        ``week`` and ``service`` both match the patient, or ``None`` when no
        row matches.
    """
    arrival_week, service_name = patient['week_arrival'], patient['service']

    same_week = services_weekly['week'] == arrival_week
    same_service = services_weekly['service'] == service_name
    candidates = services_weekly.loc[same_week & same_service, feature]

    # No weekly record for this (week, service) pair.
    if candidates.empty:
        return None
    return candidates.iloc[0]

# Parse the raw date strings once; the week numbers and the stay length below
# are both derived from these parsed columns.
patients['dt_arrival'] = pd.to_datetime(patients['arrival_date'])
patients['dt_departure'] = pd.to_datetime(patients['departure_date'])
patients['week_arrival'] = patients['dt_arrival'].apply(get_week_of_year)
patients['week_departure'] = patients['dt_departure'].apply(get_week_of_year)

# services_weekly columns copied onto each patient as `pat_<name>`, matched on
# the patient's arrival week and service (None when no weekly row matches).
weekly_services_attr = ['staff_morale', 'patient_satisfaction', 'event']

for feature in weekly_services_attr:
    patients[f'pat_{feature}'] = patients.apply(
        lambda pat: get_week_data(pat, feature), axis=1
    )

# Length of stay in whole days, reusing the already-parsed datetime columns
# instead of parsing the raw strings a second time.
patients['stay_duration'] = (patients['dt_departure'] - patients['dt_arrival']).dt.days

# A patient "has an event" only when a weekly record was found AND its event
# is not the 'none' placeholder. Without the notna() guard, a missing value
# compares unequal to 'none' and would be flagged as an event.
patients['has_event'] = patients['pat_event'].notna() & (patients['pat_event'] != 'none')


**Preprocessing and Train/Test Split**

In [4]:
# Columns fed to each branch of the preprocessing transformer.
numeric_features = [
    "age",
    "pat_staff_morale",
    "pat_patient_satisfaction",
    "stay_duration",
    "week_arrival",
]
categorical_features = ["pat_event", "service"]

# Standardise the numeric columns and one-hot encode the categorical ones
# (dropping the first level of each categorical column).
numeric_step = ("num", StandardScaler(), numeric_features)
categorical_step = (
    "cat",
    OneHotEncoder(drop='first', sparse_output=False),
    categorical_features,
)
col_transformer = ColumnTransformer(transformers=[numeric_step, categorical_step])

# Remove the target, identifiers and raw/parsed date columns from the inputs.
excluded_columns = [
    'satisfaction',
    'patient_id',
    'name',
    'arrival_date',
    'departure_date',
    'dt_departure',
    'dt_arrival',
]
X = patients.drop(columns=excluded_columns)

y = patients['satisfaction']
# Bin the target into four quantile groups so the split can be stratified on it.
y_strata = pd.qcut(y, q=4, labels=False)

X_train, X_test, y_train, y_test = train_test_split(
    X,
    y,
    test_size=0.2,
    random_state=42,
    stratify=y_strata,
)

# Fit the transformer on the training split only, then apply it to the test split.
X_train = col_transformer.fit_transform(X_train)
X_test = col_transformer.transform(X_test)

In [5]:
def show_metrics(y_test, _pred, model_name):
    """Print R², mean absolute error and mean squared error for one model.

    Args:
        y_test: Ground-truth target values.
        _pred: Predicted values to score against ``y_test``.
        model_name: Label printed in the report header.

    Returns:
        None. The report is written to stdout.
    """
    # Score the predictions that were passed in. Reading the notebook-level
    # `y_pred` here instead would report whichever predictions were assigned
    # last (or raise NameError on a fresh kernel), regardless of the argument.
    r2 = r2_score(y_test, _pred)
    mae = mean_absolute_error(y_test, _pred)
    mse = mean_squared_error(y_test, _pred)
    print(f"================== {model_name} ======================")
    print(f"r2: {r2:.2f}")
    print(f"mean_absolute_error: {mae:.2f}")
    print(f"mean_squared_error: {mse:.2f}")
    print("===============================================")

# Baseline regressors to compare, keyed by the label shown in the report.
# Lasso and Ridge use their default hyperparameters; the random forest is
# seeded so that re-running the notebook reproduces the same scores.
models = {
    "Lasso": Lasso(),
    "Ridge": Ridge(),
    "RandomForestRegressor": RandomForestRegressor(random_state=42),
}

# Same fit -> predict -> report sequence for every model.
for model_name, model in models.items():
    model.fit(X_train, y_train)
    y_pred = model.predict(X_test)
    show_metrics(y_test, y_pred, model_name)

# Keep the notebook-level names the original cell left behind: `linear_reg`
# ends up as the fitted Ridge model and `rf` as the fitted random forest
# (`y_pred` holds the random-forest predictions after the loop).
linear_reg = models["Ridge"]
rf = models["RandomForestRegressor"]

r2: -0.00
mean_absolute_error: 9.66
mean_squared_error: 127.91
r2: -0.04
mean_absolute_error: 9.83
mean_squared_error: 132.94
r2: -0.15
mean_absolute_error: 10.29
mean_squared_error: 146.05
