In [1]:
import pandas as pd
import numpy as np

In [3]:
# Fix the seed of NumPy's global random state so that the np.random.* draws
# made in the cells below repeat on every Restart & Run All.
RANDOM_SEED = 42
np.random.seed(RANDOM_SEED)

In [5]:
# Daily calendar covering 2023-01-01 through 2024-12-31 (both ends included);
# every simulated table below has one or more rows per entry in this index.
dates = pd.date_range("2023-01-01", "2024-12-31", freq="D")


In [7]:
# Per-department simulation parameters:
#   base_patients - centre of the daily patient-count distribution
#   beds          - total bed capacity
#   wait_base     - baseline wait time before load scaling
departments = dict(
    ER=dict(base_patients=140, beds=50, wait_base=45),
    ICU=dict(base_patients=25, beds=20, wait_base=30),
    OPD=dict(base_patients=90, beds=40, wait_base=20),
    SURGERY=dict(base_patients=35, beds=25, wait_base=60),
)


In [11]:
def _weather_row(day):
    """Return [day, weather label, 0/1 rain flag] for one calendar day.

    Consumes exactly one draw from NumPy's global random state; the day is
    rainy when that draw falls below 0.28.
    """
    is_wet = np.random.rand() < 0.28
    label = "Rainy" if is_wet else "Clear"
    return [day, label, int(is_wet)]


# One row per day, generated in calendar order so the random stream is
# consumed in the same sequence as the dates.
weather_data = [_weather_row(day) for day in dates]

weather = pd.DataFrame(
    weather_data,
    columns=["date", "weather_type", "rain_flag"],
)

In [13]:
# Simulate the daily patient count for every department.
#
# For each day the expected volume is the department's base_patients scaled by
#   * 0.75 on Saturdays/Sundays, and
#   * 1.15 on days flagged as rainy in `weather`.
# The actual count is a normal draw around that mean (std = 15% of the base),
# truncated to an integer and floored at 5 patients.

# Build the date -> rain_flag lookup once instead of boolean-filtering the
# whole `weather` frame on every iteration (which made the loop quadratic in
# the number of days). `weather` holds one row per entry of `dates`.
rain_flag_by_date = dict(zip(weather["date"], weather["rain_flag"]))

visit_data = []

for date in dates:
    weekday = date.weekday()  # Monday == 0 ... Sunday == 6
    weekend_factor = 0.75 if weekday >= 5 else 1.0
    rain_factor = 1.15 if rain_flag_by_date[date] == 1 else 1.0

    for dept, params in departments.items():
        base = params["base_patients"]
        patients = int(
            np.random.normal(base * weekend_factor * rain_factor, base * 0.15)
        )
        # Guard against very low or negative draws from the normal tail.
        visit_data.append([date, dept, max(patients, 5)])

patient_visits = pd.DataFrame(
    visit_data, columns=["date", "department", "patient_count"]
)


In [15]:
# Derive daily admissions and discharges from the simulated visit counts.

# Share of a department's daily visits that turn into admissions.
# Defined once here rather than being rebuilt for every row of the loop.
admit_rates = {
    "ER": 0.35,
    "ICU": 0.70,
    "OPD": 0.10,
    "SURGERY": 0.55,
}

flow_data = []

# itertuples() avoids building a Series per row (as iterrows() does) and
# exposes the columns as attributes.
for visit in patient_visits.itertuples(index=False):
    admissions = int(visit.patient_count * admit_rates[visit.department])
    # Discharges are a random 70-95% share of the same day's admissions,
    # truncated to a whole number.
    discharges = int(admissions * np.random.uniform(0.7, 0.95))

    flow_data.append([visit.date, visit.department, admissions, discharges])

admissions_df = pd.DataFrame(
    flow_data,
    columns=["date", "department", "admissions", "discharges"]
)


In [17]:
def _bed_row(day, dept, capacity):
    """Return [day, dept, capacity, occupied beds] for one department-day.

    The occupancy ratio is one uniform draw between 0.6 and 1.05; the
    resulting occupied count is truncated to an integer and capped at
    capacity, so ratios above 1.0 show up as a completely full department.
    """
    fill_ratio = np.random.uniform(0.6, 1.05)
    in_use = int(capacity * fill_ratio)
    if in_use > capacity:
        in_use = capacity
    return [day, dept, capacity, in_use]


# One row per (day, department), generated day by day and, within a day,
# in the order the departments are declared.
bed_data = [
    _bed_row(day, dept, params["beds"])
    for day in dates
    for dept, params in departments.items()
]

beds = pd.DataFrame(
    bed_data,
    columns=["date", "department", "total_beds", "beds_occupied"],
)


In [19]:
def _staff_row(day, dept):
    """Return [day, dept, doctors, nurses, total staff] for one department-day.

    Doctors are drawn first (integers 4-9), then nurses (integers 8-19);
    np.random.randint excludes the upper bound.
    """
    n_doctors = np.random.randint(4, 10)
    n_nurses = np.random.randint(8, 20)
    return [day, dept, n_doctors, n_nurses, n_doctors + n_nurses]


# One row per (day, department); the same staffing ranges apply to every
# department.
staff_data = [_staff_row(day, dept) for day in dates for dept in departments]

staffing = pd.DataFrame(
    staff_data,
    columns=["date", "department", "doctors", "nurses", "staff_on_duty"],
)


In [21]:
# Simulate the average wait time for every (day, department) visit record.
#
# The department's baseline wait is scaled by how busy the day was relative
# to the department's base patient volume, then by a random factor between
# 0.9 and 1.3, and finally rounded to one decimal place.
wait_data = []

# itertuples() avoids building a Series per row (as iterrows() does) and
# exposes the columns as attributes.
for visit in patient_visits.itertuples(index=False):
    dept = visit.department
    base_wait = departments[dept]["wait_base"]
    load_factor = visit.patient_count / departments[dept]["base_patients"]

    wait_time = base_wait * load_factor * np.random.uniform(0.9, 1.3)

    wait_data.append([visit.date, dept, round(wait_time, 1)])

wait_times = pd.DataFrame(
    wait_data, columns=["date", "department", "avg_wait_time"]
)


In [23]:
# Write every generated table to the project's raw-data folder as CSV.
#
# The destination is declared once so the notebook can be pointed at another
# location by editing a single line.
# NOTE(review): this is a machine-specific absolute path; consider replacing
# it with a path relative to the project root.
RAW_DATA_DIR = "C:/Users/Abhi/Desktop/Power_BI_Data_Analysis_Project/Healthcare-Patient-Flow-Analytics/data/raw"

# Output file stem -> DataFrame, in the order the files are written.
raw_exports = {
    "patient_visits": patient_visits,
    "admissions": admissions_df,
    "beds": beds,
    "staffing": staffing,
    "wait_times": wait_times,
    "weather": weather,
}

for file_stem, frame in raw_exports.items():
    # index=False keeps the positional row index out of the CSV.
    frame.to_csv(f"{RAW_DATA_DIR}/{file_stem}.csv", index=False)
