In [1]:
import pandas as pd


In [3]:
# Folder holding the cleaned source extracts. Kept in one place so the
# notebook can be pointed at another machine/checkout by editing a single line.
PROCESSED_DIR = "C:/Users/Abhi/Desktop/Power_BI_Data_Analysis_Project/Healthcare-Patient-Flow-Analytics/data/processed"


def load_processed_table(table_name):
    """Read ``<PROCESSED_DIR>/<table_name>_clean.csv`` into a DataFrame.

    Parameters
    ----------
    table_name : str
        File stem without the ``_clean.csv`` suffix, e.g. ``"beds"``.

    Returns
    -------
    pandas.DataFrame
        The file contents with the ``date`` column parsed as datetimes.
    """
    return pd.read_csv(f"{PROCESSED_DIR}/{table_name}_clean.csv", parse_dates=["date"])


patient_visits = load_processed_table("patient_visits")
admissions = load_processed_table("admissions")
beds = load_processed_table("beds")
staffing = load_processed_table("staffing")
wait_times = load_processed_table("wait_times")
weather = load_processed_table("weather")

In [5]:
# Calendar spine: one row per day between the earliest and latest visit date.
visit_dates = patient_visits["date"]
calendar_days = pd.date_range(
    start=visit_dates.min(),
    end=visit_dates.max(),
    freq="D",
)

# Derive the calendar attributes in a single chained step.
dim_date = pd.DataFrame({"date": calendar_days}).assign(
    year=lambda cal: cal["date"].dt.year,
    month=lambda cal: cal["date"].dt.month,
    month_name=lambda cal: cal["date"].dt.strftime("%b"),  # abbreviated, e.g. "Jan"
    quarter=lambda cal: "Q" + cal["date"].dt.quarter.astype(str),
    weekday=lambda cal: cal["date"].dt.day_name(),
    # pandas numbers weekdays from Monday=0, so 5 and 6 are Saturday and Sunday.
    is_weekend=lambda cal: cal["date"].dt.weekday >= 5,
)


In [9]:
# Preview the first rows of the date dimension to eyeball the derived calendar columns.
dim_date.head()

Unnamed: 0,date,year,month,month_name,quarter,weekday,is_weekend
0,2023-01-01,2023,1,Jan,Q1,Sunday,True
1,2023-01-02,2023,1,Jan,Q1,Monday,False
2,2023-01-03,2023,1,Jan,Q1,Tuesday,False
3,2023-01-04,2023,1,Jan,Q1,Wednesday,False
4,2023-01-05,2023,1,Jan,Q1,Thursday,False


In [11]:
# Department dimension: one row per distinct department seen in the visit data,
# enriched with two hand-maintained descriptive attributes.
dept_type_by_department = {
    "ER": "Emergency",
    "ICU": "Critical Care",
    "OPD": "Outpatient",
    "SURGERY": "Inpatient",
}
risk_level_by_department = {
    "ER": "High",
    "ICU": "High",
    "SURGERY": "Medium",
    "OPD": "Low",
}

dim_department = pd.DataFrame({
    "department": patient_visits["department"].unique()
})
dim_department["dept_type"] = dim_department["department"].map(dept_type_by_department)
dim_department["risk_level"] = dim_department["department"].map(risk_level_by_department)

# Series.map leaves NaN for any key missing from the lookup, so a department
# that is absent from the dictionaries above would otherwise be exported with
# blank attributes. Fail loudly instead.
unmapped_departments = dim_department.loc[
    dim_department[["dept_type", "risk_level"]].isna().any(axis=1),
    "department",
].tolist()
assert not unmapped_departments, (
    f"Departments missing dept_type/risk_level mapping: {unmapped_departments}"
)


In [13]:
# Preview the department dimension with its dept_type and risk_level attributes.
dim_department.head()

Unnamed: 0,department,dept_type,risk_level
0,ER,Emergency,High
1,ICU,Critical Care,High
2,OPD,Outpatient,Low
3,SURGERY,Inpatient,Medium


In [15]:
# Weather dimension: the weather table with fully identical rows removed and a
# fresh 0..n-1 index.
dim_weather = weather.drop_duplicates(ignore_index=True)


In [17]:
# Preview the first rows of the de-duplicated weather dimension.
dim_weather.head()

Unnamed: 0,date,weather_type,rain_flag
0,2023-01-01,Clear,0
1,2023-01-02,Clear,0
2,2023-01-03,Clear,0
3,2023-01-04,Clear,0
4,2023-01-05,Clear,0


In [19]:
# Build the wide fact table by left-joining every source onto patient_visits.
#
# The department-level sources are joined on (date, department). validate=
# "one_to_one" makes pandas raise a MergeError at the offending join if either
# side repeats a key, rather than silently multiplying fact rows.
department_level_tables = [admissions, beds, staffing, wait_times]

fact = patient_visits
for dept_table in department_level_tables:
    fact = fact.merge(
        dept_table,
        on=["date", "department"],
        how="left",
        validate="one_to_one",
    )

# Weather is joined on date alone, so each weather row is shared by all
# departments on that day. Exact repeats are dropped first so they cannot fan
# out the fact rows; validate="many_to_one" then raises if a date still maps to
# more than one rain_flag value.
daily_rain = weather[["date", "rain_flag"]].drop_duplicates()
fact = fact.merge(
    daily_rain,
    on="date",
    how="left",
    validate="many_to_one",
)


In [21]:
# Preview the merged table to confirm the columns from every source arrived.
fact.head()

Unnamed: 0,date,department,patient_count,admissions,discharges,total_beds,beds_occupied,doctors,nurses,staff_on_duty,avg_wait_time,rain_flag
0,2023-01-01,ER,111,38,32,50,44,9,15,24,35.3,0
1,2023-01-01,ICU,15,10,7,20,14,7,18,25,23.1,0
2,2023-01-01,OPD,66,6,5,40,40,5,16,21,18.8,0
3,2023-01-01,SURGERY,27,14,13,25,16,4,12,16,55.1,0
4,2023-01-02,ER,146,51,43,50,50,9,12,21,58.8,0


In [23]:
# Columns kept in the published fact table, in export order:
# keys first, then the flow, capacity, staffing, wait-time and weather measures.
fact_columns = [
    "date", "department",
    "patient_count", "admissions", "discharges",
    "beds_occupied", "total_beds",
    "staff_on_duty",
    "avg_wait_time",
    "rain_flag",
]

fact_patient_flow = fact[fact_columns]


In [25]:
# Preview the trimmed fact table in its final column order.
fact_patient_flow.head()

Unnamed: 0,date,department,patient_count,admissions,discharges,beds_occupied,total_beds,staff_on_duty,avg_wait_time,rain_flag
0,2023-01-01,ER,111,38,32,44,50,24,35.3,0
1,2023-01-01,ICU,15,10,7,14,20,25,23.1,0
2,2023-01-01,OPD,66,6,5,40,40,21,18.8,0
3,2023-01-01,SURGERY,27,14,13,16,25,16,55.1,0
4,2023-01-02,ER,146,51,43,50,50,21,58.8,0


In [27]:
# Grain check: each (date, department) pair may appear at most once.
grain_keys = ["date", "department"]
assert not fact_patient_flow.duplicated(subset=grain_keys).any()

# Null check: report the number of missing values per column.
null_counts = fact_patient_flow.isnull().sum()
print(null_counts)

fact_patient_flow.head()


date             0
department       0
patient_count    0
admissions       0
discharges       0
beds_occupied    0
total_beds       0
staff_on_duty    0
avg_wait_time    0
rain_flag        0
dtype: int64


Unnamed: 0,date,department,patient_count,admissions,discharges,beds_occupied,total_beds,staff_on_duty,avg_wait_time,rain_flag
0,2023-01-01,ER,111,38,32,44,50,24,35.3,0
1,2023-01-01,ICU,15,10,7,14,20,25,23.1,0
2,2023-01-01,OPD,66,6,5,40,40,21,18.8,0
3,2023-01-01,SURGERY,27,14,13,16,25,16,55.1,0
4,2023-01-02,ER,146,51,43,50,50,21,58.8,0


In [29]:
# Folder that receives the star-schema CSVs. Kept in one place so the export
# target can be changed by editing a single line.
PROCESSED_DIR = "C:/Users/Abhi/Desktop/Power_BI_Data_Analysis_Project/Healthcare-Patient-Flow-Analytics/data/processed"

# Output file stem -> table to export.
star_schema_tables = {
    "fact_patient_flow": fact_patient_flow,
    "dim_date": dim_date,
    "dim_department": dim_department,
    "dim_weather": dim_weather,
}

# index=False keeps the pandas row index out of the exported files.
for table_name, table in star_schema_tables.items():
    table.to_csv(f"{PROCESSED_DIR}/{table_name}.csv", index=False)
