In [1]:
import pandas as pd

In [10]:
# Read the raw appointments extract into `df` and preview the first rows.
raw_appointments_csv = r"C:\Users\ra481\OneDrive\Desktop\healthcare-analytics\Data\Raw\appointments.csv"
df = pd.read_csv(raw_appointments_csv)
df.head()

Unnamed: 0,appointment_id,patient_id,doctor_id,appointment_date,appointment_time,reason_for_visit,status
0,A001,P034,D009,2023-08-09,15:15:00,Therapy,Scheduled
1,A002,P032,D004,2023-06-09,14:30:00,Therapy,No-show
2,A003,P048,D004,2023-06-28,8:00:00,Consultation,Cancelled
3,A004,P025,D006,2023-09-01,9:15:00,Consultation,Cancelled
4,A005,P040,D003,2023-07-06,12:45:00,Emergency,No-show


In [11]:
# A notebook cell only renders its *last* expression, so a bare `df.shape`
# placed before another statement is evaluated and silently discarded.
# Print it explicitly so both the shape and the schema summary are shown.
print(df.shape)

# Column names, non-null counts and dtypes (info() writes straight to stdout).
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 200 entries, 0 to 199
Data columns (total 7 columns):
 #   Column            Non-Null Count  Dtype 
---  ------            --------------  ----- 
 0   appointment_id    200 non-null    object
 1   patient_id        200 non-null    object
 2   doctor_id         200 non-null    object
 3   appointment_date  200 non-null    object
 4   appointment_time  200 non-null    object
 5   reason_for_visit  200 non-null    object
 6   status            200 non-null    object
dtypes: object(7)
memory usage: 11.1+ KB


In [26]:
# Key sanity check: True when no appointment_id value is repeated.
appointment_ids = df["appointment_id"]
appointment_ids.is_unique


True

In [27]:
# How many appointments fall under each status value.
status_column = df["status"]
status_column.value_counts()


status
No-show      52
Scheduled    51
Cancelled    51
Completed    46
Name: count, dtype: int64

In [28]:
# Convert the appointment_date column to pandas datetimes so the `.dt`
# accessor can be used on it later in the notebook.
parsed_appointment_dates = pd.to_datetime(df["appointment_date"])
df["appointment_date"] = parsed_appointment_dates


In [30]:
# List each column's dtype (used here to check the appointment_date conversion).
df.dtypes

appointment_id              object
patient_id                  object
doctor_id                   object
appointment_date    datetime64[ns]
appointment_time            object
reason_for_visit            object
status                      object
dtype: object

In [31]:
# Status KPI table: one row per appointment status with its absolute count
# and its share of all appointments, as a percentage rounded to 2 decimals.
status_counts = df["status"].value_counts()
total_appointments = len(df)
status_percentage = status_counts.div(total_appointments).mul(100).round(2)

# Both series share the value_counts() ordering, so the columns line up row by row.
kpi_columns = {
    "status": status_counts.index,
    "count": status_counts.values,
    "percentage": status_percentage.values,
}
kpi_df = pd.DataFrame(kpi_columns)

kpi_df


Unnamed: 0,status,count,percentage
0,No-show,52,26.0
1,Scheduled,51,25.5
2,Cancelled,51,25.5
3,Completed,46,23.0


In [33]:
import os

# Export the status KPI table for reporting.
# to_csv() does not create missing folders, so make sure the reports
# directory exists *before* writing; otherwise a fresh Run All fails here.
kpi_csv_path = "C:/Users/ra481/OneDrive/Desktop/healthcare-analytics/reports/appointment_kpis.csv"
os.makedirs(os.path.dirname(kpi_csv_path), exist_ok=True)

# index=False keeps the positional RangeIndex out of the exported file.
kpi_df.to_csv(kpi_csv_path, index=False)


In [34]:
import os

# Create the reports folder (and any missing parents); a no-op if it already exists.
reports_dir = "C:/Users/ra481/OneDrive/Desktop/healthcare-analytics/reports"
os.makedirs(reports_dir, exist_ok=True)


In [35]:
import os
import pandas as pd

# Guarantee the reports directory is present before writing into it.
reports_path = "C:/Users/ra481/OneDrive/Desktop/healthcare-analytics/reports"
os.makedirs(reports_path, exist_ok=True)

# Write the KPI table again, then report where it went and whether it is on disk.
file_path = f"{reports_path}/appointment_kpis.csv"
kpi_df.to_csv(file_path, index=False)

file_written = os.path.exists(file_path)
print("Saved at:", file_path)
print("File exists?", file_written)


Saved at: C:/Users/ra481/OneDrive/Desktop/healthcare-analytics/reports/appointment_kpis.csv
File exists? True


In [36]:
# Appointment volume per calendar month.
# Each date is mapped to its monthly period, rows are counted per period,
# and the period is finally rendered as a "YYYY-MM" string.
appointment_month = df["appointment_date"].dt.to_period("M")
monthly_trend = (
    df.groupby(appointment_month)
    .size()
    .reset_index(name="appointment_count")
    .assign(appointment_date=lambda trend: trend["appointment_date"].astype(str))
)
monthly_trend


Unnamed: 0,appointment_date,appointment_count
0,2023-01,20
1,2023-02,14
2,2023-03,19
3,2023-04,25
4,2023-05,19
5,2023-06,18
6,2023-07,16
7,2023-08,15
8,2023-09,11
9,2023-10,14


In [37]:
# Export the monthly appointment counts (without the positional index column).
monthly_csv_path = "C:/Users/ra481/OneDrive/Desktop/healthcare-analytics/reports/monthly_appointments.csv"
monthly_trend.to_csv(monthly_csv_path, index=False)


In [38]:
import os

# Confirm the monthly export is present on disk.
monthly_csv_path = "C:/Users/ra481/OneDrive/Desktop/healthcare-analytics/reports/monthly_appointments.csv"
os.path.exists(monthly_csv_path)


True

In [40]:
# Next KPI: no-shows.
# Keep only the rows whose status is exactly "No-show" and preview them.
is_no_show = df["status"].eq("No-show")
no_show_df = df[is_no_show]
no_show_df.head()


Unnamed: 0,appointment_id,patient_id,doctor_id,appointment_date,appointment_time,reason_for_visit,status
1,A002,P032,D004,2023-06-09,14:30:00,Therapy,No-show
4,A005,P040,D003,2023-07-06,12:45:00,Emergency,No-show
10,A011,P022,D007,2023-11-12,16:00:00,Checkup,No-show
14,A015,P026,D004,2023-01-15,17:15:00,Consultation,No-show
20,A021,P028,D009,2023-04-24,10:00:00,Therapy,No-show


In [41]:
# No-show rate: no-show rows as a percentage of all appointments, 2 decimals.
no_show_share = len(no_show_df) / len(df)
no_show_rate = round(no_show_share * 100, 2)
no_show_rate


26.0

In [42]:
# No-shows broken down by visit reason, largest count first.
reason_sizes = no_show_df.groupby("reason_for_visit").size()
no_show_by_reason = reason_sizes.reset_index(name="no_show_count").sort_values(
    by="no_show_count", ascending=False
)

no_show_by_reason


Unnamed: 0,reason_for_visit,no_show_count
4,Therapy,15
1,Consultation,11
0,Checkup,10
2,Emergency,10
3,Follow-up,6


In [43]:
# No-shows per calendar month, with the month rendered as a "YYYY-MM" string.
no_show_month = no_show_df["appointment_date"].dt.to_period("M")
no_show_trend = (
    no_show_df.groupby(no_show_month)
    .size()
    .reset_index(name="no_show_count")
    .assign(appointment_date=lambda trend: trend["appointment_date"].astype(str))
)
no_show_trend


Unnamed: 0,appointment_date,no_show_count
0,2023-01,8
1,2023-02,3
2,2023-03,3
3,2023-04,5
4,2023-05,5
5,2023-06,6
6,2023-07,4
7,2023-08,3
8,2023-09,3
9,2023-10,5


In [44]:
# Export both no-show tables; dict order keeps the original write order
# (by-reason first, then the monthly trend). index=False drops the row index.
no_show_exports = {
    "C:/Users/ra481/OneDrive/Desktop/healthcare-analytics/reports/no_show_by_reason.csv": no_show_by_reason,
    "C:/Users/ra481/OneDrive/Desktop/healthcare-analytics/reports/no_show_trend.csv": no_show_trend,
}
for csv_path, frame in no_show_exports.items():
    frame.to_csv(csv_path, index=False)
