In [7]:
import pandas as pd
from pathlib import Path

In [11]:

# Load the raw appointment export from the notebook's working directory.
df = pd.read_csv(Path("Medical Appointment No Shows.csv"))

# 1. Rename columns


In [12]:
# Tidy the headers: trim surrounding whitespace, lower-case everything, and
# turn both hyphens and spaces into underscores.
separator_map = str.maketrans('- ', '__')
df.columns = [header.strip().lower().translate(separator_map) for header in df.columns]


# 2. IDs to int


In [13]:
def _as_nullable_int(value):
    """Return ``value`` truncated to an int, or ``pd.NA`` when it is missing."""
    if pd.isna(value):
        return pd.NA
    return int(float(value))


# Build the integer ID columns under their new names, then discard the raw ones.
for raw_col, clean_col in (('patientid', 'patient_id'), ('appointmentid', 'appointment_id')):
    df[clean_col] = df[raw_col].apply(_as_nullable_int).astype('Int64')
df.drop(columns=['patientid', 'appointmentid'], inplace=True)


# 3. Datetimes


In [14]:
# (raw column, parsed timestamp column, calendar-date column)
day_columns = (
    ('scheduledday', 'scheduled_day', 'scheduled_date'),
    ('appointmentday', 'appointment_day', 'appointment_date'),
)

# Parse the raw values into timestamps; anything unparseable becomes NaT.
for raw_col, day_col, _ in day_columns:
    df[day_col] = pd.to_datetime(df[raw_col], errors='coerce')

# Keep a date-only copy of each timestamp alongside it.
for _, day_col, date_col in day_columns:
    df[date_col] = df[day_col].dt.date

# 4. no_show binary

In [None]:

# Encode the label as 1 (Yes) / 0 (No) after trimming and normalising case;
# any other value has no entry in the mapping and therefore becomes NaN.
no_show_codes = {'Yes':1,'No':0}
no_show_labels = df['no_show'].astype(str).str.strip().str.capitalize()
df['no_show'] = no_show_labels.map(no_show_codes)



# 5. Gender cleanup


In [15]:

# Normalise gender labels ('f', ' M ' -> 'Female', 'Male') and store them as a
# categorical column.
raw_gender = df['gender']
gender_labels = (
    raw_gender.astype(str)
    .str.strip()
    .str.capitalize()
    .replace({'F':'Female','M':'Male'})
)
# astype(str) turns missing entries into the literal text 'nan', which would
# otherwise survive as a bogus 'Nan' category; restore them as missing.
df['gender'] = gender_labels.where(raw_gender.notna()).astype('category')


# 6. Neighbourhood cleanup


In [16]:

# Trim and title-case neighbourhood names.
raw_neighbourhood = df['neighbourhood']
neighbourhood_names = raw_neighbourhood.astype(str).str.strip().str.title()
# astype(str) turns missing entries into the literal text 'nan' (title-cased to
# 'Nan'); keep them as missing values instead of inventing a neighbourhood.
df['neighbourhood'] = neighbourhood_names.where(raw_neighbourhood.notna())

# 7. Age cleaning


In [None]:

# Inclusive bounds for an acceptable age; values outside are treated as invalid.
MIN_VALID_AGE, MAX_VALID_AGE = 0, 115

ages = df['age']
# Series.where builds a new series with NaN in place of out-of-range ages.
# This avoids writing pd.NA into an integer column through .loc, which relies
# on implicit upcasting that newer pandas releases deprecate.
valid_ages = ages.where((ages >= MIN_VALID_AGE) & (ages <= MAX_VALID_AGE))

median_value = valid_ages.median()
if pd.isna(median_value):
    # Without at least one valid age there is nothing to impute from.
    raise ValueError("cannot impute age: column 'age' has no valid values")

# Truncate the median to a whole number and use it for every invalid/missing age.
median_age = int(median_value)
df['age'] = valid_ages.fillna(median_age).astype(int)

# 8. Binary flags


In [17]:

# Coerce every indicator column that is present to integers; entries that are
# missing or cannot be parsed as numbers become 0.
flag_columns = ['scholarship','hipertension','diabetes','alcoholism','handcap','sms_received']
for flag in flag_columns:
    if flag in df.columns:
        df[flag] = pd.to_numeric(df[flag], errors='coerce').fillna(0).astype(int)

# Collapse 'handcap' to 0/1 (any positive value becomes 1) and store it under
# the corrected name 'handicap'. The guard mirrors the loop above, so a missing
# column -- or re-running this cell after 'handcap' was already popped -- no
# longer raises KeyError.
if 'handcap' in df.columns:
    df['handicap'] = (df.pop('handcap') > 0).astype(int)

# 9. wait_days


In [None]:

# Whole days between booking and appointment, measured on calendar dates.
appointment_dates = pd.to_datetime(df['appointment_date'])
scheduled_dates = pd.to_datetime(df['scheduled_date'])
elapsed_days = (appointment_dates - scheduled_dates).dt.days
# A negative wait (appointment dated before it was scheduled) is floored at 0.
df['wait_days'] = elapsed_days.clip(lower=0)


# 10. Drop duplicates



In [18]:
# Keep the first row seen for each appointment_id, then drop any rows that are
# still identical across every column.
df = (
    df
    .drop_duplicates(subset='appointment_id', keep='first')
    .drop_duplicates(keep='first')
)

# 11. Export



In [19]:
# Write the cleaned table next to the notebook, leaving out the row index.
df.to_csv(Path("medical_appointments_cleaned.csv"), index=False)