In [1]:
import pandas as pd

# Read the raw appointments CSV and report the dtypes pandas inferred,
# so the effect of the later conversions can be compared against them.
raw_csv_path = "../Datasets/KaggleV2-May-2016.csv"
df_med = pd.read_csv(raw_csv_path)

print("Before transformation:\n", df_med.dtypes)

Before transformation:
 PatientId         float64
AppointmentID       int64
Gender             object
ScheduledDay       object
AppointmentDay     object
Age                 int64
Neighbourhood      object
Scholarship         int64
Hipertension        int64
Diabetes            int64
Alcoholism          int64
Handcap             int64
SMS_received        int64
No-show            object
dtype: object


In [2]:
# -------------------
# Datetime Conversion
# -------------------
# Both timestamp columns are loaded as plain strings (object dtype);
# parse each one into a pandas datetime column.
for date_column in ("ScheduledDay", "AppointmentDay"):
    df_med[date_column] = pd.to_datetime(df_med[date_column])

In [3]:
# -------------------
# Categorical Handling
# -------------------

# Two-valued text columns and the numeric code assigned to each label:
#   Gender:  F=0, M=1
#   No-show: No=0, Yes=1
binary_encodings = (
    ("Gender", {"F": 0, "M": 1}),
    ("No-show", {"No": 0, "Yes": 1}),
)

for column, encoding in binary_encodings:
    current = df_med[column]

    # Re-running this cell must not destroy the data: calling .map() with the
    # string-keyed dict on values that are already 0/1 would turn every entry
    # into NaN. If the column already holds only encoded values, leave it alone.
    if current.dropna().isin(list(encoding.values())).all():
        continue

    encoded = current.map(encoding)

    # Series.map() silently yields NaN for labels missing from the dict.
    # Values that were non-null before mapping but null afterwards are
    # unexpected labels; fail loudly instead of losing them.
    # Genuinely missing values (NaN in the input) are passed through unchanged.
    unexpected = current[current.notna() & encoded.isna()].unique()
    if len(unexpected):
        raise ValueError(
            f"Unexpected values in column {column!r}: {list(unexpected)}; "
            f"expected one of {list(encoding)}"
        )

    df_med[column] = encoded

# Leave other binary columns as numeric per Option 2

# Neighbourhood: convert to category dtype for memory & modeling
df_med["Neighbourhood"] = df_med["Neighbourhood"].astype("category")

print("\nAfter transformation:\n", df_med.dtypes)


After transformation:
 PatientId                     float64
AppointmentID                   int64
Gender                          int64
ScheduledDay      datetime64[ns, UTC]
AppointmentDay    datetime64[ns, UTC]
Age                             int64
Neighbourhood                category
Scholarship                     int64
Hipertension                    int64
Diabetes                        int64
Alcoholism                      int64
Handcap                         int64
SMS_received                    int64
No-show                         int64
dtype: object


In [4]:
# -------------------
# Validation
# -------------------
# Count null entries per column after the conversions above and print them.
missing_per_column = df_med.isna().sum()
print("\nMissing values after transformation:\n", missing_per_column)


Missing values after transformation:
 PatientId         0
AppointmentID     0
Gender            0
ScheduledDay      0
AppointmentDay    0
Age               0
Neighbourhood     0
Scholarship       0
Hipertension      0
Diabetes          0
Alcoholism        0
Handcap           0
SMS_received      0
No-show           0
dtype: int64


In [5]:
# Write the transformed frame next to the notebook, without the row index.
cleaned_csv_path = "KaggleV2-May-2016-Cleaned.csv"
df_med.to_csv(cleaned_csv_path, index=False)