In [3]:
import pandas as pd
import numpy as np
from datetime import timedelta, datetime
import random

In [5]:
def random_date(start, end):
    return start + timedelta(days=random.randint(0, (end - start).days))


In [11]:
start_date = datetime(2023, 1, 1)
end_date = datetime(2023, 3, 1)


In [13]:
rows = 35
data = {
    'PatientID': [i for i in range(1, rows + 1)],
    'Age': np.random.randint(5, 80, size=rows),
    'ScheduledDay': [random_date(start_date, end_date).strftime('%Y-%m-%d') for _ in range(rows)],
    'AppointmentDay': [random_date(start_date + timedelta(days=2), end_date + timedelta(days=10)).strftime('%Y-%m-%d') for _ in range(rows)],
    'SMS_received': np.random.randint(0, 2, size=rows),
    'No-show': np.random.choice(['Yes', 'No'], size=rows, p=[0.3, 0.7])  # 30% no-show
}

df = pd.DataFrame(data)
df.to_csv('healthcare_appointments_35rows.csv', index=False)


In [15]:
df = pd.read_csv('healthcare_appointments_35rows.csv')

# Convert to datetime
df['ScheduledDay'] = pd.to_datetime(df['ScheduledDay'])
df['AppointmentDay'] = pd.to_datetime(df['AppointmentDay'])

# Add new features
df['WaitingDays'] = (df['AppointmentDay'] - df['ScheduledDay']).dt.days
df['DayOfWeek'] = df['AppointmentDay'].dt.day_name()
df['No-show'] = df['No-show'].map({'Yes': 1, 'No': 0})  # Binary

df.head()

Unnamed: 0,PatientID,Age,ScheduledDay,AppointmentDay,SMS_received,No-show,WaitingDays,DayOfWeek
0,1,24,2023-02-12,2023-01-19,0,0,-24,Thursday
1,2,17,2023-01-02,2023-01-22,0,0,20,Sunday
2,3,48,2023-01-18,2023-03-09,0,1,50,Thursday
3,4,6,2023-01-14,2023-02-09,1,1,26,Thursday
4,5,57,2023-01-07,2023-02-06,0,0,30,Monday


In [17]:
from sklearn.model_selection import train_test_split
from sklearn.tree import DecisionTreeClassifier
from sklearn.metrics import accuracy_score

X = df[['Age', 'SMS_received', 'WaitingDays']]
y = df['No-show']

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=1)

model = DecisionTreeClassifier()
model.fit(X_train, y_train)
y_pred = model.predict(X_test)

print("Accuracy:", accuracy_score(y_test, y_pred))

# Add predictions
df['Predicted_No_Show'] = model.predict(X)


Accuracy: 0.8571428571428571


In [19]:
df.to_csv('final_healthcare_data.csv', index=False)