In [56]:
import os
import kagglehub
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.neighbors import KNeighborsClassifier, KNeighborsRegressor

# Setup: make sure a local "data" directory exists under the working directory.
# NOTE(review): data_path is not used again in this cell; the directory is
# created purely as a side effect and is kept for backward compatibility.
curr_dir = os.getcwd()
data_path = os.path.join(curr_dir, "data")
os.makedirs(data_path, exist_ok=True)

# Download dataset; the returned path is used as the directory holding the CSV.
dataset_path = kagglehub.dataset_download("joniarroba/noshowappointments")
csv_file = os.path.join(dataset_path, "KaggleV2-May-2016.csv")

# Load data
df = pd.read_csv(csv_file)

# Convert categorical columns to integers.
# Values outside the mapping become NaN (pandas `Series.map` semantics).
df["Gender"] = df["Gender"].map({"F": 0, "M": 1})
df["No-show"] = df["No-show"].map({"Yes": 1, "No": 0})

# Convert dates and add derived feature
df['ScheduledDay'] = pd.to_datetime(df['ScheduledDay'])
df['AppointmentDay'] = pd.to_datetime(df['AppointmentDay'])

# Compare calendar dates, not raw timestamps. If ScheduledDay carries a time
# of day while AppointmentDay is date-only (midnight), the raw difference is a
# negative/partial-day timedelta and `.dt.days` floors it, so a same-day
# booking would come out as -1 and every lead time would be one day short.
# Normalizing both sides to midnight is a no-op when they are already dates.
scheduled_date = df['ScheduledDay'].dt.normalize()
appointment_date = df['AppointmentDay'].dt.normalize()
df['DaysUntilAppointment'] = (appointment_date - scheduled_date).dt.days

# Select features (column names are spelled exactly as in the CSV header).
features = [
    'Gender',
    'Age',
    'Scholarship',
    'Hipertension',
    'Diabetes',
    'Alcoholism',
    'Handcap',
    'SMS_received',
    'DaysUntilAppointment',
]

# Split data: 80/20, stratified on the target so both splits keep the same
# no-show ratio; fixed seed for reproducibility.
X_train, X_test, y_train, y_test = train_test_split(
    df[features], df["No-show"], test_size=0.2, random_state=42, stratify=df["No-show"]
)

# Train models.
# NOTE(review): KNN is distance-based and the features are on different scales
# (e.g. Age and DaysUntilAppointment vs. the 0/1 Gender flag), so the
# wide-range columns dominate the neighbour search. Consider standardizing the
# features (e.g. sklearn.preprocessing.StandardScaler) before fitting.
knn_classifier = KNeighborsClassifier(n_neighbors=10).fit(X_train, y_train)
# NOTE(review): the regressor is fit on the same 0/1 target, so with
# n_neighbors=2 its predictions can only be 0, 0.5 or 1, and R² is a weak
# metric for a binary outcome — confirm this comparison is intended.
knn_regressor = KNeighborsRegressor(n_neighbors=2).fit(X_train, y_train)

# Evaluate models
print("Classifier training accuracy:", knn_classifier.score(X_train, y_train))
print("Classifier test accuracy:", knn_classifier.score(X_test, y_test))

print("Regressor training R² score:", knn_regressor.score(X_train, y_train))
print("Regressor test R² score:", knn_regressor.score(X_test, y_test))

# Make predictions on the held-out split
y_classifier_pred = knn_classifier.predict(X_test)
y_regressor_pred = knn_regressor.predict(X_test)

print("Predictions (Classifier):", y_classifier_pred[:10])
print("Predictions (Regressor):", y_regressor_pred[:10])


Classifier training accuracy: 0.8094457199081666
Classifier test accuracy: 0.789559395639193
Regressor training R² score: 0.16507655767769613
Regressor test R² score: -0.3052422249933868
Predictions (Classifier): [0 1 0 0 0 0 0 0 0 0]
Predictions (Regressor): [0.  1.  0.5 0.5 0.  0.  0.  0.  0.  0.5]
