In [73]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn import preprocessing,svm

import mglearn
import warnings
# Suppress any warning whose message matches this pattern (an Axes /
# tight_layout compatibility notice).
warnings.filterwarnings(
    action='ignore',
    message='.*Axes.*compatible.*tight_layout.*',
)

# NOTE(review): absolute, machine-specific path -- the notebook only runs on a
# machine where this exact file exists.
DATA_PATH = '/home/kawaleenm/Documents/Medical Brazil/Med Appointments.csv'
df = pd.read_csv(DATA_PATH, low_memory=False)

In [74]:
# Encode the two string-valued columns as integers:
#   Gender : 'M' -> 1, 'F' -> 0
#   No-show: 'Yes' -> 1, 'No' -> 0   (so 1 means the patient did NOT turn up)
# Values outside these mappings are left untouched by replace().
df['Gender'] = df['Gender'].replace({'M': 1, 'F': 0})
df['No-show'] = df['No-show'].replace({'Yes': 1, 'No': 0})

In [76]:
# Parse both timestamp columns, then derive the lead time between scheduling
# and the appointment, measured in whole calendar days.
df['AppointmentDay'] = pd.to_datetime(df['AppointmentDay'])
df['ScheduledDay'] = pd.to_datetime(df['ScheduledDay'])

# normalize() resets the time-of-day to midnight while keeping a datetime64
# column, so the subtraction is vectorised and yields a proper timedelta
# column. The previous `.dt.date` route built an object column of Python
# `date` objects and subtracted them element by element.
# NOTE(review): assumes both columns parse to the same timezone-awareness
# (they come from the same file) -- confirm.
lead_time = df['AppointmentDay'].dt.normalize() - df['ScheduledDay'].dt.normalize()

# abs(): the sign is discarded, so an appointment dated before its scheduling
# day is treated the same as one dated after it.
# The column is left as text (e.g. "2 days ..."), because the following cell
# strips the unit with day_str() and converts the result to float.
df['Days_Before_Appointment'] = lead_time.abs().astype('str')

In [79]:
import re

def day_str(x):
    """Extract the whole-day count from a timedelta rendered as text.

    Handles the pandas rendering ("2 days", "2 days 00:00:00") as well as the
    standard-library one ("2 days, 0:00:00", "1 day, 0:00:00", and a bare
    "0:00:00" when the difference is under one day).

    Parameters
    ----------
    x : str
        Text form of a timedelta.

    Returns
    -------
    str
        The day count as a string (e.g. "2"), ready to be cast to float.
        Text that does not look like a timedelta falls back to the previous
        behaviour: every whitespace character and the run of
        letters/digits/':'/'-' that follows it is removed.
    """
    # "<n> day" / "<n> days" at the start: keep just the number.
    day_match = re.match(r'(-?\d+)\s+days?\b', x)
    if day_match:
        return day_match.group(1)

    # A clock-only value ("H:MM:SS[.ffffff]") means less than one full day.
    if re.fullmatch(r'\d+:\d{2}:\d{2}(?:\.\d+)?', x):
        return '0'

    # Raw string: '\s' in a normal literal is an invalid escape sequence.
    return re.sub(r'\s[a-zA-Z0-9:-]*', '', x)

# Strip the unit text from every entry with day_str(), then store the
# remaining day count as a float column.
days_as_text = df['Days_Before_Appointment'].apply(day_str)
df['Days_Before_Appointment'] = days_as_text.astype('float')

In [82]:
# Remove the two identifier columns and the two raw timestamp columns (the
# timestamps have already been reduced to Days_Before_Appointment).
# Mutates df in place; running this cell a second time raises KeyError because
# the columns are already gone.
df.drop(
    columns=['PatientId', 'AppointmentID', 'ScheduledDay', 'AppointmentDay'],
    inplace=True,
)

In [83]:
# Distinct neighbourhood names in ascending order; the position of a name in
# this list is used as its integer code in the next cell.
neighbourhood_names = sorted(df['Neighbourhood'].unique())

In [86]:
# Integer codes 0..N-1, one per neighbourhood name. The count is taken from
# the data: with a fixed range(81), zip() would silently stop after 81 names
# and any further neighbourhood would be left without a code.
_list = list(range(len(neighbourhood_names)))

# name -> code, following the sorted order of neighbourhood_names.
neighbourhood_dict = dict(zip(neighbourhood_names, _list))


def substitute(x):
    """Return the integer code of neighbourhood name ``x``.

    Returns ``None`` when ``x`` is not a key of ``neighbourhood_dict``. Uses a
    direct dictionary lookup rather than scanning every entry.
    """
    return neighbourhood_dict.get(x)


# Replace each neighbourhood name by its code (vectorised dictionary lookup).
df['Neighbourhood'] = df['Neighbourhood'].map(neighbourhood_dict)

In [91]:
# Feature matrix: every remaining column except the target.
# Target vector: the 0/1 encoded 'No-show' column.
feature_frame = df.drop(columns=['No-show'])
X = np.array(feature_frame)
y = np.array(df['No-show'])

# Hold out part of the rows for evaluation (train_test_split's default
# proportions); random_state=0 makes the split repeatable.
X_train, X_test, y_train, y_test = train_test_split(X, y, random_state=0)

# Note: the values printed are array shapes, i.e. (rows, columns).
train_shape = X_train.shape
test_shape = X_test.shape
print('Number of Training sets: {}'.format(train_shape))
print('Number of Testing sets: {}'.format(test_shape))

Number of Training sets: (82895, 10)
Number of Testing sets: (27632, 10)


In [104]:
# Predicting No-Show with a K-Neighbors classifier.
from sklearn.neighbors import KNeighborsClassifier

# n_neighbors=1: each prediction copies the label of the single closest
# training row. fit() returns the estimator itself, so it can be chained.
knn = KNeighborsClassifier(n_neighbors=1).fit(X_train, y_train)

# Mean accuracy on the held-out rows.
knn_accuracy = knn.score(X_test, y_test)
print("Accuracy of Prediction: {:.2f}".format(knn_accuracy))

Accuracy of Prediction: 0.74


In [96]:
from sklearn.tree import DecisionTreeClassifier

# Decision tree with default (unlimited) depth; random_state=0 keeps the
# fitted tree repeatable between runs.
tree = DecisionTreeClassifier(random_state=0)
tree.fit(X_train, y_train)

# Report both scores. The recorded output of this cell contains the training
# accuracy, so that line must be live for the cell to reproduce its output.
# The gap between the two recorded numbers (0.966 vs 0.739) is what reveals
# that the unrestricted tree overfits the training rows.
print("Accuracy on training set: {:.3f}".format(tree.score(X_train, y_train)))
print("Accuracy on test set: {:.3f}".format(tree.score(X_test, y_test)))

Accuracy on training set: 0.966
Accuracy on test set: 0.739


In [119]:
from sklearn.linear_model import LogisticRegression

# Fit on the training split only. Fitting on the full X, y would let the model
# see the very rows it is scored on below, so the reported test accuracy
# would not measure performance on unseen data.
logreg = LogisticRegression().fit(X_train, y_train)

print("Accuracy on Test set: {:.3f}".format(logreg.score(X_test, y_test)))

Accuracy on Test set: 0.798


In [118]:
# Interactive what-if: read one patient's attributes from the keyboard and ask
# each of the three fitted models whether that patient is predicted to show up.
print('Predict whether patient will Turn Up')
age = int(input('Enter age of patient:'))
gender = int(input('Enter Gender (_0 for Female_) (_1 for Male_) : '))
neighbourhood = int(input('Enter Neighbourhood Pincode (_between 0-80 _): '))
Scholarship  = int(input('If Scholorship (_0 for No_) (_1 for Yes_) : '))
Hipertension = int(input('If HyperTension (_0 for No_) (_1 for Yes_) : '))
Diabetes = int(input('If Diabetes (_0 for No_) (_1 for Yes_) : '))
Alcoholism = int(input('If Alcoholism (_0 for No_) (_1 for Yes_) : '))
Handcap = int(input('If Handcap (_0 for No_) (_1 for Yes_) : '))
SMS_received = int(input('If SMS_received (_0 for No_) (_1 for Yes_) : '))
Days_Before_Appointment = float(input('How many Days from now appoint should be scheduled: '))

# Answers keyed by the training column each one is meant to fill. The
# insertion order below is the positional order this cell used historically.
# NOTE(review): the column names are assumed to match df's feature columns
# ('Gender', 'Neighbourhood' and 'Days_Before_Appointment' appear earlier in
# the notebook; the others are inferred) -- confirm against df.columns.
patient = {
    'Age': age,
    'Gender': gender,
    'Neighbourhood': neighbourhood,
    'Scholarship': Scholarship,
    'Hipertension': Hipertension,
    'Diabetes': Diabetes,
    'Alcoholism': Alcoholism,
    'Handcap': Handcap,
    'SMS_received': SMS_received,
    'Days_Before_Appointment': Days_Before_Appointment,
}

# The models were trained on the frame's columns minus the target, in the
# frame's own column order. Arrange the answers in that same order so that
# every value reaches the feature it belongs to, instead of relying on a
# hand-maintained position list.
feature_columns = [col for col in df.columns if col != 'No-show']
if set(feature_columns) == set(patient):
    patient_list = [patient[col] for col in feature_columns]
else:
    # Column names differ from what is assumed above: keep the historical
    # positional order, but make the risk visible.
    warnings.warn(
        'Feature columns {} do not match the prompted fields {}; '
        'using positional order, predictions may be misaligned.'.format(
            feature_columns, list(patient)))
    patient_list = list(patient.values())

# Single-row 2-D array, the layout predict() expects.
patient_test = np.array([patient_list])

y_knn = knn.predict(patient_test)
y_tree = tree.predict(patient_test)
y_logreg = logreg.predict(patient_test)


def _verdict(label):
    """Translate a predicted No-show label (0 = showed up) into a sentence."""
    if label == 0:
        return 'Patient will Show Up'
    return 'Patient will not Show Up'


verdict_k = _verdict(y_knn[0])
verdict_t = _verdict(y_tree[0])
verdict_l = _verdict(y_logreg[0])

print("Prediction by KNN: {}\n".format(verdict_k))
print("Prediction by Decesion Tree: {}\n".format(verdict_t))
print("Prediction by LogReg: {}\n".format(verdict_l))

Predict whether patient will Turn Up
Enter age of patient:21
Enter Gender__0 for Female__1 for Male__ : 0
Enter Neighbourhood Pincode __between 0-80 __: 13
If Scholorship __0 for No__1 for Yes__ : 1
If HyperTension __0 for No__1 for Yes__ : 0
If Diabetes __0 for No__1 for Yes__ : 1
If Alcoholism __0 for No__1 for Yes__ : 0
If Handcap __0 for No__1 for Yes__ : 1
If SMS_received __0 for No__1 for Yes__ : 0
How many Days from now appoint should be scheduled: 4
Prediction by KNN:
 Patient will Show Up
Prediction by Decesion Tree:
 Patient will Show Up
Prediction by LogReg:
 Patient will Show Up
