In [1]:
import numpy as np
import pandas as pd

data = pd.read_csv(r'../../data/processed/processed-data.csv')
data.head()

Unnamed: 0,Gender,Age,Neighbourhood,Scholarship,Hipertension,Diabetes,Alcoholism,Handcap,SMS_received,WaitingTime,AppointmentDayOfWeek,WaitGroup,ChronicGroup,No-show
0,0,62,39,0,1,0,0,1,0,-1,4,5,2,0
1,1,56,39,0,0,0,0,1,0,-1,4,5,1,0
2,0,62,45,0,0,0,0,1,0,-1,4,5,1,0
3,0,8,54,0,0,0,0,1,0,-1,4,5,1,0
4,0,56,39,0,1,1,0,1,0,-1,4,5,0,0


#### Fetch X and Y's for our training from dataset.

In [2]:
x = data.drop('No-show', axis=1)
y = data['No-show']

- Split out data for training and testing.

In [3]:
from sklearn.model_selection import train_test_split

xtrain, xtest, ytrain, ytest = train_test_split(x, y, test_size=0.2, random_state=42)

In [4]:
from sklearn.ensemble import GradientBoostingClassifier

model = GradientBoostingClassifier()
model.fit(xtrain, ytrain)

In [5]:
ypred = model.predict(xtest)

In [6]:
from sklearn.metrics import accuracy_score, classification_report

score = accuracy_score(ytest, ypred)

print("Test Accuracy of the Best Model:", score)
print("\nClassification Report of the Best Model:")
print(classification_report(ytest, ypred))

Test Accuracy of the Best Model: 0.7116234580117422

Classification Report of the Best Model:
              precision    recall  f1-score   support

           0       0.75      0.72      0.74     16900
           1       0.67      0.69      0.68     13418

    accuracy                           0.71     30318
   macro avg       0.71      0.71      0.71     30318
weighted avg       0.71      0.71      0.71     30318



## Let's scale our data for best accuracy.

In [7]:
from sklearn.preprocessing import StandardScaler

scaler = StandardScaler()

scaler.fit(x)
xscaled = scaler.fit_transform(x)

In [8]:
xtrain, xtest, ytrain, ytest = train_test_split(xscaled, y, test_size=0.2, random_state=42)

In [9]:
model = GradientBoostingClassifier()
model.fit(xtrain, ytrain)

In [10]:
ypred = model.predict(xtest)

In [11]:
from sklearn.metrics import accuracy_score, classification_report

score = accuracy_score(ytest, ypred)

print("Test Accuracy of the Best Model:", score)
print("\nClassification Report of the Best Model:")
print(classification_report(ytest, ypred))

Test Accuracy of the Best Model: 0.7116234580117422

Classification Report of the Best Model:
              precision    recall  f1-score   support

           0       0.75      0.72      0.74     16900
           1       0.67      0.69      0.68     13418

    accuracy                           0.71     30318
   macro avg       0.71      0.71      0.71     30318
weighted avg       0.71      0.71      0.71     30318



## Hyperparameter tuning.

In [14]:
from sklearn.ensemble import GradientBoostingClassifier

model = GradientBoostingClassifier(
    n_estimators=100,
    random_state=42, 
    verbose=1,
    max_depth=None,
)

model.fit(xtrain, ytrain)

      Iter       Train Loss   Remaining Time 
         1           1.1910           13.88m
         2           1.0455           14.35m
         3           0.9246           14.04m
         4           0.8227           13.77m
         5           0.7358           13.62m
         6           0.6610           12.86m
         7           0.5962           12.29m
         8           0.5398           11.81m
         9           0.4903           11.50m
        10           0.4469           11.20m
        20           0.2080            9.35m
        30           0.1284            7.93m
        40           0.1001            6.67m
        50           0.0899            5.50m
        60           0.0861            4.38m
        70           0.0847            3.28m
        80           0.0842            2.23m
        90           0.0840            1.12m
       100           0.0839            0.00s


In [15]:
ypred = model.predict(xtest)

In [16]:
from sklearn.metrics import accuracy_score, classification_report

score = accuracy_score(ytest, ypred)

print("Test Accuracy of the Best Model:", score)
print("\nClassification Report of the Best Model:")
print(classification_report(ytest, ypred))

Test Accuracy of the Best Model: 0.7607032126129692

Classification Report of the Best Model:
              precision    recall  f1-score   support

           0       0.80      0.77      0.78     16900
           1       0.72      0.75      0.74     13418

    accuracy                           0.76     30318
   macro avg       0.76      0.76      0.76     30318
weighted avg       0.76      0.76      0.76     30318

