In [78]:
import numpy as np
import pandas as pd
import matplotlib.pyplot as plt
import pickle
from lazypredict.Supervised import LazyClassifier

In [6]:
df = pd.read_csv('Waiter_reward.csv')
level = pd.read_csv("Level.csv")

In [7]:
waiter_re = pd.concat([df,level], axis=1)

In [8]:
waiter_re.drop(['customer_satisfaction_score'], axis=1, inplace=True)

In [9]:
waiter_re.head()

Unnamed: 0,Waiter_id,Extra_Working_hours,table_served,table_turnover_time,sales_revenue,Level
0,1,1,89,222,4446,Gold
1,2,3,110,278,5098,Platinum
2,3,4,144,278,5550,Platinum
3,4,2,95,479,4579,Silver
4,5,0,79,253,4137,Bronze


In [10]:
def label_encoder(x):
    if x == "Platinum":
        return 0
    elif x == "Gold":
        return 1
    elif x == "Silver":
        return 2
    else:
        return 3

In [11]:
waiter_re['Level'] = waiter_re['Level'].apply(label_encoder)

In [12]:
waiter_re.head()

Unnamed: 0,Waiter_id,Extra_Working_hours,table_served,table_turnover_time,sales_revenue,Level
0,1,1,89,222,4446,1
1,2,3,110,278,5098,0
2,3,4,144,278,5550,0
3,4,2,95,479,4579,2
4,5,0,79,253,4137,3


In [13]:
from sklearn.model_selection import train_test_split

In [14]:
X = waiter_re.drop(['Level', 'Waiter_id'], axis=1).values

In [15]:
y = waiter_re['Level'].values

In [16]:
 X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.30)

# Lazy Classifier 

In [17]:
clf = LazyClassifier(verbose=0,ignore_warnings=True, custom_metric=None)
models,predictions = clf.fit(X_train, X_test, y_train, y_test)

100%|██████████| 29/29 [00:02<00:00, 12.65it/s]


In [18]:
print(models)

                               Accuracy  Balanced Accuracy ROC AUC  F1 Score   
Model                                                                          
LabelSpreading                     0.77               0.76    None      0.77  \
LogisticRegression                 0.76               0.75    None      0.76   
NuSVC                              0.76               0.75    None      0.76   
LabelPropagation                   0.76               0.75    None      0.76   
QuadraticDiscriminantAnalysis      0.75               0.75    None      0.75   
SVC                                0.75               0.74    None      0.75   
NearestCentroid                    0.75               0.74    None      0.75   
CalibratedClassifierCV             0.74               0.74    None      0.74   
KNeighborsClassifier               0.74               0.74    None      0.74   
LinearSVC                          0.74               0.73    None      0.73   
LinearDiscriminantAnalysis         0.74 

In [19]:
from sklearn.linear_model import LogisticRegression

# Logistic Regression

In [20]:
classifier = LogisticRegression()

In [21]:
from sklearn.model_selection import GridSearchCV

In [22]:
parameters = {'penalty':['l1', 'l2', 'easticnet'], 'C':[1, 2, 3, 4, 5, 6, 10, 20, 30, 40, 50],
             'max_iter':[100, 200, 300]}

In [23]:
classifier_regression = GridSearchCV(classifier, param_grid = parameters, scoring='accuracy', cv=10)

In [24]:
classifier_regression.fit(X_train, y_train)

In [25]:
print(classifier_regression.best_params_)

{'C': 40, 'max_iter': 300, 'penalty': 'l2'}


In [26]:
print(classifier_regression.best_score_)

0.6785714285714286


In [27]:
log_predict = classifier_regression.predict(X_test)

In [28]:
from sklearn.metrics import classification_report

In [29]:
print(classification_report(log_predict, y_test))

              precision    recall  f1-score   support

           0       0.85      0.82      0.84        91
           1       0.58      0.66      0.62        73
           2       0.62      0.49      0.55        84
           3       0.62      0.75      0.68        52

    accuracy                           0.68       300
   macro avg       0.67      0.68      0.67       300
weighted avg       0.68      0.68      0.67       300



In [30]:
result = log_predict == y_test
pd.DataFrame(result).value_counts()

True     203
False     97
Name: count, dtype: int64

In [31]:
[3,3,3,3,2,2,2,2,1,1,1,1,0,0,0,0]
classifier_regression.predict([[0,71,380,4109], [0,76,212,4065], [1,83,476,4273], [1,84,429,4245],
            [1,89,272,4211], [0,75,291,4170], [2,99,525,4615], [2,94,328,4670],
            [2,99,547,4792], [2,99,466,4768], [3,106,352,5082], [3,115,485,5172],
            [3,117,299,5187], [3,117,448,4910], [4,134,484,5423], [4,130,404,5244]])

array([3, 2, 3, 2, 1, 3, 1, 1, 2, 2, 1, 1, 0, 0, 0, 0], dtype=int64)

# XG Boost

In [32]:
from xgboost import XGBClassifier

In [33]:
xgb = XGBClassifier()

In [34]:
xgb.fit(X_train, y_train)

In [79]:
xgb_predict = xgb.predict(X_test)

In [83]:
pickle.dump(xgb, open('XGB_model.pkl', 'wb'))
# xgb.save_model('XGB_model.h5')

In [36]:
print(classification_report(log_predict, y_test))

              precision    recall  f1-score   support

           0       0.85      0.82      0.84        91
           1       0.58      0.66      0.62        73
           2       0.62      0.49      0.55        84
           3       0.62      0.75      0.68        52

    accuracy                           0.68       300
   macro avg       0.67      0.68      0.67       300
weighted avg       0.68      0.68      0.67       300



In [37]:
from sklearn.model_selection import cross_val_score

In [38]:
xgb_accuracy = cross_val_score(xgb, X_train, y_train, cv=10)

In [39]:
print(f"Accuracy: {xgb_accuracy.mean()*100}")

Accuracy: 72.57142857142857


In [40]:
[3,3,3,3,2,2,2,2,1,1,1,1,0,0,0,0]
xgb.predict([[0,71,380,4109], [0,76,212,4065], [1,83,476,4273], [1,84,429,4245],
            [1,89,272,4211], [0,75,291,4170], [2,99,525,4615], [2,94,328,4670],
            [2,99,547,4792], [2,99,466,4768], [3,106,352,5082], [3,115,485,5172],
            [3,117,299,5187], [3,117,448,4910], [4,134,484,5423], [4,130,404,5244]])

array([3, 3, 3, 3, 2, 3, 1, 2, 2, 1, 1, 0, 0, 0, 0, 0], dtype=int64)

In [41]:
waiter_re[waiter_re['Level']==3]

Unnamed: 0,Waiter_id,Extra_Working_hours,table_served,table_turnover_time,sales_revenue,Level
4,5,0,79,253,4137,3
7,8,0,72,441,4159,3
11,12,0,79,460,4100,3
14,15,0,74,352,4187,3
16,17,1,81,386,4205,3
...,...,...,...,...,...,...
975,976,0,72,397,4003,3
976,977,2,95,559,4590,3
983,984,1,82,583,4390,3
987,988,0,75,442,4038,3


# K Nearest Neighbor

In [42]:
from sklearn.neighbors  import KNeighborsClassifier

In [43]:
knn = KNeighborsClassifier( )

In [44]:
knn.fit(X_train, y_train)

In [45]:
knn_predict = knn.predict(X_test)

In [46]:
print(classification_report(log_predict, y_test))

              precision    recall  f1-score   support

           0       0.85      0.82      0.84        91
           1       0.58      0.66      0.62        73
           2       0.62      0.49      0.55        84
           3       0.62      0.75      0.68        52

    accuracy                           0.68       300
   macro avg       0.67      0.68      0.67       300
weighted avg       0.68      0.68      0.67       300



In [47]:
from sklearn.model_selection import cross_val_score

In [48]:
knn_accuracy = cross_val_score(knn, X_train, y_train, cv=10)

In [49]:
print(f"Accuracy: {knn_accuracy.mean()*100}")

Accuracy: 72.71428571428571


In [50]:
[3,3,3,3,2,2,2,2,1,1,1,1,0,0,0,0]
knn.predict([[0,71,380,4109], [0,76,212,4065], [1,83,476,4273], [1,84,429,4245],
            [1,89,272,4211], [0,75,291,4170], [2,99,525,4615], [2,94,328,4670],
            [2,99,547,4792], [2,99,466,4768], [3,106,352,5082], [3,115,485,5172],
            [3,117,299,5187], [3,117,448,4910], [4,134,484,5423], [4,130,404,5244]])

array([3, 3, 3, 3, 3, 3, 1, 1, 1, 2, 1, 0, 0, 0, 0, 0], dtype=int64)

# SVM 

In [51]:
from sklearn.preprocessing import StandardScaler

In [52]:
sc = StandardScaler()

In [53]:
X_train = sc.fit_transform(X_train)
X_test = sc.transform(X_test)

In [54]:
from sklearn.svm import SVC

In [55]:
svc = SVC()

In [56]:
svc.fit(X_train, y_train)

In [57]:
svc_predict = svc.predict(X_test)

In [58]:
print(classification_report(svc_predict, y_test))

              precision    recall  f1-score   support

           0       0.94      0.85      0.89        98
           1       0.63      0.75      0.68        69
           2       0.59      0.58      0.59        67
           3       0.81      0.77      0.79        66

    accuracy                           0.75       300
   macro avg       0.74      0.74      0.74       300
weighted avg       0.76      0.75      0.75       300



In [59]:
svc_accuracy = cross_val_score(knn, X_train, y_train, cv=10)

In [60]:
print(f"Accuracy: {svc_accuracy.mean()*100}")

Accuracy: 69.42857142857143


In [61]:
[3,3,3,3,2,2,2,2,1,1,1,1,0,0,0,0]
svc.predict([[0,71,380,4109], [0,76,212,4065], [1,83,476,4273], [1,84,429,4245],
            [1,89,272,4211], [0,75,291,4170], [2,99,525,4615], [2,94,328,4670],
            [2,99,547,4792], [2,99,466,4768], [3,106,352,5082], [3,115,485,5172],
            [3,117,299,5187], [3,117,448,4910], [4,134,484,5423], [4,130,404,5244]])

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int64)

# Decision Tree 

In [62]:
from sklearn.tree import DecisionTreeClassifier

In [63]:
dtc = DecisionTreeClassifier()

In [64]:
dtc.fit(X_train, y_train)

In [65]:
dtc_predict = dtc.predict(X_test)

In [66]:
print(classification_report(dtc_predict, y_test))

              precision    recall  f1-score   support

           0       0.85      0.82      0.83        92
           1       0.63      0.68      0.65        77
           2       0.64      0.57      0.60        74
           3       0.71      0.79      0.75        57

    accuracy                           0.71       300
   macro avg       0.71      0.71      0.71       300
weighted avg       0.71      0.71      0.71       300



In [67]:
dtc_accuracy = cross_val_score(dtc, X_train, y_train, cv=10)

In [68]:
print(f"Accuracy: {dtc_accuracy.mean()*100}")

Accuracy: 66.71428571428571


In [69]:
[3,3,3,3,2,2,2,2,1,1,1,1,0,0,0,0]
dtc.predict([[0,71,380,4109], [0,76,212,4065], [1,83,476,4273], [1,84,429,4245],
            [1,89,272,4211], [0,75,291,4170], [2,99,525,4615], [2,94,328,4670],
            [2,99,547,4792], [2,99,466,4768], [3,106,352,5082], [3,115,485,5172],
            [3,117,299,5187], [3,117,448,4910], [4,134,484,5423], [4,130,404,5244]])

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int64)

# Random Forest

In [70]:
from sklearn.ensemble import RandomForestClassifier 

In [71]:
rfc = RandomForestClassifier() 

In [72]:
rfc.fit(X_train, y_train)

In [73]:
rfc_predict = rfc.predict(X_test)

In [74]:
print(classification_report(rfc_predict, y_test))

              precision    recall  f1-score   support

           0       0.94      0.87      0.91        95
           1       0.63      0.75      0.68        69
           2       0.59      0.50      0.54        78
           3       0.68      0.74      0.71        58

    accuracy                           0.72       300
   macro avg       0.71      0.72      0.71       300
weighted avg       0.73      0.72      0.72       300



In [75]:
rfc_accuracy = cross_val_score(rfc, X_train, y_train, cv=10)

In [76]:
print(f"Accuracy: {rfc_accuracy.mean()*100}")

Accuracy: 72.14285714285714


In [77]:
[3,3,3,3,2,2,2,2,1,1,1,1,0,0,0,0]
dtc.predict([[0,71,380,4109], [0,76,212,4065], [1,83,476,4273], [1,84,429,4245],
            [1,89,272,4211], [0,75,291,4170], [2,99,525,4615], [2,94,328,4670],
            [2,99,547,4792], [2,99,466,4768], [3,106,352,5082], [3,115,485,5172],
            [3,117,299,5187], [3,117,448,4910], [4,134,484,5423], [4,130,404,5244]])

array([0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0], dtype=int64)

In [74]:
data = {0:"Paltinum", 1:"Gold", 2:'Silver', 3:"Bronze"}

In [76]:
data[0]

'Paltinum'