# Setup

In [None]:
!pip install imbalanced-learn
!pip install xgboost

In [309]:
# from IPython.display import HTML

# HTML("<style>.container { width:80% !important; }</style>")

In [1]:
import os
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import seaborn as sns

from sklearn.impute import SimpleImputer
from sklearn.preprocessing import OrdinalEncoder
from imblearn.over_sampling import SMOTE

In [41]:
# replace the dir if needed

ROOT_DIR = "/home/hui/Projects/g5/datasets"  # Henry's dir
# ROOT_DIR = "/content/drive/MyDrive/Project - G5/Dataset/"    #Lejia's dir
# ROOT_DIR = "/content/drive/MyDrive/Project - G5/Dataset/" #Nan's dir
# ROOT_DIR = "/content/drive/MyDrive/Project - G5/Dataset"


# path for image augmentation
RESIZE_DIR = ROOT_DIR + "/augmentation/resize"


# path for the different handcraft features
RAW_PIXEL_DIR = ROOT_DIR + "/handcraft_features/raw_pixel"
SIFT_DIR = ROOT_DIR + "/handcraft_features/sift"
HOG_DIR = ROOT_DIR + "/handcraft_features/hog"
LBP_DIR = ROOT_DIR + "/handcraft_features/lbp"


# menu datasets
MENU_FOOD = ROOT_DIR + "/MeetFresh_menu_food_2.csv"
MENU_DRINK = ROOT_DIR + "/MeetFresh_menu_drink_2.csv"

In [159]:
df_menu_food = pd.read_csv(MENU_FOOD, skiprows=1)
df_menu_drink = pd.read_csv(MENU_DRINK, skiprows=1)

In [160]:
# df_menu_food.sample(10)

In [161]:
import plotly.express as px

fig = px.histogram(df_menu_drink, x="Menu Category",
                   marginal="box",  # or violin, rug
                   hover_data=df_menu_drink.columns,
                   text_auto=True)
fig.show()

In [162]:
df_menu_drink["Menu Category"].unique()

array(['Teas', 'Herbal Teas', 'Milk Teas', 'Fresh Milk', 'Almond Drink',
       'Winter Melon Teas', 'Slush', 'Fluffy', 'Fruit Series'],
      dtype=object)

# Preprocessing

## Food Menu

In [163]:
df_menu_food.drop(['Item ID', 'Item Name (CHN)', 'Item Name (ENG)'], axis=1, inplace=True)

df_menu_food.iloc[:, 6:].fillna(0, inplace=True)

df_menu_food.iloc[:, 6:].replace([pd.NA, np.NaN, np.nan], 0, inplace=True)

In [164]:
# df_menu_food.loc[:, "Size"].fillna(df_menu_food["Size"].mode(), inplace=True)
# df_menu_food.loc[:, "Cold"].fillna(df_menu_food["Cold"].mode(), inplace=True)
# df_menu_food.loc[:, "Hot"].fillna(df_menu_food["Hot"].mode(), inplace=True)

# df_menu_food.loc[:, "Size"].replace([pd.NA, np.NaN, np.nan, ""], df_menu_food["Size"].mode(), inplace=True)
# df_menu_food.loc[:, "Cold"].replace([pd.NA, np.NaN, np.nan, ""], df_menu_food["Cold"].mode(), inplace=True)
# df_menu_food.loc[:, "Hot"].replace([pd.NA, np.NaN, np.nan, ""], df_menu_food["Hot"].mode(), inplace=True)

si = SimpleImputer(missing_values=np.nan, strategy='most_frequent')
df_menu_food.loc[:, "Size"] = si.fit_transform(np.array(df_menu_food.loc[:, "Size"]).reshape(-1, 1))
df_menu_food.loc[:, "Cold"] = si.fit_transform(np.array(df_menu_food.loc[:, "Cold"]).reshape(-1, 1))
df_menu_food.loc[:, "Hot"] = si.fit_transform(np.array(df_menu_food.loc[:, "Hot"]).reshape(-1, 1))

In [165]:
df_menu_drink["Menu Category"].value_counts()

Milk Teas            10
Fluffy                7
Fruit Series          7
Winter Melon Teas     6
Teas                  4
Herbal Teas           4
Fresh Milk            4
Slush                 4
Almond Drink          1
Name: Menu Category, dtype: int64

In [166]:
labels_food = {'Signature Series': 0, 'Create Your Own': 1, 'Shaved Ice': 2,
               'Tofu Pudding': 3, 'Egg Waffle': 4, 'Small Bites': 5}

labels_drink = {'Milk Teas': 0, 'Fruit Series': 1, 'Winter Melon Teas': 2, 'Teas': 3, 'Herbal Teas': 4,
                'Fresh Milk': 5, 'Slush': 6, 'Almond Drink': 7, 'Fluffy': 8}

df_menu_food["Menu Category"].replace(labels_food, inplace=True)
df_menu_drink["Menu Category"].replace(labels_drink, inplace=True)

In [167]:
enc_oe = OrdinalEncoder()

df_menu_food["Size"] = enc_oe.fit_transform(np.array(df_menu_food["Size"]).reshape(-1, 1))
# df_menu_food["Menu Category"] = enc_oe.fit_transform(np.array(df_menu_food["Menu Category"]).reshape(-1, 1))

In [168]:
# now we need to reindex the col
temp_cols = df_menu_food.columns.tolist()
new_cols = temp_cols[1:] + temp_cols[0:1]
df_menu_food = df_menu_food[new_cols]

# convert the float64 to int32
df_menu_food["Hot"] = df_menu_food["Hot"].astype(np.int32)
df_menu_food["Cold"] = df_menu_food["Cold"].astype(np.int32)
df_menu_food["Size"] = df_menu_food["Size"].astype(np.int32)
df_menu_food["Menu Category"].astype(np.int32)

0     0
1     0
2     0
3     0
4     0
5     0
6     1
7     1
8     1
9     1
10    1
11    2
12    2
13    2
14    2
15    2
16    2
17    2
18    2
19    2
20    2
21    2
22    2
23    2
24    2
25    2
26    2
27    3
28    3
29    3
30    3
31    3
32    3
33    3
34    3
35    3
36    3
37    3
38    3
39    3
40    3
41    3
42    3
43    3
44    3
45    4
46    4
47    4
48    4
49    4
50    4
51    4
52    5
53    5
54    5
55    5
56    5
57    5
58    5
59    5
Name: Menu Category, dtype: int32

In [169]:
df_menu_food.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 60 entries, 0 to 59
Data columns (total 43 columns):
 #   Column                  Non-Null Count  Dtype
---  ------                  --------------  -----
 0   Hot                     60 non-null     int32
 1   Cold                    60 non-null     int32
 2   Size                    60 non-null     int32
 3   Kcal                    60 non-null     int64
 4   Egg Allergen            60 non-null     int64
 5   Peanut Allergen         60 non-null     int64
 6   Top Seller              60 non-null     int64
 7   Seasonal                60 non-null     int64
 8   Taro Balls              60 non-null     int64
 9   Taro Paste              60 non-null     int64
 10  Potaro Balls            60 non-null     int64
 11  Boba                    60 non-null     int64
 12  Shaved Ice              60 non-null     int64
 13  Grass Jelly             60 non-null     int64
 14  Grass Jelly Shaved Ice  60 non-null     int64
 15  Ice Cream               6

In [170]:
df_menu_food.sample(10)

Unnamed: 0,Hot,Cold,Size,Kcal,Egg Allergen,Peanut Allergen,Top Seller,Seasonal,Taro Balls,Taro Paste,...,Mango,Egg Waffle,Matcha Egg Waffle,Chocolate Egg Waffle,Chocolate Chips,Matcha Red Bean,Mixed Nuts,Chocolate Wafer Rolls,Chocolate Syrup,Menu Category
24,0,1,1,1850,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,2
59,0,1,0,300,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,5
8,1,0,0,310,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,1
36,1,0,0,441,0,1,0,1,0,0,...,0,0,0,0,0,0,0,0,0,3
48,0,1,0,585,0,0,0,0,0,0,...,0,0,1,0,0,0,0,0,0,4
55,0,1,0,266,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,5
21,0,1,0,1267,0,0,1,0,0,0,...,0,0,0,0,0,0,0,0,0,2
56,0,1,0,663,0,1,0,0,0,0,...,0,0,0,0,0,0,0,0,0,5
4,1,0,0,944,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
22,0,1,1,653,0,1,1,0,0,0,...,0,0,0,0,0,0,0,0,0,2


## Drink Menu

In [171]:
df_menu_drink.drop(['Item ID', 'Item Name (CHN)', 'Item Name (ENG)'], axis=1, inplace=True)

# df_menu_drink.sample(10)

In [172]:
# df_menu_drink["Menu Category"] = enc_oe.fit_transform(np.array(df_menu_drink["Menu Category"]).reshape(-1, 1))

In [173]:
# now we need to reindex the col
temp_cols = df_menu_drink.columns.tolist()
new_cols = temp_cols[1:] + temp_cols[0:1]
df_menu_drink = df_menu_drink[new_cols]

In [174]:
df_menu_drink.sample(10)

Unnamed: 0,Hot,Cold,Kcal,Full Sugar,Half Sugar,No Sugar,Fixed Sugar,Black Tea,Green Tea,Oolong Tea,...,Strawberry,Fluffly Creamer,Lemon,Passion Fuit,Orange,Red Bean Soup,Hot Grass Jelly,Top Seller,Peanut,Menu Category
2,0,1,446,1,1,1,0,0,0,1,...,0,0,0,0,0,0,0,0,0,3
21,0,1,552,1,1,1,0,1,1,1,...,0,0,0,0,0,0,0,0,0,5
24,0,1,188,1,1,0,0,0,0,0,...,0,0,1,0,0,0,0,1,0,2
27,0,1,480,1,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,2
28,0,1,344,1,1,0,0,0,0,0,...,0,0,0,0,0,0,0,1,0,2
43,0,1,284,0,0,0,1,0,1,0,...,0,0,0,1,0,0,0,1,0,1
37,0,1,315,1,1,0,0,0,0,0,...,0,1,0,0,0,0,0,0,0,8
17,0,1,590,1,1,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
45,0,1,219,0,0,0,1,0,1,0,...,0,1,0,0,1,0,0,0,0,1
32,0,1,290,0,0,0,1,0,0,0,...,0,1,0,0,0,0,0,1,0,6


In [175]:
df_menu_drink.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 47 entries, 0 to 46
Data columns (total 36 columns):
 #   Column            Non-Null Count  Dtype
---  ------            --------------  -----
 0   Hot               47 non-null     int64
 1   Cold              47 non-null     int64
 2   Kcal              47 non-null     int64
 3   Full Sugar        47 non-null     int64
 4   Half Sugar        47 non-null     int64
 5   No Sugar          47 non-null     int64
 6   Fixed Sugar       47 non-null     int64
 7   Black Tea         47 non-null     int64
 8   Green Tea         47 non-null     int64
 9   Oolong Tea        47 non-null     int64
 10  Jin Xuan Oolong   47 non-null     int64
 11  Herbal Tea        47 non-null     int64
 12  Winter Melon Tea  47 non-null     int64
 13  Almond Drink      47 non-null     int64
 14  Fresh Milk        47 non-null     int64
 15  Milk              47 non-null     int64
 16  Boba              47 non-null     int64
 17  Taro Paste        47 non-null     int

# Model

In [176]:
from xgboost import XGBClassifier
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score

In [179]:
def model_xgb1():
    model = XGBClassifier(
        learning_rate=0.1,
        metric='multiclass',
        eval_metric='mlogloss'
    )

    return model


def model_xgb2():
    model = XGBClassifier(
        learning_rate=0.1,
        max_depth=5,
        n_estimators=100,
        gamma=0,
        metric='multiclass',
        eval_metric='mlogloss'
    )

    return model

## Food Menu

### Split data

In [177]:
# split data into train and test sets

seed = 7
test_size = 0.3

X_train1, X_test1, y_train1, y_test1 = train_test_split(
    df_menu_food.iloc[:, :-1],
    df_menu_food.iloc[:, -1],
    test_size=test_size,
    random_state=seed)

In [178]:
print("X train: {} and test: {}".format(X_train1.shape, X_test1.shape))
print("Y train: {} and test: {}".format(y_train1.shape, y_test1.shape))

X train: (42, 42) and test: (18, 42)
Y train: (42,) and test: (18,)


### Model 1

In [180]:
model = model_xgb1()
model.fit(X_train1, y_train1)

y_pred1 = model.predict(X_test1)

Parameters: { "metric" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.







pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.



### Analysis the results

In [181]:
predictions1 = [round(value) for value in y_pred1]

# evaluate predictions
accuracy = accuracy_score(y_test1, predictions1)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

Accuracy: 72.22%


In [182]:
from sklearn.metrics import classification_report

In [193]:
print(classification_report(y_test1, predictions1, target_names=labels_food))

                  precision    recall  f1-score   support

Signature Series       0.00      0.00      0.00         1
 Create Your Own       1.00      1.00      1.00         1
      Shaved Ice       1.00      1.00      1.00         7
    Tofu Pudding       0.67      0.57      0.62         7
      Egg Waffle       0.00      0.00      0.00         1
     Small Bites       0.25      1.00      0.40         1

        accuracy                           0.72        18
       macro avg       0.49      0.60      0.50        18
    weighted avg       0.72      0.72      0.71        18



In [184]:
from sklearn.metrics import confusion_matrix

In [185]:
conf_matrix = confusion_matrix(y_test1, y_pred1, normalize="true")
conf_matrix = np.around(conf_matrix.astype('float') /
                        conf_matrix.sum(axis=1)[:, np.newaxis], decimals=2)

fig = px.imshow(conf_matrix, text_auto=True,
                x=list(labels_food), y=list(labels_food))
fig.update_layout(title="Normalized Confusion Matrix for Food Menu on XGBoost")
fig.show()

## Drink Menu

### Split data

In [186]:
# split data into train and test sets

seed = 7
test_size = 0.3

X_train2, X_test2, y_train2, y_test2 = train_test_split(
    df_menu_drink.iloc[:, :-1],
    df_menu_drink.iloc[:, -1],
    test_size=test_size,
    random_state=seed)

In [187]:
print("X train: {} and test: {}".format(X_train2.shape, X_test2.shape))
print("Y train: {} and test: {}".format(y_train2.shape, y_test2.shape))

X train: (32, 35) and test: (15, 35)
Y train: (32,) and test: (15,)


### Model 2

In [188]:
model2 = model_xgb1()
model2.fit(X_train2, y_train2)

y_pred2 = model2.predict(X_test2)

Parameters: { "metric" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




### Analysis the results

In [189]:
predictions2 = [round(value) for value in y_pred2]

# evaluate predictions
accuracy2 = accuracy_score(y_test2, predictions2)
print("Accuracy: %.2f%%" % (accuracy2 * 100.0))

Accuracy: 73.33%


In [190]:
from sklearn.metrics import classification_report
from sklearn.metrics import confusion_matrix

In [194]:
df_menu_drink["Menu Category"].value_counts()

0    10
8     7
1     7
2     6
3     4
4     4
5     4
6     4
7     1
Name: Menu Category, dtype: int64

In [195]:
# labels_drink = {'Milk Teas': 0, 'Fruit Series': 1, 'Winter Melon Teas': 2, 'Teas': 3, 'Herbal Teas': 4,
#                 'Fresh Milk': 5, 'Slush': 6, 'Almond Drink': 7, 'Fluffy': 8}

print(classification_report(y_test2, predictions2))

              precision    recall  f1-score   support

           0       1.00      0.75      0.86         4
           1       0.40      1.00      0.57         2
           2       1.00      1.00      1.00         1
           3       1.00      1.00      1.00         1
           5       0.67      1.00      0.80         2
           6       0.00      0.00      0.00         2
           7       0.00      0.00      0.00         1
           8       1.00      1.00      1.00         2

    accuracy                           0.73        15
   macro avg       0.63      0.72      0.65        15
weighted avg       0.68      0.73      0.68        15




Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.



In [197]:
conf_matrix2 = confusion_matrix(y_test2, y_pred2, normalize="true")
conf_matrix2 = np.around(conf_matrix2.astype('float') /
                        conf_matrix2.sum(axis=1)[:, np.newaxis], decimals=2)

fig = px.imshow(conf_matrix2, text_auto=True)
fig.update_layout(title="Normalized Confusion Matrix for Food Menu on XGBoost")
fig.show()

# Tune Parameters

In [248]:
from sklearn.model_selection import StratifiedKFold, GridSearchCV, RandomizedSearchCV

para_tunning = {
    'learning_rate': [0.01, 0.05, 0.1],
    'min_child_weight': [1, 5, 10],
    'gamma': [0.5, 1, 1.5, 2, 5],
    'subsample': np.arange(0.6, 0.8, 0.2).tolist(),
    'colsample_bytree': np.arange(0.6, 0.8, 0.2).tolist(),
    'max_depth': np.arange(3, 10, 1).tolist(),
    "n_estimators": np.arange(100, 500, 100).tolist(),
    "objective": ["multi:mlogloss"],
    "aplha": np.arange(0, 8, 2).tolist()
}

# SMOTE

In [207]:
from collections import Counter
from imblearn.over_sampling import RandomOverSampler

## Food Menu

In [205]:
ros1 = RandomOverSampler(random_state=42)

In [206]:
X_res1, y_res1 = ros1.fit_resample(df_menu_food.iloc[:, :-1],
                                   df_menu_food.iloc[:, -1])

In [208]:
print('Resampled dataset shape %s' % Counter(y_res1))

Resampled dataset shape Counter({0: 18, 1: 18, 2: 18, 3: 18, 4: 18, 5: 18})


### Split data

In [209]:
# split data into train and test sets

seed = 7
test_size = 0.3

X_train1a, X_test1a, y_train1a, y_test1a = train_test_split(
    X_res1,
    y_res1,
    test_size=test_size,
    random_state=seed)

In [215]:
print("X train: {} and test: {}".format(X_train1.shape, X_test1.shape))
print("Y train: {} and test: {}".format(y_train1.shape, y_test1.shape))
print()
print("X train: {} and test: {}".format(X_train1a.shape, X_test1a.shape))
print("Y train: {} and test: {}".format(y_train1a.shape, y_test1a.shape))

X train: (42, 42) and test: (18, 42)
Y train: (42,) and test: (18,)

X train: (75, 42) and test: (33, 42)
Y train: (75,) and test: (33,)


### Model 3

#### a

In [216]:
model3 = model_xgb1()
model3.fit(X_train1a, y_train1a)

y_pred1a = model3.predict(X_test1a)




pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.



Parameters: { "metric" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.




In [220]:
predictions1a = [round(value) for value in y_pred1a]

# evaluate predictions
accuracy = accuracy_score(y_test1a, predictions1a)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

Accuracy: 75.76%


#### b

In [253]:
skf = StratifiedKFold(n_splits=10, shuffle=True)
model3b = XGBClassifier()

grid = RandomizedSearchCV(model3b,
                    para_tunning,
                    scoring='recall_macro',
                    n_jobs=-1,
                    cv=skf.split(X_train1a, y_train1a),
                    refit=True)

grid.fit(X_train1a, y_train1a)
y_pred1b = grid.predict(X_test1a)

predictions1b = [round(value) for value in y_pred1b]

# evaluate predictions
accuracy = accuracy_score(y_test1a, predictions1b)
print("Accuracy: %.2f%%" % (accuracy * 100.0))
print()
print("The best parameters: ".format(grid.best_params_))


The least populated class in y has only 9 members, which is less than n_splits=10.



Parameters: { "aplha", "metric" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "aplha", "metric" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "aplha", "metric" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "aplha", "metric" } 

Parameters: { "aplha", "metric" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "aplha", "metric" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "aplha", "metric" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "aplha", "metric" } 

Parameters: { "aplha", "metric" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "aplha", "metric" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "aplha", "metric" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "aplha", "metric" } 

Parameters: { "aplha", "metric" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "aplha", "metric" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "aplha", "metric" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "aplha", "metric" } 




pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.



Parameters: { "aplha" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Accuracy: 72.73%

The best parameters: 


### Analysis the results

In [221]:
print(classification_report(y_test1a, predictions1a, target_names=labels_food))

                  precision    recall  f1-score   support

Signature Series       1.00      1.00      1.00         5
 Create Your Own       0.83      1.00      0.91         5
      Shaved Ice       1.00      0.44      0.62         9
    Tofu Pudding       0.33      0.50      0.40         2
      Egg Waffle       0.56      0.71      0.63         7
     Small Bites       0.83      1.00      0.91         5

        accuracy                           0.76        33
       macro avg       0.76      0.78      0.74        33
    weighted avg       0.81      0.76      0.75        33



In [222]:
conf_matrix = confusion_matrix(y_test1a, y_pred1a, normalize="true")
conf_matrix = np.around(conf_matrix.astype('float') /
                        conf_matrix.sum(axis=1)[:, np.newaxis], decimals=2)

fig = px.imshow(conf_matrix, text_auto=True,
                x=list(labels_food), y=list(labels_food))
fig.update_layout(
    title="Normalized Confusion Matrix for Food Menu on XGBoost (ROS data)")
fig.show()

## Drink Menu

In [224]:
ros2 = RandomOverSampler(random_state=42)

X_res2, y_res2 = ros2.fit_resample(df_menu_drink.iloc[:, :-1],
                                   df_menu_drink.iloc[:, -1])

### Split data

In [225]:
# split data into train and test sets

seed = 7
test_size = 0.3

X_train2a, X_test2a, y_train2a, y_test2a = train_test_split(
    X_res2,
    y_res2,
    test_size=test_size,
    random_state=seed)

In [226]:
print("X train: {} and test: {}".format(X_train2.shape, X_test2.shape))
print("Y train: {} and test: {}".format(y_train2.shape, y_test2.shape))
print()
print("X train: {} and test: {}".format(X_train2a.shape, X_test2a.shape))
print("Y train: {} and test: {}".format(y_train2a.shape, y_test2a.shape))

X train: (32, 35) and test: (15, 35)
Y train: (32,) and test: (15,)

X train: (63, 35) and test: (27, 35)
Y train: (63,) and test: (27,)


### Model 4

#### a

In [227]:
model4 = model_xgb1()
model4.fit(X_train2a, y_train2a)

y_pred2a = model4.predict(X_test2a)

Parameters: { "metric" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.







pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.



In [228]:
predictions2a = [round(value) for value in y_pred2a]

# evaluate predictions
accuracy = accuracy_score(y_test2a, predictions2a)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

Accuracy: 88.89%


#### b

In [258]:
skf = StratifiedKFold(n_splits=5, shuffle=True)
model4b = XGBClassifier()

grid = RandomizedSearchCV(model4b,
                    para_tunning,
                    scoring='recall_macro',
                    n_jobs=-1,
                    cv=skf.split(X_train2a, y_train2a),
                    refit=True)

grid.fit(X_train2a, y_train2a)
y_pred2b = grid.predict(X_test2a)

predictions2b = [round(value) for value in y_pred2b]

# evaluate predictions
accuracy = accuracy_score(y_test2a, predictions2b)
print("Accuracy: %.2f%%" % (accuracy * 100.0))

Parameters: { "aplha" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "aplha" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "aplha" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "aplha" } might not be used.

  This could be a fa

Parameters: { "aplha" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "aplha" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "aplha" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "aplha" } might not be used.

  This could be a fa

Parameters: { "aplha" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "aplha" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "aplha" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "aplha" } might not be used.

  This could be a fa




pandas.Int64Index is deprecated and will be removed from pandas in a future version. Use pandas.Index with the appropriate dtype instead.



Parameters: { "aplha" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Accuracy: 92.59%


In [260]:
grid.get_params()["estimator"]

XGBClassifier(base_score=None, booster=None, colsample_bylevel=None,
              colsample_bynode=None, colsample_bytree=None,
              enable_categorical=False, gamma=None, gpu_id=None,
              importance_type=None, interaction_constraints=None,
              learning_rate=None, max_delta_step=None, max_depth=None,
              min_child_weight=None, missing=nan, monotone_constraints=None,
              n_estimators=100, n_jobs=None, num_parallel_tree=None,
              predictor=None, random_state=None, reg_alpha=None,
              reg_lambda=None, scale_pos_weight=None, subsample=None,
              tree_method=None, validate_parameters=None, verbosity=None)

In [261]:
print(classification_report(y_test2a, predictions2b))

              precision    recall  f1-score   support

           0       1.00      0.80      0.89         5
           1       1.00      0.67      0.80         3
           2       1.00      1.00      1.00         4
           3       1.00      1.00      1.00         2
           4       0.67      1.00      0.80         2
           5       1.00      1.00      1.00         4
           6       0.67      1.00      0.80         2
           7       1.00      1.00      1.00         2
           8       1.00      1.00      1.00         3

    accuracy                           0.93        27
   macro avg       0.93      0.94      0.92        27
weighted avg       0.95      0.93      0.93        27



In [264]:
conf_matrix = confusion_matrix(y_test2a, y_pred2b, normalize="true")
conf_matrix = np.around(conf_matrix.astype('float') /
                        conf_matrix.sum(axis=1)[:, np.newaxis], decimals=2)

fig = px.imshow(conf_matrix, text_auto=True)
fig.update_layout(
    title="Normalized Confusion Matrix for Drink Menu on XGBoost (ROS data)")
fig.show()

Parameters: { "aplha" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "aplha" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "aplha" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "aplha" } might not be used.

  This could be a fa

Parameters: { "aplha" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "aplha" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "aplha" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "aplha" } might not be used.

  This could be a fa

Parameters: { "aplha" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "aplha" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "aplha" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "aplha" } might not be used.

  This could be a fa

Parameters: { "aplha" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "aplha" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "aplha" } might not be used.

  This could be a false alarm, with some parameters getting used by language bindings but
  then being mistakenly passed down to XGBoost core, or some parameter actually being used
  but getting flagged wrongly here. Please open an issue if you find any such cases.


Parameters: { "aplha" } might not be used.

  This could be a fa

### Analysis the results

In [229]:
print(classification_report(y_test2a, predictions2a))

              precision    recall  f1-score   support

           0       1.00      0.80      0.89         5
           1       1.00      0.33      0.50         3
           2       1.00      1.00      1.00         4
           3       1.00      1.00      1.00         2
           4       1.00      1.00      1.00         2
           5       0.80      1.00      0.89         4
           6       0.67      1.00      0.80         2
           7       1.00      1.00      1.00         2
           8       0.75      1.00      0.86         3

    accuracy                           0.89        27
   macro avg       0.91      0.90      0.88        27
weighted avg       0.92      0.89      0.88        27



In [263]:
conf_matrix = confusion_matrix(y_test2a, y_pred2a, normalize="true")
conf_matrix = np.around(conf_matrix.astype('float') /
                        conf_matrix.sum(axis=1)[:, np.newaxis], decimals=2)

fig = px.imshow(conf_matrix, text_auto=True)
fig.update_layout(
    title="Normalized Confusion Matrix for Drink Menu on XGBoost (ROS data)")
fig.show()