In [1]:
# mlp for multi-label classification
from numpy import mean
from numpy import std
from numpy import asarray
from sklearn.datasets import make_multilabel_classification
from sklearn.model_selection import RepeatedKFold
from keras.models import Sequential
from keras.layers import Dense
from sklearn.metrics import accuracy_score

import pandas as pd

In [2]:
def get_model(n_inputs, n_outputs):
    """Build and compile a small MLP for multi-label classification.

    Args:
        n_inputs: number of input features fed to the hidden layer.
        n_outputs: number of labels; the output layer has one unit per label.

    Returns:
        A compiled ``Sequential`` model: one 20-unit ReLU hidden layer followed
        by a sigmoid output layer, trained with binary cross-entropy and Adam.
    """
    hidden_layer = Dense(
        units=20,
        activation='relu',
        kernel_initializer='he_uniform',
        input_dim=n_inputs,
    )
    # sigmoid is applied element-wise, so each label gets its own score in [0, 1]
    output_layer = Dense(units=n_outputs, activation='sigmoid')
    network = Sequential([hidden_layer, output_layer])
    network.compile(optimizer='adam', loss='binary_crossentropy')
    return network

In [3]:
def evaluate_model(X, y, n_splits=10, n_repeats=3, epochs=100, random_state=1):
    """Score the MLP with repeated k-fold cross-validation.

    For every fold a fresh model is built with ``get_model``, trained on the
    training rows, and scored on the held-out rows. Predicted probabilities
    are rounded to 0/1 before scoring. For multi-label indicator targets,
    scikit-learn's ``accuracy_score`` is subset accuracy: a row only counts as
    correct when every label matches.

    Args:
        X: 2-D array-like of input features, one row per sample.
        y: 2-D array-like of 0/1 label indicators, one row per sample and one
            column per label.
        n_splits: number of folds per repeat.
        n_repeats: number of times the k-fold split is repeated.
        epochs: training epochs for each fold's model.
        random_state: seed passed to ``RepeatedKFold`` so the splits are
            repeatable. Model weight initialisation is not seeded here.

    Returns:
        list with one accuracy value per fold, in split order
        (``n_splits * n_repeats`` entries).

    Raises:
        ValueError: if ``X`` or ``y`` is not 2-D, or their row counts differ.
    """
    # Coerce to NumPy so the fold indices below always select rows; on a
    # DataFrame, X[train_ix] would be interpreted as a column lookup instead.
    X, y = asarray(X), asarray(y)
    if X.ndim != 2 or y.ndim != 2:
        raise ValueError(
            'X and y must both be 2-D, got shapes %s and %s' % (X.shape, y.shape)
        )
    if X.shape[0] != y.shape[0]:
        raise ValueError(
            'X and y must have the same number of rows, got %d and %d'
            % (X.shape[0], y.shape[0])
        )

    n_inputs, n_outputs = X.shape[1], y.shape[1]
    # define evaluation procedure
    cv = RepeatedKFold(n_splits=n_splits, n_repeats=n_repeats, random_state=random_state)

    results = list()
    for train_ix, test_ix in cv.split(X):
        # prepare data
        X_train, X_test = X[train_ix], X[test_ix]
        y_train, y_test = y[train_ix], y[test_ix]
        # a new model per fold, so nothing learned on one split leaks into the next
        model = get_model(n_inputs, n_outputs)
        model.fit(X_train, y_train, verbose=0, epochs=epochs)
        # round probabilities to class labels; verbose=0 keeps predict from
        # printing a progress bar for every fold
        yhat = model.predict(X_test, verbose=0).round()
        acc = accuracy_score(y_test, yhat)
        # report and store the fold score
        print('>%.3f' % acc)
        results.append(acc)
    return results

In [4]:
# load the menu spreadsheet and preview its first five rows
df = pd.read_excel(io="menu_data(1).xlsx")
df.head(n=5)

Unnamed: 0,메뉴,빵,치즈,소스,1,2,3,4,5,6,7,8,9,10,11,12,13,14,15
0,8,3,1,"['4', '5']",0,0,0,1,1,0,0,0,0,0,0,0,0,0,0
1,11,3,2,"['5', '14']",0,0,0,0,1,0,0,0,0,0,0,0,0,1,0
2,18,3,2,"['5', '14']",0,0,0,0,1,0,0,0,0,0,0,0,0,1,0
3,8,5,2,"['3', '5']",0,0,1,0,1,0,0,0,0,0,0,0,0,0,0
4,5,5,1,"['1', '3', '5']",1,0,1,0,1,0,0,0,0,0,0,0,0,0,0


In [6]:
# features: the first three columns of df, converted to a NumPy array
X = df.iloc[:, :3].to_numpy()
X, X.shape

(array([[ 8,  3,  1],
        [11,  3,  2],
        [18,  3,  2],
        ...,
        [ 3,  2,  2],
        [ 3,  5,  3],
        [ 3,  4,  4]], dtype=int64),
 (1325, 3))

In [8]:
# targets: every column from position 4 onward (position 3 is left out),
# converted to a NumPy array
y = df.iloc[:, 4:].to_numpy()
y, y.shape

(array([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 1, 0],
        [0, 0, 0, ..., 0, 1, 0],
        ...,
        [0, 0, 0, ..., 1, 0, 0],
        [0, 0, 0, ..., 0, 1, 0],
        [0, 0, 0, ..., 0, 0, 1]], dtype=int64),
 (1325, 15))

In [9]:
# run the repeated k-fold evaluation; each fold's score is printed as it finishes
results = evaluate_model(X=X, y=y)

>0.038
>0.000
>0.038
>0.030
>0.030
>0.023
>0.030
>0.038
>0.030
>0.045
>0.008
>0.038
>0.045
>0.030
>0.038
>0.030
>0.008
>0.023
>0.030
>0.038
>0.015
>0.053
>0.023
>0.038
>0.030
>0.038
>0.023
>0.030
>0.038
>0.008


In [10]:
# report the mean and standard deviation of the per-fold scores
print('Accuracy: %.3f (%.3f)' % tuple(stat(results) for stat in (mean, std)))

Accuracy: 0.029 (0.012)


In [11]:
# layer sizes come from the column counts of X and y
n_inputs = X.shape[1]
n_outputs = y.shape[1]
# build a fresh model and train it on the complete dataset
model = get_model(n_inputs, n_outputs)
model.fit(x=X, y=y, epochs=100, verbose=0)

<keras.callbacks.History at 0x24b3e4a5610>

In [13]:
# make a prediction for new data
# The single row is wrapped in a NumPy array of shape (1, 3) before predicting:
# Keras documents a plain Python list passed as `x` as a list of separate model
# inputs, so a nested list is not a portable way to pass one batch of samples.
newX = asarray([[7, 1, 2]])
yhat = model.predict(newX)
# yhat[0] holds one sigmoid score per label for the single input row
print('Predicted: %s' % yhat[0])

Predicted: [0.29204327 0.049916   0.1937552  0.23930345 0.25335103 0.15011133
 0.07962473 0.02624276 0.2434418  0.09340324 0.05162863 0.14248359
 0.48305428 0.11214873 0.02121669]
