In [1]:
# mlp for multi-label classification
from numpy import mean
from numpy import std
from numpy import asarray
from sklearn.datasets import make_multilabel_classification
from sklearn.model_selection import RepeatedKFold
from keras.models import Sequential
from keras.layers import Dense
from sklearn.metrics import accuracy_score

import pandas as pd
import numpy as np

In [2]:
# get the model
def get_model(n_inputs, n_outputs):
    model = Sequential()
    model.add(Dense(20, input_dim=n_inputs, kernel_initializer='he_uniform', activation='relu'))
    model.add(Dense(n_outputs, activation='sigmoid'))
    model.compile(loss='binary_crossentropy', optimizer='adam')
    return model

In [3]:
# evaluate a model using repeated k-fold cross-validation
def evaluate_model(X, y):
    results = list()
    n_inputs, n_outputs = X.shape[1], y.shape[1]
    # define evaluation procedure
    cv = RepeatedKFold(n_splits=10, n_repeats=3, random_state=1)
    # enumerate folds
    for train_ix, test_ix in cv.split(X):
        # prepare data
        X_train, X_test = X[train_ix], X[test_ix]
        y_train, y_test = y[train_ix], y[test_ix]
        # define model
        model = get_model(n_inputs, n_outputs)
        # fit model
        model.fit(X_train, y_train, verbose=0, epochs=100)
        # make a prediction on the test set
        yhat = model.predict(X_test)
        # round probabilities to class labels
        yhat = yhat.round()
        # calculate accuracy
        acc = accuracy_score(y_test, yhat)
        # store result
        print('>%.3f' % acc)
        results.append(acc)
    return results

In [4]:
des = pd.read_excel('menu_description.xlsx')
des

Unnamed: 0,idx,menu,description
0,4,쉬림프,Juicy and fresh shrimp. Surely you can not res...
1,5,로티세리 바비큐 치킨,The best chicken with the best preparation! Th...
2,6,이탈리안 비엠티,An old world favorite sandwich that is made up...
3,7,로스트 치킨,Lightly seasoned chicken that is roasted to pe...
4,8,에그마요,A simple recipe anyone will love with a thick ...
5,9,K-바비큐,"Subway's first Korean style sandwich! Garlic, ..."
6,10,풀드 포크 바비큐,"Succulent, low and slow cooked pulled pork wit..."
7,11,비엘티,Mouth-watering bacon paired with crispy lettuc...
8,12,햄,Ham is the ultimate lunchtime staple. Jazz it ...
9,13,참치,Another gift from the sea of simple yet sumptu...


In [5]:
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.feature_extraction.text import ENGLISH_STOP_WORDS
tfidf = TfidfVectorizer(stop_words='english')

In [6]:
# tfidf 벡터화
tfidf_matrix = tfidf.fit_transform(des['description'])

In [7]:
tfidf_matrix = tfidf_matrix.toarray()

In [8]:
# read data
df = pd.read_excel("menu_data(1).xlsx")
df.columns = ['메뉴', '빵', '치즈', '소스', '랜치', '마요네즈', '스위트 어니언', '허니 머스타드', '스위트 칠리', '핫 칠리',
              '사우스웨스트 치폴레', '머스타드', '홀스래디쉬', '올리브 오일', '레드와인식초', '소금', '후추', '스모크 바비큐', '이탈리안 드레싱']
df.head()

Unnamed: 0,메뉴,빵,치즈,소스,랜치,마요네즈,스위트 어니언,허니 머스타드,스위트 칠리,핫 칠리,사우스웨스트 치폴레,머스타드,홀스래디쉬,올리브 오일,레드와인식초,소금,후추,스모크 바비큐,이탈리안 드레싱
0,8,3,1,"['4', '5']",0,0,0,1,1,0,0,0,0,0,0,0,0,0,0
1,11,3,2,"['5', '14']",0,0,0,0,1,0,0,0,0,0,0,0,0,1,0
2,18,3,2,"['5', '14']",0,0,0,0,1,0,0,0,0,0,0,0,0,1,0
3,8,5,2,"['3', '5']",0,0,1,0,1,0,0,0,0,0,0,0,0,0,0
4,5,5,1,"['1', '3', '5']",1,0,1,0,1,0,0,0,0,0,0,0,0,0,0


In [9]:
temp = []
for i in range(df.shape[0]):
    temp.append(tfidf_matrix[df.loc[i]['메뉴']-4])
tmp = np.array(temp)
tmp

array([[0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.07729536, 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       ...,
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ],
       [0.        , 0.        , 0.        , ..., 0.        , 0.        ,
        0.        ]])

In [10]:
df_new = pd.concat([df, pd.DataFrame(tmp)], axis=1)

In [11]:
df_new.head()

Unnamed: 0,메뉴,빵,치즈,소스,랜치,마요네즈,스위트 어니언,허니 머스타드,스위트 칠리,핫 칠리,...,367,368,369,370,371,372,373,374,375,376
0,8,3,1,"['4', '5']",0,0,0,1,1,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,11,3,2,"['5', '14']",0,0,0,0,1,0,...,0.0,0.086614,0.0,0.0,0.0,0.0,0.099747,0.077295,0.0,0.0
2,18,3,2,"['5', '14']",0,0,0,0,1,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,8,5,2,"['3', '5']",0,0,1,0,1,0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,5,5,1,"['1', '3', '5']",1,0,1,0,1,0,...,0.0,0.0,0.0,0.142475,0.0,0.0,0.0,0.0,0.142475,0.142475


In [16]:
X = df_new.drop(columns=['소스','랜치', '마요네즈', '스위트 어니언', '허니 머스타드', '스위트 칠리', '핫 칠리', '사우스웨스트 치폴레',
                         '머스타드', '홀스래디쉬', '올리브 오일', '레드와인식초', '소금', '후추', '스모크 바비큐', '이탈리안 드레싱'])
X = X.to_numpy()
X, X.shape

(array([[ 8.        ,  3.        ,  1.        , ...,  0.        ,
          0.        ,  0.        ],
        [11.        ,  3.        ,  2.        , ...,  0.07729536,
          0.        ,  0.        ],
        [18.        ,  3.        ,  2.        , ...,  0.        ,
          0.        ,  0.        ],
        ...,
        [ 3.        ,  2.        ,  2.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 3.        ,  5.        ,  3.        , ...,  0.        ,
          0.        ,  0.        ],
        [ 3.        ,  4.        ,  4.        , ...,  0.        ,
          0.        ,  0.        ]]),
 (1325, 380))

In [17]:
y = df.iloc[:, 4:]
y = y.to_numpy()
y, y.shape

(array([[0, 0, 0, ..., 0, 0, 0],
        [0, 0, 0, ..., 0, 1, 0],
        [0, 0, 0, ..., 0, 1, 0],
        ...,
        [0, 0, 0, ..., 1, 0, 0],
        [0, 0, 0, ..., 0, 1, 0],
        [0, 0, 0, ..., 0, 0, 1]], dtype=int64),
 (1325, 15))

In [18]:
# evaluate model
results = evaluate_model(X, y)

>0.098
>0.015
>0.083
>0.120
>0.060
>0.053
>0.106
>0.091
>0.030
>0.098
>0.053
>0.090
>0.060
>0.060
>0.045
>0.091
>0.061
>0.068
>0.068
>0.091
>0.068
>0.083
>0.053
>0.060
>0.045
>0.053
>0.061
>0.061
>0.083
>0.076


In [19]:
# summarize performance
print('Accuracy: %.3f (%.3f)' % (mean(results), std(results)))

Accuracy: 0.069 (0.023)


In [20]:
n_inputs, n_outputs = X.shape[1], y.shape[1]
# get model
model = get_model(n_inputs, n_outputs)
# fit the model on all data
model.fit(X, y, verbose=0, epochs=100)

<keras.callbacks.History at 0x18decbbe400>