In [1]:
import pandas as pd
import numpy as np
from sklearn.metrics import mean_squared_error

In [2]:
# 모델 관련 함수 정의

In [3]:
# 변수 상수 저장
PREDICT_USER = 89
SELECT_N_MENU = 14
K_VALUE = 52

In [4]:
# 실제 R 행렬과 예측 행렬의 오차를 구하는 함수
def calculate_rmse(R, P, Q, non_zeros):
    error = 0

    full_pred_matrix = np.dot(P, Q.T)

    # 여기서 non_zeros는 아래 함수에서 확인할 수 있다.
    x_non_zero_ind = [non_zeros[0] for non_zeros in non_zeros]
    y_non_zero_ind = [non_zeros[1] for non_zeros in non_zeros]

    # 원 행렬 R에서 0이 아닌 값들만 추출한다.
    R_non_zeros = R[x_non_zero_ind, y_non_zero_ind]

    # 예측 행렬에서 원 행렬 R에서 0이 아닌 위치의 값들만 추출하여 저장한다.
    full_pred_matrix_non_zeros = full_pred_matrix[x_non_zero_ind, y_non_zero_ind]

    mse = mean_squared_error(R_non_zeros, full_pred_matrix_non_zeros)
    rmse = np.sqrt(mse)

    return rmse

# 행렬분해하는 함수
def matrix_factorization(R, K, steps=200, learning_rate=0.01, r_lambda=0.01):
    num_users, num_items = R.shape

    np.random.seed(1)
    P = np.random.normal(scale=1.0/K, size=(num_users, K))
    Q = np.random.normal(scale=1.0/K, size=(num_items, K))
    
    # R>0인 행 위치, 열 위치, 값을 non_zeros 리스트에 저장한다.
    non_zeros = [ (i, j, R[i, j]) for i in range(num_users)
                  for j in range(num_items) if R[i, j] > 0 ]

    # SGD 기법으로 P, Q 매트릭스를 업데이트 함
    for step in range(steps):
        for i, j, r in non_zeros:
            # 잔차 구함
            eij = r - np.dot(P[i, :], Q[j, :].T)

            # Regulation을 반영한 SGD 업데이터 적용
            P[i, :] = P[i, :] + learning_rate*(eij * Q[j, :] - r_lambda*P[i, :])
            Q[j, :] = Q[j, :] + learning_rate*(eij * P[i, :] - r_lambda*Q[j, :])

        rmse = calculate_rmse(R, P, Q, non_zeros)
        if step % 10 == 0:
            print("iter step: {0}, rmse: {1:4f}".format(step, rmse))

    return P, Q

# 예측 확률 구하기
def predict(P, Q):
    return np.dot(P, Q.T)

# 에측결과를 토대로 메뉴 선택하기
def predicted_menu(predicted, userNo, nMenu):
    user_like_menu = predicted[userNo-1].copy();
    predicted_name = []
    print(user_like_menu)
    for i in range(0,nMenu):
        max_idx = np.argmax(user_like_menu)
        if user_like_menu[max_idx] < 0 or user_like_menu[max_idx] == 0 : break
        predicted_name += [user_info.columns[max_idx]]
        # print(i, " : ", user_info.columns[max_idx])
        user_like_menu[max_idx] = 0
    return predicted_name

In [5]:
# 데이터 불러오기

In [6]:
excel_file_path = './data/dataTmp.xlsx'

# 엑셀 파일의 각 시트를 다른 변수에 저장
menu_info = pd.read_excel(excel_file_path, sheet_name='Sheet1', header = 0, index_col = 0)
user_info = pd.read_excel(excel_file_path, sheet_name='Sheet2', header = 0, index_col = 0)

# 각 시트 데이터 확인
# print("Sheet1 데이터:")
# print(menu_info.head())

# print("\nSheet2 데이터:")
# print(user_info.head())

In [7]:
# print제한 없애기
pd.set_option('display.max_seq_items', None)
pd.set_option('display.max_rows', None)
pd.set_option('display.max_columns', None)

In [8]:
# 사용자 - 메뉴 정보가 저장된 데이터프레임 배열로 바꾸기
user_info.arr = user_info.to_numpy()

  user_info.arr = user_info.to_numpy()


In [9]:
### 사용자 - 선호메뉴 데이터를 통한 매뉴 추천

In [10]:
P, Q = matrix_factorization(user_info.arr, K=K_VALUE)
predicted_R = predict(P,Q)

iter step: 0, rmse: 0.999499
iter step: 10, rmse: 0.991292
iter step: 20, rmse: 0.910213
iter step: 30, rmse: 0.500667
iter step: 40, rmse: 0.184473
iter step: 50, rmse: 0.072086
iter step: 60, rmse: 0.035142
iter step: 70, rmse: 0.024153
iter step: 80, rmse: 0.020915
iter step: 90, rmse: 0.019772
iter step: 100, rmse: 0.019226
iter step: 110, rmse: 0.018873
iter step: 120, rmse: 0.018592
iter step: 130, rmse: 0.018344
iter step: 140, rmse: 0.018113
iter step: 150, rmse: 0.017896
iter step: 160, rmse: 0.017688
iter step: 170, rmse: 0.017489
iter step: 180, rmse: 0.017298
iter step: 190, rmse: 0.017114


In [11]:
print(predicted_R.shape)

(90, 14)


In [12]:
predict_menu = predicted_menu(predicted_R, PREDICT_USER, SELECT_N_MENU)
print(predict_menu)

[ 0.98645624  0.91595497  0.97084806  0.94144697  0.93262721  0.97235441
  0.96748949  0.98432651  0.92732028  0.93158309 -0.0012861   0.0038446
 -0.00538655 -0.01091095]
['바지락칼국수', '고등어구이', '샤부샤부', '해물찜', '김치찌개', '초밥', '돈가스', '짜장면', '햄버거', '떡볶이', '삼겹살']


In [13]:
for i in range(0, user_info.shape[1]):
    if user_info.iloc[9,i] == 1 :
        print(user_info.columns[i])

해물찜
돈가스
샤부샤부
햄버거
짜장면


In [14]:
## 사용자 - 재료, 재료 - 선호메뉴 기반 메뉴 추천

In [15]:
user_igd_info = pd.DataFrame(0, index=[f"User{i+1}" for i in range(0, user_info.shape[0])], columns=[f"{i}" for i in menu_info.columns])

In [16]:
for row in range(0,user_info.shape[0]):
    for col in range(0, user_info.shape[1]):
        if user_info.iloc[row,col] > 0:
            for igdIdx in range(0, menu_info.shape[1]):
                if menu_info.iloc[col,igdIdx] > 0:
                    user_igd_info.iloc[row, igdIdx] += menu_info.iloc[col,igdIdx]
                    # user_igd_info.iloc[row, igdIdx] += 1

#print(user_igd_info)

In [17]:
user_igd_info.arr = user_igd_info.to_numpy()
menu_info.arr = menu_info.to_numpy()

  user_igd_info.arr = user_igd_info.to_numpy()
  menu_info.arr = menu_info.to_numpy()


In [18]:
predicted_R_V2 = predict(user_igd_info.arr,menu_info.arr)

In [19]:
predict_menu_V2 = predicted_menu(predicted_R_V2, PREDICT_USER, SELECT_N_MENU)

[54  6 45 26  0 66 11 42  9 10 15 10 10 45]


In [20]:
for i in range(0, user_info.shape[1]):
    if user_info.iloc[9,i] == 1 :
        print(user_info.columns[i])

해물찜
돈가스
샤부샤부
햄버거
짜장면


In [21]:
print(user_igd_info.arr.shape)
print(menu_info.arr.shape)

(90, 52)
(14, 52)


In [22]:
# 내가 먼저 분해해놓고 활용하기
def predict_V3(R, P_tmp, Q_tmp, steps=200, learning_rate=0.01, r_lambda=0.01):
    
    num_users, num_items = R.shape
    
    P = P_tmp.copy() * 0.1
    Q = Q_tmp.copy() * 0.1
    
    # R>0인 행 위치, 열 위치, 값을 non_zeros 리스트에 저장한다.
    non_zeros = [ (i, j, R[i, j]) for i in range(num_users)
                  for j in range(num_items) if R[i, j] > 0 ]
    
    # SGD 기법으로 P, Q 매트릭스를 업데이트 함
    for step in range(steps):
        for i, j, r in non_zeros:
            # 잔차 구함
            eij = r - np.dot(P[i, :], Q[j, :].T)
    
            # Regulation을 반영한 SGD 업데이터 적용
            P[i, :] = P[i, :] + learning_rate*(eij * Q[j, :] - r_lambda*P[i, :])
            Q[j, :] = Q[j, :] + learning_rate*(eij * P[i, :] - r_lambda*Q[j, :])
    
        rmse = calculate_rmse(R, P, Q, non_zeros)
        if step % 10 == 0:
            print("iter step: {0}, rmse: {1:4f}".format(step, rmse))

    return np.dot(P, Q.T)

In [23]:
predicted_R_V3 = predict_V3(user_info.arr, user_igd_info.arr, menu_info.arr)

iter step: 0, rmse: 0.284377
iter step: 10, rmse: 0.193537
iter step: 20, rmse: 0.148123
iter step: 30, rmse: 0.115628
iter step: 40, rmse: 0.091655
iter step: 50, rmse: 0.073690
iter step: 60, rmse: 0.060135
iter step: 70, rmse: 0.049886
iter step: 80, rmse: 0.042136
iter step: 90, rmse: 0.036273
iter step: 100, rmse: 0.031828
iter step: 110, rmse: 0.028442
iter step: 120, rmse: 0.025842
iter step: 130, rmse: 0.023821
iter step: 140, rmse: 0.022228
iter step: 150, rmse: 0.020951
iter step: 160, rmse: 0.019907
iter step: 170, rmse: 0.019037
iter step: 180, rmse: 0.018297
iter step: 190, rmse: 0.017657


In [24]:
predict_menu_V3 = predicted_menu(predicted_R_V3, PREDICT_USER, SELECT_N_MENU)
print(predict_menu_V3)
print(predict_menu_V2)

[0.9884528  0.3190298  0.50075375 0.33315573 0.25334037 0.96681196
 0.36006376 0.9837135  0.2894479  0.36336494 0.16445463 0.11750643
 0.11713065 0.479311  ]
['바지락칼국수', '고등어구이', '샤부샤부', '해물찜', '어쨌든해물음식', '짜장면', '김치찌개', '초밥', '떡볶이', '햄버거', '돈가스', '치킨', '삼겹살', '돼지갈비']
['샤부샤부', '바지락칼국수', '해물찜', '어쨌든해물음식', '고등어구이', '초밥', '치킨', '김치찌개', '짜장면', '삼겹살', '돼지갈비', '햄버거', '떡볶이']


In [25]:
print(user_info.index[PREDICT_USER-1])
for i in range(0, user_info.shape[1]):
    if user_info.iloc[PREDICT_USER-1,i] == 1 :
        print(user_info.columns[i])

User89
바지락칼국수
샤부샤부
고등어구이


In [26]:
## 사용자 - 재료 기반 메뉴 추천

# 사용자 - 재료 사이의 관계를 잠재요인 분석으로 행렬분해
# 해당 결과를 통해 선호 재료 선택
# 해당 선호 재료 배열과 메뉴별 재료 배열을 곱해서 가장 연관성이 높은 메뉴 선택

In [36]:
# 선호 재료 출력하기
def predicted_menu_V4(predicted, userNo, nMenu):
    user_like_igd = predicted[userNo-1].copy();
    predicted_name = []

    user_like_menu = predict(user_like_igd, menu_info.arr*0.1)
    print(user_like_menu)
    for i in range(0,nMenu):
        max_idx = np.argmax(user_like_menu)
        if user_like_menu[max_idx] < 0 or user_like_menu[max_idx] == 0 : break
        predicted_name += [user_info.columns[max_idx]]
        # print(i, " : ", user_info.columns[max_idx])
        user_like_menu[max_idx] = 0
    return predicted_name

In [28]:
print(user_igd_info.arr)

[[3 3 2 ... 0 0 6]
 [0 0 0 ... 0 0 3]
 [3 3 2 ... 0 0 8]
 ...
 [3 3 2 ... 0 0 6]
 [3 3 2 ... 0 0 8]
 [3 3 2 ... 0 0 8]]


In [29]:
P_V4, Q_V4 = matrix_factorization(user_igd_info.arr, K = K_VALUE, learning_rate=0.01, steps=500)

iter step: 0, rmse: 3.166521
iter step: 10, rmse: 0.822026
iter step: 20, rmse: 0.520994
iter step: 30, rmse: 0.319962
iter step: 40, rmse: 0.226035
iter step: 50, rmse: 0.162859
iter step: 60, rmse: 0.128929
iter step: 70, rmse: 0.109711
iter step: 80, rmse: 0.096907
iter step: 90, rmse: 0.087506
iter step: 100, rmse: 0.080148
iter step: 110, rmse: 0.074104
iter step: 120, rmse: 0.068978
iter step: 130, rmse: 0.064559
iter step: 140, rmse: 0.060736
iter step: 150, rmse: 0.057438
iter step: 160, rmse: 0.054611
iter step: 170, rmse: 0.052202
iter step: 180, rmse: 0.050162
iter step: 190, rmse: 0.048440
iter step: 200, rmse: 0.046991
iter step: 210, rmse: 0.045773
iter step: 220, rmse: 0.044748
iter step: 230, rmse: 0.043885
iter step: 240, rmse: 0.043155
iter step: 250, rmse: 0.042538
iter step: 260, rmse: 0.042013
iter step: 270, rmse: 0.041565
iter step: 280, rmse: 0.041181
iter step: 290, rmse: 0.040850
iter step: 300, rmse: 0.040563
iter step: 310, rmse: 0.040314
iter step: 320, rms

In [37]:
predicted_R_V4 = predict(P_V4, Q_V4)

In [38]:
predict_menu_V4 = predicted_menu_V4(predicted_R_V4, PREDICT_USER, K_VALUE)
print(predict_menu_V4)

[5.39915316 2.91429835 7.09801216 8.16350325 1.6434838  6.58053702
 3.31302443 4.18125638 4.08848615 3.18911476 2.99325916 2.0524545
 2.17920251 7.09801216]
['초밥', '해물찜', '어쨌든해물음식', '샤부샤부', '바지락칼국수', '고등어구이', '햄버거', '김치찌개', '짜장면', '치킨', '떡볶이', '돼지갈비', '삼겹살', '돈가스']


In [32]:
predicted_R_V4

array([[ 2.97434669e+00,  2.99874368e+00,  1.98299112e+00, ...,
        -1.14342130e-02,  3.21996096e-02,  5.98009222e+00],
       [ 2.62568761e+00,  2.24029202e+00,  1.75066200e+00, ...,
        -6.16831947e-03,  3.72975009e-02,  3.04713020e+00],
       [ 2.99659642e+00,  3.00898562e+00,  1.99782100e+00, ...,
        -2.11208345e-02,  2.40962128e-02,  7.96288345e+00],
       ...,
       [ 2.98910474e+00,  3.00866578e+00,  1.99272381e+00, ...,
        -1.54033251e-02,  2.93363366e-02,  5.97651875e+00],
       [ 3.00618146e+00,  3.01777130e+00,  2.00421872e+00, ...,
        -3.81961275e-02,  4.33863414e-02,  7.95736449e+00],
       [ 3.00530137e+00,  3.01856092e+00,  2.00355154e+00, ...,
        -3.69719142e-02,  4.28110387e-02,  7.95636358e+00]])

In [33]:
### 결과 비교

In [34]:
print(predict_menu)
print(predict_menu_V2)
print(predict_menu_V3)
print(predict_menu_V4)

['바지락칼국수', '고등어구이', '샤부샤부', '해물찜', '김치찌개', '초밥', '돈가스', '짜장면', '햄버거', '떡볶이', '삼겹살']
['샤부샤부', '바지락칼국수', '해물찜', '어쨌든해물음식', '고등어구이', '초밥', '치킨', '김치찌개', '짜장면', '삼겹살', '돼지갈비', '햄버거', '떡볶이']
['바지락칼국수', '고등어구이', '샤부샤부', '해물찜', '어쨌든해물음식', '짜장면', '김치찌개', '초밥', '떡볶이', '햄버거', '돈가스', '치킨', '삼겹살', '돼지갈비']
['초밥', '해물찜', '어쨌든해물음식', '샤부샤부', '바지락칼국수', '고등어구이', '햄버거', '김치찌개', '짜장면', '치킨', '떡볶이', '돼지갈비', '삼겹살', '돈가스']


In [35]:
for i in range(0, user_info.shape[1]):
    if user_info.iloc[PREDICT_USER-1,i] == 1 :
        print(user_info.columns[i])

바지락칼국수
샤부샤부
고등어구이
