In [1]:
# PATH_NAME = 'C:/Users/gkstk/OneDrive/Desktop/SangMin/Github/AI/data/'
# PATH_NAME2 = 'C:/Users/gkstk/OneDrive/Desktop/SangMin/Github/AI/'


In [2]:
# Project root; the data directory below is derived from it so the two
# locations cannot drift apart.
# NOTE(review): this is a machine-specific absolute path — consider making it configurable.
PATH_NAME2 = 'C:/Users/woobi/Documents/habit/habit-AI/'
PATH_NAME = PATH_NAME2 + 'data/'

In [3]:
import pandas as pd
import numpy as np
import re
from konlpy.tag import Okt
from sklearn.metrics.pairwise import cosine_similarity
from gensim.models import KeyedVectors
import pickle
import math

In [4]:
# TODO: when this runs inside a server, make sure these artefacts are loaded
# only once at start-up rather than per request.

# Word vectors, opened memory-mapped and read-only.
model = KeyedVectors.load(f"{PATH_NAME}한국어_음식모델_한상민.kv", mmap='r')

# NOTE(review): pickle.load can execute arbitrary code; only use it on a trusted file.
with open(f"{PATH_NAME}wweia_synonym_cats.pickle", 'rb') as handle:
  wweia_synonym_cats = pickle.load(handle)

# Category table, food table, and the embedding matrix compared against in
# process_food_log.
wweia_food_categories = pd.read_csv(f"{PATH_NAME}wweia_food_categories_addtl.csv")
wweia_data = pd.read_csv(f"{PATH_NAME}wweia_data.csv")
wweia_embeddings = pd.read_csv(f"{PATH_NAME}word_embeddings.csv", sep=",")

In [5]:
# Korean stop words (particles, fillers, unit/count words).
# NOTE(review): '것' is listed twice; the duplicate is kept so the list is unchanged.
stop_words = [
  '가', '걍', '것', '고', '과', '는', '도', '들', '등', '때',
  '로', '를', '뿐', '수', '아니', '않', '없', '에', '에게', '와',
  '으로', '은', '의', '이', '이다', '있', '자', '잘', '좀', '하다',
  '한', '조각', '개', '것', '대', '소',
  '단계', '등급', '포함', '미니', '개입',
]

In [6]:
_OKT_TAGGER = None  # shared Okt instance, created on first use and then reused


def reduce_with_food_words(rough_phrase):
  """Tokenise a free-text food name into stemmed Korean morphemes.

  Every character that is not Hangul (jamo ㄱ-ㅎ / ㅏ-ㅣ or a syllable 가-힣)
  or a space is replaced by a space, and the cleaned string is passed to
  ``Okt.morphs(..., stem=True)``.

  The Okt tagger is built once and cached at module level instead of being
  constructed on every call; this function is invoked once per food-log row.

  NOTE(review): the tokens are not filtered against ``stop_words``.

  Args:
    rough_phrase: Raw food name string.

  Returns:
    The token list produced by ``Okt.morphs`` for the cleaned string.
  """
  global _OKT_TAGGER
  if _OKT_TAGGER is None:
    _OKT_TAGGER = Okt()

  korean_string = re.sub("[^ㄱ-ㅎㅏ-ㅣ가-힣 ]", " ", rough_phrase)
  return _OKT_TAGGER.morphs(korean_string, stem=True)

In [7]:
def process_food_log(curr_log, wweia_synonym_cats):
  """Predict a food category for every row of a food log.

  For each 'Food Name', the tokens from ``reduce_with_food_words`` that exist
  in the word-vector ``model`` are averaged into one embedding. That embedding
  is compared by cosine similarity with every row of ``wweia_embeddings``; the
  'NO' of the ``wweia_data`` row at the best-matching position, and the
  matching '식품명' from ``wweia_food_categories``, are written to the log.

  Rows are addressed by position, so the log may carry any index (the previous
  ``.loc[i]`` access required a default 0..n-1 index).

  Args:
    curr_log: DataFrame with a 'Food Name' column. Modified in place.
    wweia_synonym_cats: Not used by this function; kept so existing callers
      keep working.

  Returns:
    ``curr_log`` with 'predicted_categories_number' and
    'predicted_categories_words' filled in.

  Raises:
    IndexError: if the predicted 'NO' has no row in ``wweia_food_categories``.
  """
  curr_log['predicted_categories_number'] = 0
  curr_log['predicted_categories_words'] = ""
  # The next four columns are only initialised here; this function does not
  # populate them.
  curr_log['max_cosim_score'] = 0
  curr_log['most_sim_food'] = ""
  curr_log['reciprocal_rank'] = 0.0
  curr_log['sym_reciprocal_rank'] = 0.0

  # Loop-invariant lookups: vector width comes from the model itself rather
  # than from probing a specific vocabulary word.
  embedding_dim = model.vector_size
  number_col = curr_log.columns.get_loc('predicted_categories_number')
  words_col = curr_log.columns.get_loc('predicted_categories_words')

  for row_pos, food_name in enumerate(curr_log['Food Name']):
    tokens = reduce_with_food_words(food_name)

    # Mean of the vectors of all in-vocabulary tokens; stays all-zero when no
    # token is known to the model.
    word_embed = np.zeros(shape=(1, embedding_dim))
    num_words = 0
    for word in tokens:
      word = word.lower()
      if word in model:
        num_words += 1
        word_embed += model[word]
    if num_words != 0:
      word_embed /= num_words

    similarities = cosine_similarity(word_embed, wweia_embeddings)
    # Only the single best match is needed (first position on ties).
    best_pos = int(np.argmax(similarities[0]))

    most_sim_food_row = wweia_data.iloc[best_pos, :]
    highest_cat_num = most_sim_food_row['NO']
    highest_cat_words = wweia_food_categories.loc[
      wweia_food_categories['NO'] == highest_cat_num, '식품명'
    ]
    curr_log.iloc[row_pos, number_col] = highest_cat_num
    curr_log.iloc[row_pos, words_col] = highest_cat_words.iloc[0]

  return curr_log

In [11]:
# Main method
def food_recommandation(input_food_list) :
  """Build category info for the true and predicted categories of a food log.

  The input rows are loaded into a DataFrame, ``process_food_log`` adds a
  predicted category per row, and the union of the given category codes and
  the predicted ones is looked up in ``wweia_food_categories``.

  Intermediate lists and the result are printed, and the result is also
  returned (previously it was only printed, so callers received ``None``).

  Args:
    input_food_list: Rows of
      [wweia_food_category_code, Food Name, wweia_food_category_description].

  Returns:
    A list of dicts with keys 'foodId', 'name' and 'category', one per
    distinct category number.

  Raises:
    IndexError: if a category number has no row in ``wweia_food_categories``.
  """
  input_list = ["wweia_food_category_code", "Food Name", "wweia_food_category_description"]
  curr_log = pd.DataFrame(input_food_list, columns=input_list)

  curr_log = process_food_log(curr_log, wweia_synonym_cats)

  print("true 출력 ")
  first_list = list(set(curr_log.loc[:, 'wweia_food_category_code'].tolist()))
  print(first_list)

  print("pred 출력 ")
  second_list = list(set(curr_log.loc[:, 'predicted_categories_number'].tolist()))
  print(second_list)

  print("출력 ")
  last_list = first_list + second_list
  last_list = list(set(last_list))
  print(last_list)

  category_info_list = []
  for category_num in last_list:
    category_row = wweia_food_categories[wweia_food_categories['NO'] == category_num].iloc[0]
    category_dict = {
      # Normalise to a built-in int regardless of how pandas boxed the value.
      'foodId': int(category_row['NO']),
      'name': category_row['식품명'],
      'category': category_row['식품상세분류']
    }
    print(category_dict)
    category_info_list.append(category_dict)

  print(category_info_list)
  return category_info_list

In [12]:
# Sample food log used to exercise food_recommandation. Each row is
# [wweia_food_category_code, Food Name, wweia_food_category_description],
# in the column order that food_recommandation assigns to its DataFrame.
two_dimensional_array = [
    [1, "메밀전병", "곡류 및 서류"],
    [2, "약식", "곡류 및 서류"],
    [3, "무지개떡", "곡류 및 서류"],
    [45, "초코 마카롱", "과자류"],
    [106, "레몬머랭", "과자류"],
    [105, "코코넛머랭", "과자류"],
    [335, "홍합미역국", "국 및 탕류"]
]
food_recommandation(two_dimensional_array)

true 출력 
[1, 2, 3, 105, 106, 45, 335]
pred 출력 
[9, 237, 816, 2288, 55, 701, 2015]
출력 
[1, 2, 3, 105, 106, 9, 45, 237, 335, 816, 2288, 55, 701, 2015]
{'foodId': 1, 'name': '메밀전병', 'category': '곡류 및 서류'}
{'foodId': 2, 'name': '수수부꾸미', 'category': '곡류 및 서류'}
{'foodId': 3, 'name': '약식', 'category': '곡류 및 서류'}
{'foodId': 105, 'name': '코코넛머랭', 'category': '기타 과자류'}
{'foodId': 106, 'name': '레몬머랭', 'category': '기타 과자류'}
{'foodId': 9, 'name': '송편(깨)', 'category': '떡류'}
{'foodId': 45, 'name': '초코 마카롱', 'category': '기타 과자류'}
{'foodId': 237, 'name': '3mm 황금비율로 바삭하고 고소한 전병세트', 'category': '한과류'}
{'foodId': 335, 'name': '홍합미역국', 'category': '어패류국.탕'}
{'foodId': 816, 'name': '멥쌀, 현미, 해담쌀, 생것', 'category': '곡류 및 그 제품'}
{'foodId': 2288, 'name': '새우, 젓갈, 추젓', 'category': '어패류 및 기타 수산물'}
{'foodId': 55, 'name': '초코롱 산딸기', 'category': '기타 과자류'}
{'foodId': 701, 'name': '귀리, 쌀귀리, 도정, 생것', 'category': '곡류 및 그 제품'}
{'foodId': 2015, 'name': '장어, 뱀장어, 간, 생것', 'category': '어패류 및 기타 수산물'}
[{'foodId': 1, 'name': '메