In [9]:
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
import pandas as pd

# Load the category sheet. Rows are keyed further down by their 'title' column.
data = pd.read_excel('../data/카테고리_분류_2024-09-03.xlsx')
# Kept as its own frame object so `data` and `df` stay distinct names for any
# other cell that refers to them.
df = pd.DataFrame(data)

all_columns = df.columns.tolist()
# Column labels containing any of these substrings are left out of
# `filtered_columns` ('검색어' = search keyword).
exclude_keywords = ['검색어', 'title', 'category']

# str(col) guards against non-string header labels (e.g. a numeric header
# cell), for which a bare `keyword in col` test would raise TypeError.
filtered_columns = [
    col
    for col in all_columns
    if not any(keyword in str(col) for keyword in exclude_keywords)
]

# title -> {every other column: value}. Rows that share a title overwrite one
# another, so only the last occurrence of a duplicated title is kept.
results = {row['title']: row.drop('title').to_dict() for _, row in df.iterrows()}

# Categories chosen for the query profile; each one is flagged with 1
# (Western, Japanese, Korean, Chinese, Mexican — in that order).
selected_category = dict.fromkeys(['양식', '일식', '한식', '중식', '멕시코'], 1)

In [10]:
# Inspect the title -> row-dict mapping held in `results`.
# NOTE(review): this echoes every entry; previewing a small slice would keep
# the saved output readable.
results

{'마야': {'검색어': '성수카페거리',
  'category': '멕시코>남미음식',
  '멕시코': 1,
  '남미음식': 1,
  '양식': 0,
  '한식': 0,
  '곱창': 0,
  '막창': 0,
  '양': 0,
  '카페': 0,
  '디저트': 0,
  '육류': 0,
  '고기요리': 0,
  '햄버거': 0,
  '일식': 0,
  '일식당': 0,
  '브런치': 0,
  '이탈리아음식': 0,
  '스파게티': 0,
  '파스타전문': 0,
  '돈가스': 0,
  '술집': 0,
  '이자카야': 0,
  '맥주': 0,
  '호프': 0,
  '분식': 0,
  '만두': 0,
  '족발': 0,
  '보쌈': 0,
  '칼국수': 0,
  '한정식': 0,
  '돼지고기구이': 0,
  '해물': 0,
  '생선요리': 0,
  '소고기구이': 0,
  '찌개': 0,
  '전골': 0,
  '냉면': 0,
  '요리주점': 0,
  '베이커리': 0,
  '닭볶음탕': 0,
  '장어': 0,
  '먹장어요리': 0,
  '일본식라면': 0,
  '초밥': 0,
  '롤': 0,
  '피자': 0,
  '국밥': 0,
  '패밀리레스토랑': 0,
  '닭갈비': 0,
  '중식': 0,
  '중식당': 0,
  '베트남음식': 0,
  '양꼬치': 0,
  '뷔페': 0,
  '곰탕': 0,
  '설렁탕': 0,
  '포장마차': 0,
  '바(BAR)': 0,
  '주꾸미요리': 0,
  '생선회': 0,
  '국수': 0,
  '아시아음식': 0,
  '와인': 0,
  '토스트': 0,
  '두부요리': 0,
  '닭요리': 0,
  '치킨': 0,
  '닭강정': 0,
  '감자탕': 0,
  '해장국': 0,
  '비빔밥': 0,
  '순대': 0,
  '순댓국': 0,
  '북카페': 0,
  '백반': 0,
  '가정식': 0,
  '김밥': 0,
  '케이크전문': 0,
  '태국음식': 0,
  '카레': 

In [11]:
# Vector-building helper
def create_rating_vector(results, filtered_columns):
    """Build a 1-D array holding the value of each listed column.

    Args:
        results: Mapping from column name to value (anything with ``.get``).
        filtered_columns: Column names, in the order the vector should follow.

    Returns:
        numpy.ndarray with one entry per name in ``filtered_columns``; a name
        that is missing from ``results`` contributes 0.
    """
    return np.array([results.get(column_name, 0) for column_name in filtered_columns])

# One vector per entry of `results`, stacked row-wise in the dict's iteration order
vectors = np.array(
    [create_rating_vector(attributes, filtered_columns) for attributes in results.values()]
)
vectors

array([[1, 1, 0, ..., 0, 0, 0],
       [0, 0, 1, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       ...,
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0],
       [0, 0, 0, ..., 0, 0, 0]])

In [12]:
# Build the query vector for the new user from `selected_category`, using the
# same `filtered_columns` order as the rows of `vectors`.
new_vector = create_rating_vector(selected_category, filtered_columns)
new_vector

array([1, 0, 1, 1, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
       0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0])

In [13]:
# Cosine similarity between the query vector (as a single-row matrix) and every
# row of `vectors`, flattened back to one score per row.
similarities = cosine_similarity(new_vector.reshape(1, -1), vectors).ravel()
similarities

array([0.31622777, 0.4472136 , 0.2236068 , 0.        , 0.25819889,
       0.        , 0.31622777, 0.31622777, 0.        , 0.31622777,
       0.4472136 , 0.31622777, 0.4472136 , 0.25819889, 0.        ,
       0.4472136 , 0.4472136 , 0.31622777, 0.25819889, 0.        ,
       0.        , 0.4472136 , 0.        , 0.        , 0.25819889,
       0.31622777, 0.4472136 , 0.25819889, 0.        , 0.31622777,
       0.25819889, 0.25819889, 0.25819889, 0.4472136 , 0.25819889,
       0.31622777, 0.31622777, 0.        , 0.25819889, 0.31622777,
       0.        , 0.25819889, 0.31622777, 0.        , 0.25819889,
       0.25819889, 0.        , 0.25819889, 0.31622777, 0.25819889,
       0.31622777, 0.25819889, 0.31622777, 0.31622777, 0.        ,
       0.25819889, 0.        , 0.        , 0.        , 0.31622777,
       0.31622777, 0.31622777, 0.        , 0.4472136 , 0.25819889,
       0.25819889, 0.31622777, 0.        , 0.        , 0.31622777,
       0.4472136 , 0.        , 0.        , 0.        , 0.     

In [14]:
# Position of the highest similarity score; when several rows tie for the
# maximum, the first of them is the one returned.
most_similar_index = similarities.argmax()
most_similar_index

1

In [15]:
# Key of `results` at that position (the rows of `vectors` were built in the
# same dict iteration order, so positions line up).
most_similar_one = list(results)[most_similar_index]
most_similar_one

'봄의정원 성수점'

In [16]:
# Keep every field of the best match whose key is not one of the selected categories
recommended = {
    field: value
    for field, value in results[most_similar_one].items()
    if field not in selected_category
}

print(f"Most similar user: {most_similar_one}")
# NOTE(review): '검색어' is the search-keyword field of the matched row, so this
# prints that keyword rather than a place name — confirm this is intended.
print("Recommended:", recommended['검색어'])

Most similar user: 봄의정원 성수점
Recommended: 성수카페거리


In [19]:
# Show every stored field for the best-matching entry.
results[most_similar_one]

{'검색어': '성수카페거리',
 'category': '음식점>양식',
 '멕시코': 0,
 '남미음식': 0,
 '양식': 1,
 '한식': 0,
 '곱창': 0,
 '막창': 0,
 '양': 0,
 '카페': 0,
 '디저트': 0,
 '육류': 0,
 '고기요리': 0,
 '햄버거': 0,
 '일식': 0,
 '일식당': 0,
 '브런치': 0,
 '이탈리아음식': 0,
 '스파게티': 0,
 '파스타전문': 0,
 '돈가스': 0,
 '술집': 0,
 '이자카야': 0,
 '맥주': 0,
 '호프': 0,
 '분식': 0,
 '만두': 0,
 '족발': 0,
 '보쌈': 0,
 '칼국수': 0,
 '한정식': 0,
 '돼지고기구이': 0,
 '해물': 0,
 '생선요리': 0,
 '소고기구이': 0,
 '찌개': 0,
 '전골': 0,
 '냉면': 0,
 '요리주점': 0,
 '베이커리': 0,
 '닭볶음탕': 0,
 '장어': 0,
 '먹장어요리': 0,
 '일본식라면': 0,
 '초밥': 0,
 '롤': 0,
 '피자': 0,
 '국밥': 0,
 '패밀리레스토랑': 0,
 '닭갈비': 0,
 '중식': 0,
 '중식당': 0,
 '베트남음식': 0,
 '양꼬치': 0,
 '뷔페': 0,
 '곰탕': 0,
 '설렁탕': 0,
 '포장마차': 0,
 '바(BAR)': 0,
 '주꾸미요리': 0,
 '생선회': 0,
 '국수': 0,
 '아시아음식': 0,
 '와인': 0,
 '토스트': 0,
 '두부요리': 0,
 '닭요리': 0,
 '치킨': 0,
 '닭강정': 0,
 '감자탕': 0,
 '해장국': 0,
 '비빔밥': 0,
 '순대': 0,
 '순댓국': 0,
 '북카페': 0,
 '백반': 0,
 '가정식': 0,
 '김밥': 0,
 '케이크전문': 0,
 '태국음식': 0,
 '카레': 0,
 '부동산': 0,
 '관리업': 0,
 '숙박': 0,
 '콘도': 0,
 '리조트': 0,
 '모텔': 0,
 '호텔': 0,
 '퓨전음식': 0,
 