In [1]:
import numpy as np
import pandas as pd

In [2]:
from sklearn.metrics.pairwise import cosine_similarity

In [3]:
# Raw ratings: one list per movie, aligned position-by-position with the
# user labels stored under "사용자".
data = {
    "사용자": ["A", "B", "C", "D", "E"],
    "기생충": [5, 4, 1, 0, 3],
    "부산행": [0, 2, 0, 4, 0],
    "태극기 휘날리며": [3, 0, 5, 4, 0],
    "도둑들": [1, 0, 4, 2, 4],
    "설국열차": [0, 3, 0, 0, 5],
    "범죄도시": [2, 0, 4, 0, 3],
}

In [4]:
# Turn the raw dict into a table: one row per user, one column per key.
df = pd.DataFrame(data=data)
df.head(n=5)

Unnamed: 0,사용자,기생충,부산행,태극기 휘날리며,도둑들,설국열차,범죄도시
0,A,5,0,3,1,0,2
1,B,4,2,0,0,3,0
2,C,1,0,5,4,0,4
3,D,0,4,4,2,0,0
4,E,3,0,0,4,5,3


In [5]:
# Use the user label as the row index so rows can be looked up with df.loc[user].
# The guard keeps this cell idempotent: once "사용자" has become the index it is
# no longer a column, so an unguarded second run would raise KeyError.
if df.index.name != "사용자":
    df = df.set_index("사용자")

In [6]:
# Preview the first rows of the current table.
df.head(n=5)

Unnamed: 0_level_0,기생충,부산행,태극기 휘날리며,도둑들,설국열차,범죄도시
사용자,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1
A,5,0,3,1,0,2
B,4,2,0,0,3,0
C,1,0,5,4,0,4
D,0,4,4,2,0,0
E,3,0,0,4,5,3


In [7]:
# Transpose into an item x user matrix: one row per movie, one column per user.
item_user_matrix = df.transpose()
item_user_matrix.head(n=5)

사용자,A,B,C,D,E
기생충,5,4,1,0,3
부산행,0,2,0,4,0
태극기 휘날리며,3,0,5,4,0
도둑들,1,0,4,2,4
설국열차,0,3,0,0,5


In [8]:
# Item-item cosine similarity: compares how the users' ratings are distributed
# across two movies (each row of item_user_matrix is one movie's rating vector).
item_sim = cosine_similarity(item_user_matrix)

# Label both axes with the movie titles. The matrix rows are the movies, so its
# index holds the same labels as df.columns.
item_labels = item_user_matrix.index
item_sim_df = pd.DataFrame(data=item_sim, index=item_labels, columns=item_labels)
item_sim_df.head(n=5)

Unnamed: 0,기생충,부산행,태극기 휘날리며,도둑들,설국열차,범죄도시
기생충,1.0,0.25049,0.396059,0.48343,0.648394,0.598059
부산행,0.25049,1.0,0.505964,0.294086,0.230089,0.0
태극기 휘날리며,0.396059,0.505964,1.0,0.720735,0.0,0.682793
도둑들,0.48343,0.294086,0.720735,1.0,0.563884,0.915844
설국열차,0.648394,0.230089,0.0,0.563884,1.0,0.477697


In [9]:
# Target user for the recommendation, and that user's row of ratings.
user = "A"
user_rating = df.loc[user, :]
user_rating

기생충         5
부산행         0
태극기 휘날리며    3
도둑들         1
설국열차        0
범죄도시        2
Name: A, dtype: int64

In [10]:
# A rating of 0 marks an item the user has not seen yet; collect those titles.
is_unrated = user_rating.eq(0)
user_unrated_items = user_rating.loc[is_unrated].index.tolist()
user_unrated_items

['부산행', '설국열차']

In [11]:
# Compute a recommendation score for every item the user has not rated yet.
# The score is the similarity-weighted average of the user's existing ratings:
#   sum(sim(item, j) * rating_j) / sum(sim(item, j))   over the rated items j
recommend_dic = {}

# Items the user has rated (rating > 0) and the ratings themselves. Neither
# depends on the candidate item, so both are computed once, outside the loop.
user_rated_items = user_rating[user_rating > 0].index.tolist()
user_rating_filtered = user_rating[user_rated_items]

for item in user_unrated_items:
    # Similarity between the candidate item and each item the user has rated
    sim_score = item_sim_df.loc[user_rated_items, item]

    # Numerator: ratings weighted by similarity. Denominator: total similarity.
    # Series.sum() is used instead of the builtin sum() so the reduction stays
    # inside pandas.
    weight_sum = (sim_score * user_rating_filtered).sum()
    sim_sum = sim_score.sum()

    # Intermediate numerator/denominator, printed for inspection
    print(weight_sum, sim_sum)

    if sim_sum != 0:
        recommend_dic[item] = weight_sum / sim_sum
    else:
        # No similarity to anything the user rated: fall back to a neutral
        # score, stored as a float like every other value in the dict.
        recommend_dic[item] = 0.0

3.064427707888644 1.0505399908985236
4.761250225771824 1.6899755345863623


In [12]:
# Display the computed score for each item the user has not rated yet
recommend_dic

{'부산행': 2.9170024315472736, '설국열차': 2.8173486114621094}