# 아이템 기반 협업필터링

In [1]:
# 헙업 필터링 -  다른유저의 정보를 사용한다
# 최근접 이웃 협업 필터링 knn
#     사용자기반
#         사용자 사용자간의 유사도 기준추천 : 나와 취향이 비슷한 사람이 좋아하는 것을 보여준다
#         비슷한 취향의 사람을 n명 만큼 뽑아서 사용한다 -> top-n방식 
#     아이템기반
#         해당 아이템과 평점 분포가 비슷한 아이템을 추천 해주는것 -> 사용자가 해당 아이템을 좋아하느냐 아니냐가 추천의 기준이 된다
        

# 잠재 요인 협업 필터링


# 데이터 정재하기

In [2]:
# 수집한 데이터중 리뷰가 1개인 맥주들이 있다
# 리뷰를 남긴 유저도 마찬가지이다
# 이와같은 경우 추천시스템 저하의 원인이 되므로 최소 10개 이상의 리뷰를 가진 맥주와 유저의 데이터셋을 구성한다.

In [3]:
import pandas as pd
import numpy as np

from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import mean_squared_error

import warnings

warnings.filterwarnings('ignore')

data = pd.read_csv('전처리후데이터.csv', encoding='utf-8', index_col=0)
data.head()

Unnamed: 0,ID,맥주이름,날짜,평점,Aroma,Appearance,Flavor,Mouthfeel,Overall
0,"italianjohn(4,613)",Kloud Original Gravity,"September 6, 2021",2.8,5,3,5,3,12
1,orda(233),Kloud Original Gravity,"August 29, 2021",3.9,10,5,10,5,9
2,"Radek Kliber(7,985)",Kloud Original Gravity,"June 5, 2021",2.4,6,3,4,2,9
3,"geologyguy(2,380)",Kloud Original Gravity,"May 26, 2021",2.3,4,3,4,3,9
4,Wolff90(167),Kloud Original Gravity,"February 16, 2021",1.0,1,1,1,1,6


In [4]:
# n개 이상의 데이터 리뷰를 남긴 유저, 맥주를 걸러내는 함수
def filtering(data, n):
    select_id = data['ID'].value_counts() >= n
    select_id = select_id[select_id].index.to_list()
    data = data[data['ID'].isin(select_id)]
    
    select_beer = data['맥주이름'].value_counts() >= n
    select_beer = select_beer[select_beer].index.to_list()
    data = data[data['맥주이름'].isin(select_beer)]
    
    return data

data_4 = data.copy()

data_5 = filtering(data_4, 10)

data_5.to_csv('정제된데이터.csv', encoding='utf-8')

In [5]:
data_5.shape

(42920, 9)

# 맥주간 유사도 기반 추천

In [6]:
#코사인 유사도를 사용
data_6 = pd.read_csv('정제된데이터.csv', encoding='utf-8')

In [7]:
data_6

Unnamed: 0.1,Unnamed: 0,ID,맥주이름,날짜,평점,Aroma,Appearance,Flavor,Mouthfeel,Overall
0,0,"italianjohn(4,613)",Kloud Original Gravity,"September 6, 2021",2.8,5,3,5,3,12
1,2,"Radek Kliber(7,985)",Kloud Original Gravity,"June 5, 2021",2.4,6,3,4,2,9
2,3,"geologyguy(2,380)",Kloud Original Gravity,"May 26, 2021",2.3,4,3,4,3,9
3,7,"snoworsummer(9,000)",Kloud Original Gravity,"March 7, 2020",3.0,6,3,6,3,12
4,9,"GuideDogSaint(2,665)",Kloud Original Gravity,"February 8, 2020",2.2,4,2,5,2,9
...,...,...,...,...,...,...,...,...,...,...
42915,4592,DJH-Bird99(207),Hoegaarden,"November 4, 2000",2.9,7,2,6,2,12
42916,4595,shorlin(721),Hoegaarden,"September 25, 2000",4.2,8,5,8,4,17
42917,4597,Sammys(427),Hoegaarden,"April 5, 2004",3.9,8,4,7,4,16
42918,4598,beerman991979(136),Hoegaarden,"August 21, 2000",4.5,9,5,8,5,18


In [8]:
data_6.columns

Index(['Unnamed: 0', 'ID', '맥주이름', '날짜', '평점', 'Aroma', 'Appearance', 'Flavor',
       'Mouthfeel', 'Overall'],
      dtype='object')

In [9]:
data_6 = data_6[['ID', '맥주이름', '날짜', '평점', 'Aroma', 'Appearance', 'Flavor', 'Mouthfeel', 'Overall']]
# data_6.drop(['Unnamed: 0'], axis=1, inplace=True)

In [10]:
data_6

Unnamed: 0,ID,맥주이름,날짜,평점,Aroma,Appearance,Flavor,Mouthfeel,Overall
0,"italianjohn(4,613)",Kloud Original Gravity,"September 6, 2021",2.8,5,3,5,3,12
1,"Radek Kliber(7,985)",Kloud Original Gravity,"June 5, 2021",2.4,6,3,4,2,9
2,"geologyguy(2,380)",Kloud Original Gravity,"May 26, 2021",2.3,4,3,4,3,9
3,"snoworsummer(9,000)",Kloud Original Gravity,"March 7, 2020",3.0,6,3,6,3,12
4,"GuideDogSaint(2,665)",Kloud Original Gravity,"February 8, 2020",2.2,4,2,5,2,9
...,...,...,...,...,...,...,...,...,...
42915,DJH-Bird99(207),Hoegaarden,"November 4, 2000",2.9,7,2,6,2,12
42916,shorlin(721),Hoegaarden,"September 25, 2000",4.2,8,5,8,4,17
42917,Sammys(427),Hoegaarden,"April 5, 2004",3.9,8,4,7,4,16
42918,beerman991979(136),Hoegaarden,"August 21, 2000",4.5,9,5,8,5,18


In [11]:
ratings = data_6.copy()

# 피벗 테이블을 이용해 유저-아이디 매트릭스 구성
user_item_matrix = ratings.pivot_table('평점', index='ID', columns='맥주이름')

#nan -> 0으로 채움
user_item_matrix = user_item_matrix.fillna(0)

#확인
user_item_matrix

맥주이름,Asahi Super Dry,Asahi Super Dry Black,Bali Hai,Bavaria 8.6 (Original),Bavaria Pilsener / Premium Beer,Beck's,Berliner Kindl Jubiläums Pilsener Premium,Bintang Pilsener,Brooklyn Special Effects (Hoppy Amber/Lager),Budweiser,...,Stephans Bräu Premium Lager,Stephans Bräu Premium Pilsener,Suntory The Premium Malt's,Tiger Beer,Tiger Radler Lemon,Tsingtao,Tsingtao Wheat Beer,Victoria Bitter,Volfas Engelman Grünberger Hefeweizen,Warsteiner Premium Verum
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
007lund(92),0.0,0.0,0.0,0.0,0.0,2.6,0.0,0.0,0.0,2.0,...,0.0,0.0,0.0,0.0,0.0,2.5,0.0,0.0,0.0,4.1
"00cobraR(1,103)",0.0,0.0,0.0,0.0,0.0,2.3,0.0,0.0,0.0,1.4,...,0.0,0.0,0.0,0.0,0.0,2.1,0.0,0.0,0.0,0.0
12HundredBaud(262),2.3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
16erBlech(560),0.0,0.0,0.0,0.0,0.0,0.0,3.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,3.0,0.0,2.0,0.0,2.5
"17Beer(1,533)",3.4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,2.6,0.0,2.8,0.0,0.0,0.0,3.5
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
zenkis(979),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
ziobarba2000(101),0.0,0.0,0.0,0.0,1.6,1.6,0.0,0.0,0.0,2.4,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.4
zombywoof(530),0.0,0.0,0.0,0.0,1.5,1.8,0.0,0.0,0.0,1.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.5
"zvikar(11,683)",1.2,2.5,0.0,0.0,0.0,2.1,0.0,0.0,0.0,1.4,...,2.2,0.0,0.0,1.7,0.0,1.7,0.0,1.4,0.0,2.3


In [12]:
# user_item_matrix -> Collaborating Filtering(협업필터링) -> item_user_matrix로 변환
user_item_matrix_T = user_item_matrix.T
user_item_matrix_T

ID,007lund(92),"00cobraR(1,103)",12HundredBaud(262),16erBlech(560),"17Beer(1,533)","17thfloor(2,444)","17wojownik(1,415)",184601(206),"1FastSTi(3,361)",20107589(748),...,"zathrus13(1,776)",zborgerd(187),zdet444dd(440),"zdk(1,917)","zebracakes(1,339)",zenkis(979),ziobarba2000(101),zombywoof(530),"zvikar(11,683)","zvsn(6,347)"
맥주이름,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Asahi Super Dry,0.0,0.0,2.3,0.0,3.4,2.0,0.0,2.4,0.0,0.0,...,0.0,1.8,1.8,1.8,2.7,0.0,0.0,0.0,1.2,0.0
Asahi Super Dry Black,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.5,0.0
Bali Hai,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Bavaria 8.6 (Original),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Bavaria Pilsener / Premium Beer,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.7,0.0,0.0,...,0.0,0.0,0.0,3.0,0.0,0.0,1.6,1.5,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Tsingtao,2.5,2.1,0.0,3.0,2.8,2.5,2.1,0.9,0.0,0.0,...,2.2,2.3,2.0,1.8,1.7,0.0,0.0,0.0,1.7,0.0
Tsingtao Wheat Beer,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Victoria Bitter,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.4,0.0
Volfas Engelman Grünberger Hefeweizen,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [13]:
# # nan -> 0
# user_item_matrix_T = user_item_matrix_T.fillna(0)

In [14]:
user_item_matrix_T

ID,007lund(92),"00cobraR(1,103)",12HundredBaud(262),16erBlech(560),"17Beer(1,533)","17thfloor(2,444)","17wojownik(1,415)",184601(206),"1FastSTi(3,361)",20107589(748),...,"zathrus13(1,776)",zborgerd(187),zdet444dd(440),"zdk(1,917)","zebracakes(1,339)",zenkis(979),ziobarba2000(101),zombywoof(530),"zvikar(11,683)","zvsn(6,347)"
맥주이름,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Asahi Super Dry,0.0,0.0,2.3,0.0,3.4,2.0,0.0,2.4,0.0,0.0,...,0.0,1.8,1.8,1.8,2.7,0.0,0.0,0.0,1.2,0.0
Asahi Super Dry Black,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,2.5,0.0
Bali Hai,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Bavaria 8.6 (Original),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Bavaria Pilsener / Premium Beer,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.7,0.0,0.0,...,0.0,0.0,0.0,3.0,0.0,0.0,1.6,1.5,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Tsingtao,2.5,2.1,0.0,3.0,2.8,2.5,2.1,0.9,0.0,0.0,...,2.2,2.3,2.0,1.8,1.7,0.0,0.0,0.0,1.7,0.0
Tsingtao Wheat Beer,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Victoria Bitter,0.0,0.0,0.0,2.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,1.4,0.0
Volfas Engelman Grünberger Hefeweizen,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [15]:
# 아이템 - 유저 매트릭스로부터 코사인 유사도 구하기
item_user_cos_sim = cosine_similarity(user_item_matrix_T, user_item_matrix_T)

# cosine_similarity()로 반환된 넘파이 행렬에 맥주이름을 넣어서 (인덱스와 컬럼에) DataFrame로 변환
item_user_cos_sim_df = pd.DataFrame(data=item_user_cos_sim, index=user_item_matrix.columns, columns=user_item_matrix.columns)

# item_user_cos_sim_df 데이터프레임 확임
print(item_user_cos_sim_df.shape)

item_user_cos_sim_df

(67, 67)


맥주이름,Asahi Super Dry,Asahi Super Dry Black,Bali Hai,Bavaria 8.6 (Original),Bavaria Pilsener / Premium Beer,Beck's,Berliner Kindl Jubiläums Pilsener Premium,Bintang Pilsener,Brooklyn Special Effects (Hoppy Amber/Lager),Budweiser,...,Stephans Bräu Premium Lager,Stephans Bräu Premium Pilsener,Suntory The Premium Malt's,Tiger Beer,Tiger Radler Lemon,Tsingtao,Tsingtao Wheat Beer,Victoria Bitter,Volfas Engelman Grünberger Hefeweizen,Warsteiner Premium Verum
맥주이름,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
Asahi Super Dry,1.000000,0.218846,0.233519,0.335265,0.497907,0.626020,0.292564,0.408678,0.200650,0.620447,...,0.122652,0.143115,0.265855,0.622602,0.096980,0.658328,0.111257,0.424949,0.039951,0.609282
Asahi Super Dry Black,0.218846,1.000000,0.106511,0.111241,0.152231,0.168149,0.115646,0.190950,0.085238,0.158158,...,0.078768,0.055430,0.370286,0.185934,0.141308,0.152284,0.074714,0.207105,0.022800,0.161525
Bali Hai,0.233519,0.106511,1.000000,0.174078,0.227267,0.228924,0.160029,0.372243,0.144327,0.193452,...,0.000000,0.034829,0.128659,0.256048,0.074622,0.239795,0.023316,0.276724,0.018493,0.218760
Bavaria 8.6 (Original),0.335265,0.111241,0.174078,1.000000,0.389177,0.320576,0.230765,0.294419,0.168764,0.306410,...,0.145842,0.171276,0.106187,0.323517,0.094424,0.310664,0.094149,0.284148,0.025525,0.327356
Bavaria Pilsener / Premium Beer,0.497907,0.152231,0.227267,0.389177,1.000000,0.513727,0.284525,0.371094,0.219304,0.480503,...,0.131480,0.160548,0.200748,0.510182,0.062264,0.503850,0.101068,0.390849,0.084919,0.518771
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
Tsingtao,0.658328,0.152284,0.239795,0.310664,0.503850,0.648696,0.272876,0.409645,0.187702,0.621338,...,0.125905,0.116819,0.228275,0.630349,0.081342,1.000000,0.078186,0.407711,0.071415,0.653041
Tsingtao Wheat Beer,0.111257,0.074714,0.023316,0.094149,0.101068,0.093481,0.149641,0.077703,0.118586,0.093060,...,0.079757,0.000000,0.078341,0.088557,0.045298,0.078186,1.000000,0.067296,0.147298,0.066346
Victoria Bitter,0.424949,0.207105,0.276724,0.284148,0.390849,0.403287,0.307391,0.457416,0.214448,0.388232,...,0.049600,0.077201,0.268093,0.480005,0.102677,0.407711,0.067296,1.000000,0.036946,0.379788
Volfas Engelman Grünberger Hefeweizen,0.039951,0.022800,0.018493,0.025525,0.084919,0.081880,0.133575,0.019161,0.043325,0.055770,...,0.184770,0.127356,0.017579,0.057227,0.000000,0.071415,0.147298,0.036946,1.000000,0.062467


In [16]:
# 맥주 : 66개
user_item_matrix.columns

Index(['Asahi Super Dry', 'Asahi Super Dry Black', 'Bali Hai',
       'Bavaria 8.6 (Original)', 'Bavaria Pilsener / Premium Beer', 'Beck's',
       'Berliner Kindl Jubiläums Pilsener Premium', 'Bintang Pilsener',
       'Brooklyn Special Effects (Hoppy Amber/Lager)', 'Budweiser',
       'Carlsberg Pilsner', 'Cass Fresh', 'Castlemaine XXXX Gold Lager',
       'Desperados', 'Edelweiss Weissbier Snowfresh',
       'Egger Grapefruit Radler', 'Egger Märzenbier', 'Egger Zwickl',
       'Eichbaum Apostel Bräu', 'Erdinger Weissbier',
       'Erdinger Weissbier Dunkel',
       'Franziskaner Hefe-Weissbier / Weissbier Naturtrub',
       'Gambrinus Originál 10° (Desítka)', 'Grolsch Premium Lager',
       'Guinness Draught', 'Guinness Original 4.2% (Ireland/UK)',
       'Harbin Beer', 'Heineken', 'Heineken Dark Lager', 'Hite D (Dry Finish)',
       'Hite Extra Cold', 'Hite Prime Max', 'Hoegaarden', 'Hoegaarden Rosée',
       'Holsten Premium Bier', 'Jeju Wit Ale', 'Kirin Ichiban',
       'Kloud Fi

In [17]:
# 코젤 맥주와 유사도가 높은 맥주 5개만 추출하기
item_user_cos_sim_df['Kozel Černý (Dark) 10°'].sort_values(ascending=False)[:5]

맥주이름
Kozel Černý (Dark) 10°         1.000000
Leffe Brune / Bruin / Brown    0.539396
Pilsner Urquell                0.529626
Erdinger Weissbier             0.512157
Paulaner Hefe-Weissbier        0.504402
Name: Kozel Černý (Dark) 10°, dtype: float64

In [18]:
# 호가든 맥주와 유사도가 높은 맥주 5개만 추출하기
item_user_cos_sim_df['Hoegaarden Rosée'].sort_values(ascending=False)[:5]

맥주이름
Hoegaarden Rosée               1.000000
Leffe Brune / Bruin / Brown    0.462570
Kronenbourg 1664 Blanc         0.437804
Krombacher Weizen              0.434893
Erdinger Weissbier Dunkel      0.428767
Name: Hoegaarden Rosée, dtype: float64

# 개인화된 맥주 추천

In [19]:
user_item_matrix = user_item_matrix.fillna(0)

In [20]:
# 개인의 평점이 반영된 추천시스템
# ratings_arr.dot(item_sim_arr)는 평점 * 맥주 유사도
# ratings_arr는 사용자 u의 아이템 i와 가장 유사도가 높은 Top_N개 아이템에 대한 실제 평점 벡터
# item_sim_arr는 아이템 i와 가장 유사도가 높은 Top_N개 아이템의 유사도 벡터
def predict_rating(ratings_arr, item_sim_arr):
    ratings_pred = ratings_arr.dot(item_sim_arr) / np.array([np.abs(item_sim_arr).sum(axis=1)])
    return ratings_pred

# 개인화된 예측 평점 구하기
# 평점 value와 유사도 value만 뽑아서 대입
ratings_pred = predict_rating(user_item_matrix.values, item_user_cos_sim_df.values)
ratings_pred_matrix = pd.DataFrame(data=ratings_pred, 
                                   index=user_item_matrix.index,
                                   columns = user_item_matrix.columns)

# 개인별로 계산된 예측 평점
ratings_pred_matrix

맥주이름,Asahi Super Dry,Asahi Super Dry Black,Bali Hai,Bavaria 8.6 (Original),Bavaria Pilsener / Premium Beer,Beck's,Berliner Kindl Jubiläums Pilsener Premium,Bintang Pilsener,Brooklyn Special Effects (Hoppy Amber/Lager),Budweiser,...,Stephans Bräu Premium Lager,Stephans Bräu Premium Pilsener,Suntory The Premium Malt's,Tiger Beer,Tiger Radler Lemon,Tsingtao,Tsingtao Wheat Beer,Victoria Bitter,Volfas Engelman Grünberger Hefeweizen,Warsteiner Premium Verum
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
007lund(92),0.825933,0.472452,0.609268,0.644847,0.754780,0.924584,0.581209,0.645169,0.582335,0.932205,...,0.435510,0.449718,0.548386,0.795512,0.364189,0.895747,0.409711,0.666318,0.374063,0.940003
"00cobraR(1,103)",0.729282,0.407605,0.532590,0.558526,0.666277,0.819192,0.505894,0.559851,0.486007,0.821567,...,0.372680,0.390929,0.472217,0.703144,0.312128,0.795377,0.347403,0.589223,0.327503,0.792723
12HundredBaud(262),0.945683,0.613168,0.758908,0.818943,0.889731,0.962473,0.763743,0.797137,0.768634,0.961761,...,0.565352,0.615284,0.669902,0.912675,0.504523,0.949188,0.534454,0.820498,0.483214,0.967179
16erBlech(560),1.017001,0.718744,0.839407,0.913689,0.968334,1.037456,1.063381,0.914348,0.907533,1.040690,...,0.733613,0.786139,0.800565,1.018247,0.547395,1.074721,0.643972,0.979471,0.716241,1.082232
"17Beer(1,533)",1.473218,0.931280,1.101567,1.169928,1.313834,1.475962,1.082442,1.171361,1.130682,1.493568,...,0.808604,0.826135,1.018632,1.429485,0.766526,1.502763,0.759995,1.197748,0.724507,1.514579
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
zenkis(979),0.558182,0.364816,0.441994,0.475964,0.532667,0.591611,0.476272,0.474385,0.474086,0.590714,...,0.351777,0.381079,0.388387,0.554118,0.245309,0.578353,0.345872,0.486712,0.330146,0.597815
ziobarba2000(101),0.572555,0.358351,0.454847,0.495884,0.583538,0.627107,0.455155,0.483032,0.436248,0.652585,...,0.321903,0.371301,0.419150,0.565989,0.264107,0.597424,0.304740,0.501369,0.276246,0.643060
zombywoof(530),0.923079,0.545350,0.687948,0.742522,0.897887,1.029394,0.686981,0.725868,0.669088,1.026736,...,0.503504,0.539578,0.614468,0.904222,0.430271,0.966804,0.468250,0.767288,0.457287,1.037080
"zvikar(11,683)",1.681006,1.515327,1.439532,1.536480,1.629543,1.730664,1.490652,1.502843,1.424180,1.724983,...,1.566965,1.475227,1.431082,1.676148,1.086630,1.713104,1.158953,1.581429,1.208413,1.742197


In [21]:
user_item_matrix.values

array([[0. , 0. , 0. , ..., 0. , 0. , 4.1],
       [0. , 0. , 0. , ..., 0. , 0. , 0. ],
       [2.3, 0. , 0. , ..., 0. , 0. , 0. ],
       ...,
       [0. , 0. , 0. , ..., 0. , 0. , 2.5],
       [1.2, 2.5, 0. , ..., 1.4, 0. , 2.3],
       [0. , 0. , 0. , ..., 0. , 0. , 2.5]])

In [22]:
item_user_cos_sim_df.values

array([[1.        , 0.21884595, 0.23351935, ..., 0.42494937, 0.03995104,
        0.60928179],
       [0.21884595, 1.        , 0.10651104, ..., 0.20710499, 0.02280036,
        0.16152485],
       [0.23351935, 0.10651104, 1.        , ..., 0.27672392, 0.01849319,
        0.21875995],
       ...,
       [0.42494937, 0.20710499, 0.27672392, ..., 1.        , 0.03694598,
        0.37978787],
       [0.03995104, 0.02280036, 0.01849319, ..., 0.03694598, 1.        ,
        0.06246695],
       [0.60928179, 0.16152485, 0.21875995, ..., 0.37978787, 0.06246695,
        1.        ]])

In [23]:
# views.py - 01
# 우리가 예측한 평점과 실제 평점간의 차이를 MSE로 계산
def get_mse(pred, actual):
    # 평점이 있는 실제 맥주만 추출
    pred = pred[actual.nonzero()].flatten()
    actual = actual[actual.nonzero()].flatten()
    return mean_squared_error(pred, actual)

print('아이템 기반 모든 최근접 이웃 MSE: ', 
      get_mse(ratings_pred, user_item_matrix.values))

아이템 기반 모든 최근접 이웃 MSE:  2.5627046332055294


# Top-N 기반한 추천

In [24]:
# 3개의 col까지만. 3개의 맥주에 대해서 유사도가 큰 5개 선택
top_n_items = [np.argsort(item_user_cos_sim_df.values[:,3])[:-5:-1]]
top_n_items

[array([ 3, 13,  4, 48], dtype=int64)]

In [25]:
#viess.py - 02
# 따라서 가장 비슷한 유사도를 가지는 맥주만 유사도 벡터로 사용
# 특정 맥주와 비슷한 유사도를 가지는 맥주 Top_N에 대해서만 적용 -> 시간오래걸림

def predict_rating_topsim(ratings_arr, item_sim_arr, n=20):
    # 사용자-아이템 평점 행렬 크기만큼 0으로 채운 예측 행렬 초기화
    pred = np.zeros(ratings_arr.shape)

    # 사용자-아이템 평점 행렬의 맥주 개수만큼 루프
    for col in range(ratings_arr.shape[1]):
        # 유사도 행렬에서 유사도가 큰 순으로 n개의 데이터 행렬의 인덱스 반환
        top_n_items = [np.argsort(item_sim_arr[:, col])[:-n-1:-1]]
        # 개인화된 예측 평점 계산 : 각 col 맥주별(1개), 2238 사용자들의 예측평점
        for row in range(ratings_arr.shape[0]):
            pred[row, col] = item_sim_arr[col,:][top_n_items].dot(
            ratings_arr[row, :][top_n_items].T)
            pred[row, col] /= np.sum(item_sim_arr[col,:][top_n_items])

    return pred

ratings_pred = predict_rating_topsim(user_item_matrix.values, item_user_cos_sim_df.values, n=10)
print('아이템 기반 최근접 TOP-N 이웃 MSE: ', 
      get_mse(ratings_pred, user_item_matrix.values))

# 계산된 예측 평점 데이터는 DataFrame으로 재생성
ratings_pred_matrix = pd.DataFrame(data=ratings_pred, index=user_item_matrix.index,
                                  columns=user_item_matrix.columns)

ratings_pred_matrix

아이템 기반 최근접 TOP-N 이웃 MSE:  0.8166139274803721


맥주이름,Asahi Super Dry,Asahi Super Dry Black,Bali Hai,Bavaria 8.6 (Original),Bavaria Pilsener / Premium Beer,Beck's,Berliner Kindl Jubiläums Pilsener Premium,Bintang Pilsener,Brooklyn Special Effects (Hoppy Amber/Lager),Budweiser,...,Stephans Bräu Premium Lager,Stephans Bräu Premium Pilsener,Suntory The Premium Malt's,Tiger Beer,Tiger Radler Lemon,Tsingtao,Tsingtao Wheat Beer,Victoria Bitter,Volfas Engelman Grünberger Hefeweizen,Warsteiner Premium Verum
ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
007lund(92),1.853715,0.000000,0.180336,0.322543,1.684949,2.395977,0.000000,0.476299,0.000000,2.548932,...,0.000000,0.000000,0.000000,1.661365,0.000000,2.435651,0.000000,0.475159,0.000000,2.292857
"00cobraR(1,103)",1.559594,0.000000,0.363776,0.000000,1.438090,2.089539,0.000000,0.399756,0.000000,2.133369,...,0.000000,0.000000,0.000000,1.525446,0.000000,2.144488,0.000000,0.398790,0.000000,2.201561
12HundredBaud(262),1.238114,0.147196,0.826135,1.058564,1.166818,1.595804,0.865955,0.812711,0.861995,1.289343,...,0.000000,0.153457,0.169938,1.863955,0.000000,1.502125,0.178425,1.116189,0.000000,1.955943
16erBlech(560),1.088234,0.000000,0.382890,1.136457,0.727064,1.126340,1.710858,1.333559,0.860100,0.933172,...,0.109576,0.416345,0.149016,1.584981,0.000000,1.273954,0.182811,1.315989,0.410819,1.639313
"17Beer(1,533)",2.571728,0.426158,1.363464,1.079340,2.022468,2.196193,0.706488,1.797065,0.927609,2.165130,...,0.000000,0.000000,0.431484,2.729379,0.160392,2.883350,0.000000,1.820384,0.000000,2.339801
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
zenkis(979),0.740374,0.000000,0.154182,0.535107,0.699945,1.028638,0.867704,0.566614,0.808544,1.103688,...,0.137034,0.148854,0.000000,1.351141,0.000000,1.017072,0.289583,0.766529,0.000000,1.237706
ziobarba2000(101),0.488947,0.000000,0.000000,0.523652,0.828055,1.166178,0.221510,0.133099,0.162958,1.419537,...,0.000000,0.149109,0.000000,0.496051,0.000000,0.874443,0.000000,0.000000,0.000000,1.264903
zombywoof(530),1.627478,0.125138,0.183987,0.586143,2.291948,2.694942,0.000000,0.576705,0.152619,2.451177,...,0.000000,0.000000,0.000000,1.842278,0.096235,2.217978,0.000000,0.587550,0.000000,3.008202
"zvikar(11,683)",1.951100,1.577630,1.104775,1.679093,1.978483,2.484421,1.552183,1.674074,1.679866,2.344590,...,1.264707,1.341808,1.194555,2.460580,0.629390,2.359908,0.395724,1.981119,0.462380,2.699022


# 실제 사용자에게 추천하기

In [26]:
# 사용자 아이디와  평점이 주어진 상황에서 맥주 3개 추천
# 예시로는 행렬에 존재하는 유저 사용

In [27]:
# username='snoworsummer(8,581)'
username='007lund(92)'
top_n = 10

# 특정 유저 대상으로 맥주추천
user_rating_id = user_item_matrix.loc[username, :]
# 유저가 먹었던 맥주들 출력
user_rating_id[user_rating_id > 0].sort_values(ascending=False)[:10]

맥주이름
Warsteiner Premium Verum             4.1
Guinness Draught                     3.7
Paulaner Hefe-Weissbier              3.3
Pilsner Urquell                      3.1
Stella Artois                        2.9
Beck's                               2.6
Sapporo Premium Beer / Draft Beer    2.6
Tsingtao                             2.5
Budweiser                            2.0
Heineken                             2.0
Name: 007lund(92), dtype: float64

In [28]:
# views.py - 03
# 사용자가 먹어본 맥주 제외
def user_not_tried_beer(user_item_matrix, userid):
    #ID로 입력 받은 사용자의 맥주 정보를 추출해 Series로 변환
    #반환된 user_rating는 beer('맥주이름')을 인덱스로 가지는 Series객체?
    user_rating=user_item_matrix.loc[userid, :]
    
    #user_rating이 0보다 크면 기존에 마신 맥주
    #0보다큰 인덱스를 추출해 list객체로 만듦
    tried = user_rating[user_rating>0].index.tolist()
    
    #모든 맥주명을 list객체로
    beer_list = user_item_matrix.columns.tolist()
    
    #tried에 해당하는 맥주는 beer_list에서 제외
    not_tried = [beer for beer in beer_list if beer not in tried]
    
    return not_tried

# views.py - 04
#예측 평점 DataFrame에서 ID인덱스와 not_tried로 들어온 맥주명 추출 후
#가장 예측 평점이 높은 순으로 정렬

def recommand_beer_by_id(pred_df, userid, not_tried, top_n):
    recommand_beer = pred_df.loc[userid, not_tried].sort_values(ascending=False)[:top_n]
    return recommand_beer

#먹지 않은 맥주
not_tried = user_not_tried_beer(user_item_matrix, username)
not_tried

['Asahi Super Dry',
 'Asahi Super Dry Black',
 'Bali Hai',
 'Bavaria 8.6 (Original)',
 'Bavaria Pilsener / Premium Beer',
 'Berliner Kindl Jubiläums Pilsener Premium',
 'Bintang Pilsener',
 'Brooklyn Special Effects (Hoppy Amber/Lager)',
 'Cass Fresh',
 'Castlemaine XXXX Gold Lager',
 'Desperados',
 'Edelweiss Weissbier Snowfresh',
 'Egger Grapefruit Radler',
 'Egger Märzenbier',
 'Egger Zwickl',
 'Eichbaum Apostel Bräu',
 'Erdinger Weissbier',
 'Erdinger Weissbier Dunkel',
 'Franziskaner Hefe-Weissbier / Weissbier Naturtrub',
 'Gambrinus Originál 10° (Desítka)',
 'Grolsch Premium Lager',
 'Guinness Original 4.2% (Ireland/UK)',
 'Harbin Beer',
 'Heineken Dark Lager',
 'Hite D (Dry Finish)',
 'Hite Extra Cold',
 'Hite Prime Max',
 'Hoegaarden',
 'Hoegaarden Rosée',
 'Holsten Premium Bier',
 'Jeju Wit Ale',
 'Kirin Ichiban',
 'Kloud Fitz Super Clear',
 'Kloud Original Gravity',
 'Kozel Černý (Dark) 10°',
 'Krombacher Weizen',
 'Kronenbourg 1664 Blanc',
 'Lapin Kulta IV A',
 'Leffe Brune 

# 아이템 기반의 최근접 이웃 CF로 맥주 추천

In [29]:
# top_n과 비슷한 맥주만 추천에 사용
ratings_pred = predict_rating_topsim(user_item_matrix.values, item_user_cos_sim_df.values, n=5)

# 계산된 예측 평점 데이터는 DataFrame으로 재생성
ratings_pred_matrix = pd.DataFrame(data=ratings_pred, index=user_item_matrix.index,
                                   columns=user_item_matrix.columns)

# 유저가 먹지 않은 맥주이름 추출
not_tried = user_not_tried_beer(user_item_matrix, username)

# 아이템 기반의 최근접 이웃 CF로 맥주 추천
recommand_beer = recommand_beer_by_id(ratings_pred_matrix, username, not_tried, top_n=3)
recommand_beer = pd.DataFrame(data=recommand_beer.values, index=recommand_beer.index,
                           columns=['예측평점'])
recommand_beer

Unnamed: 0_level_0,예측평점
맥주이름,Unnamed: 1_level_1
Hoegaarden,2.43233
Grolsch Premium Lager,2.16354
Asahi Super Dry,2.12665
