In [1]:
import pandas as pd
import numpy as np
import os
import glob
import gc
from itertools import islice
from scipy.sparse import csr_matrix
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.decomposition import TruncatedSVD
from surprise import SVD, accuracy
from surprise import Dataset, Reader
from surprise.model_selection import cross_validate
from surprise.model_selection.split import train_test_split

# 데이터 준비
## 제품 데이터 준비

In [87]:
category_list = ['bath', 'bedding', 'cleaning_supplies', 
'furniture', 'heating_cooling_airquality', 'home_storage_organization', 
'irons_steamers', 'kids_home_store', 'kitchen_dining', 
'lighting_ceiling_fans', 'party_supplies', 'seasonal_decor', 
'vacuums_floor_care', 'wall_art']

In [93]:
# for category in category_list:
#     tmp = pd.read_csv(f'./amazon_meta/{category}.csv')
#     try:
#         tmp.drop('Unnamed: 0', axis=1, inplace=True)
#         tmp.to_csv(f'./amazon_meta/{category}.csv', index=False)
#     except:
#         pass

In [82]:
# def re_asin(str):
#     if str.startswith('B') and len(str) == 10:
#         return True
#     else:
#         return False

In [86]:
# for category in category_list:
#     mask = []
#     tmp = pd.read_csv(f'./amazon_meta/{category}.csv')
#     tmp['Asin'].apply(lambda x: mask.append(re_asin(x)))
#     tmp = tmp[mask]
#     tmp.to_csv(f'./amazon_meta/{category}.csv', index=False)

In [2]:
bedding_df = pd.read_csv('./amazon_meta/bedding.csv')
bedding_df['Category'] = 'bedding'

kids_df = pd.read_csv('./amazon_meta/kids_home_store.csv')
kids_df['Category'] = 'Kids_Home_Store'

kitchen_df = pd.read_csv('./amazon_meta/kitchen_dining.csv')
kitchen_df['Category'] = 'Kitchen_Dining'

seasonal_df = pd.read_csv('./amazon_meta/seasonal_decor.csv')
seasonal_df['Category'] = 'Seasonal_Decor'

item_df = pd.concat([bedding_df, kids_df, kitchen_df, seasonal_df])

# print('Item Dataframe')
display(item_df)

Unnamed: 0,Name,Asin,Price,Link,Img,Category
0,Sealy Baby - Stain Protection Waterproof Fitte...,B001FCK32C,$15.60,https://www.amazon.com/Sealy-Protection-Waterp...,,bedding
1,"SafeRest Mattress Protector – Queen, Premium, ...",B003PWNH4Q,$37.99,https://www.amazon.com/SafeRest-Hypoallergenic...,,bedding
2,SafeRest Premium Hypoallergenic Bed Bug Proof ...,B003Q0U740,$11.11,https://www.amazon.com/SafeRest-Hypoallergenic...,,bedding
3,SafeRest Zippered Mattress Protector - Premium...,B004BAEF7E,$37.71,https://www.amazon.com/SafeRest-Premium-Zipper...,,bedding
4,Bed Band Not Made in China. 100% USA Worker As...,B004I3VDWY,$12.99,https://www.amazon.com/Bed-Band-Assembled-Susp...,,bedding
...,...,...,...,...,...,...
595,"Neon Light Strip, Flexible Neon Rope Light 12v...",B09YYD728H,$29.99,https://www.amazon.com/Flexible-Freedom-Contro...,,Seasonal_Decor
596,"Ostritec Solar Outdoor Watering Can Lights, Wa...",B09ZHLMTXH,$29.99,https://www.amazon.com/Ostritec-Watering-Water...,,Seasonal_Decor
597,"ZUUKOO LIGHT Outdoor String Lights, RGB 48ft P...",B0B17FTDVV,$44.98,https://www.amazon.com/ZUUKOO-LIGHT-Waterproof...,,Seasonal_Decor
598,Happy Juneteenth Banner，Africa American Indepe...,B0B1PM4WXP,$11.98,https://www.amazon.com/Juneteenth-Banner%EF%BC...,,Seasonal_Decor


In [3]:
item_df.isnull().sum()

Name          0
Asin          0
Price         0
Link          0
Img         905
Category      0
dtype: int64

In [4]:
item_df = item_df.drop_duplicates(subset='Asin')

### Asin에 고유한 number 부여

In [5]:
asin_list = list(item_df['Asin'])

In [6]:
asin_number_dict = {}

for idx, asin in enumerate(asin_list):
    asin_number_dict[asin] = idx

In [7]:
asin_number_list = []

for asin in item_df['Asin']:
    asin_number_list.append(asin_number_dict.get(asin))

In [8]:
item_df['Asin_Number'] = asin_number_list
display(item_df)

print('** rating_df info **')
print(item_df.info())

Unnamed: 0,Name,Asin,Price,Link,Img,Category,Asin_Number
0,Sealy Baby - Stain Protection Waterproof Fitte...,B001FCK32C,$15.60,https://www.amazon.com/Sealy-Protection-Waterp...,,bedding,0
1,"SafeRest Mattress Protector – Queen, Premium, ...",B003PWNH4Q,$37.99,https://www.amazon.com/SafeRest-Hypoallergenic...,,bedding,1
2,SafeRest Premium Hypoallergenic Bed Bug Proof ...,B003Q0U740,$11.11,https://www.amazon.com/SafeRest-Hypoallergenic...,,bedding,2
3,SafeRest Zippered Mattress Protector - Premium...,B004BAEF7E,$37.71,https://www.amazon.com/SafeRest-Premium-Zipper...,,bedding,3
4,Bed Band Not Made in China. 100% USA Worker As...,B004I3VDWY,$12.99,https://www.amazon.com/Bed-Band-Assembled-Susp...,,bedding,4
...,...,...,...,...,...,...,...
595,"Neon Light Strip, Flexible Neon Rope Light 12v...",B09YYD728H,$29.99,https://www.amazon.com/Flexible-Freedom-Contro...,,Seasonal_Decor,1600
596,"Ostritec Solar Outdoor Watering Can Lights, Wa...",B09ZHLMTXH,$29.99,https://www.amazon.com/Ostritec-Watering-Water...,,Seasonal_Decor,1601
597,"ZUUKOO LIGHT Outdoor String Lights, RGB 48ft P...",B0B17FTDVV,$44.98,https://www.amazon.com/ZUUKOO-LIGHT-Waterproof...,,Seasonal_Decor,1602
598,Happy Juneteenth Banner，Africa American Indepe...,B0B1PM4WXP,$11.98,https://www.amazon.com/Juneteenth-Banner%EF%BC...,,Seasonal_Decor,1603


** rating_df info **
<class 'pandas.core.frame.DataFrame'>
Int64Index: 1605 entries, 0 to 599
Data columns (total 7 columns):
 #   Column       Non-Null Count  Dtype 
---  ------       --------------  ----- 
 0   Name         1605 non-null   object
 1   Asin         1605 non-null   object
 2   Price        1605 non-null   object
 3   Link         1605 non-null   object
 4   Img          700 non-null    object
 5   Category     1605 non-null   object
 6   Asin_Number  1605 non-null   int64 
dtypes: int64(1), object(6)
memory usage: 100.3+ KB
None


## 리뷰 데이터 준비
### 리뷰 데이터 합치기

In [114]:
review_category_list = ['bedding', 'kids_home_store', 'kitchen_dining', 'seasonal_decor']

for category in review_category_list:
    files_joined = os.path.join('./amazon/review/{}'.format(category), '*.csv')
    list_files = glob.glob(files_joined)

'./amazon/review/seasonal_decor\\*.csv'

In [9]:
files_joined = os.path.join('./amazon_review/bedding', '*.csv')
list_files = glob.glob(files_joined)
a = pd.concat(map(pd.read_csv, list_files), ignore_index=True).drop('Unnamed: 0', axis=1)

In [10]:
files_joined = os.path.join('./amazon_review/kids_home_store/', '*.csv')
list_files = glob.glob(files_joined)
b = pd.concat(map(pd.read_csv, list_files), ignore_index=True).drop('Unnamed: 0', axis=1)

In [11]:
files_joined = os.path.join('./amazon_review/kitchen_dining/', '*.csv')
list_files = glob.glob(files_joined)
c = pd.concat(map(pd.read_csv, list_files), ignore_index=True).drop('Unnamed: 0', axis=1)

In [12]:
files_joined = os.path.join('./amazon_review/seasonal_decor/', '*.csv')
list_files = glob.glob(files_joined)
d = pd.concat(map(pd.read_csv, list_files), ignore_index=True).drop('Unnamed: 0', axis=1)

In [13]:
rating_df = pd.concat([a, b, c, d])

In [14]:
rating_df

Unnamed: 0,Asin,ID,Date,Rating,Title,Body
0,B001FCK32C,Chelsea Edwards,5162022,5,Works perfect!,Exactly what I needed! Works perfect! Mattress...
1,B001FCK32C,Atalanta,5102022,3,Fits well but not ideal,We bought an Ikea crib and Ikea has been out o...
2,B001FCK32C,Marcos Chavez,482022,5,Nice & Comfy Protection,Fits perfect for toddler mattress to add comfo...
3,B001FCK32C,Bonnie Barnette,3132022,5,Good protection for crib matress,Great protection for crib matress
4,B001FCK32C,Jess Ramos,3102022,5,Must Have,My son spits up quiet often and this helps to ...
...,...,...,...,...,...,...
22372,B09WQZGGWV,Carolyn Estes,1142022,4,The sun,They are very good when there is sun
22373,B09WQZGGWV,j. hodge,1142022,3,Poor,"not bright, haven’t had long enough to rate lo..."
22374,B09WQZGGWV,BKitimat,1142022,5,Lights are Awesome!,I was a bit sceptical of how good the lights w...
22375,B09WQZGGWV,Lisa,1132022,5,Going strong for two months,Perfect addition to my backyard.


In [15]:
rating_df.shape

(116399, 6)

In [16]:
rating_df.isnull().sum()

Asin        0
ID          7
Date        0
Rating      0
Title      20
Body      616
dtype: int64

In [17]:
rating_df = rating_df.dropna(subset=['ID'])
rating_df.shape

(116392, 6)

In [18]:
rating_df['ID'].unique().__len__()

78592

### 유저 ID에 고유한 number 부여

In [19]:
id_list = rating_df['ID'].unique()

In [20]:
id_number_dict = {}

for idx, id in enumerate(id_list):
    id_number_dict[id] = idx

In [21]:
id_number_list = []

for id in rating_df['ID']:
    id_number_list.append(id_number_dict.get(id))

In [22]:
rating_df['User_Number'] = id_number_list

print('** rating_df **')
display(rating_df)

print('** rating_df info **')
print(rating_df.info())

** rating_df **


A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rating_df['User_Number'] = id_number_list


Unnamed: 0,Asin,ID,Date,Rating,Title,Body,User_Number
0,B001FCK32C,Chelsea Edwards,5162022,5,Works perfect!,Exactly what I needed! Works perfect! Mattress...,0
1,B001FCK32C,Atalanta,5102022,3,Fits well but not ideal,We bought an Ikea crib and Ikea has been out o...,1
2,B001FCK32C,Marcos Chavez,482022,5,Nice & Comfy Protection,Fits perfect for toddler mattress to add comfo...,2
3,B001FCK32C,Bonnie Barnette,3132022,5,Good protection for crib matress,Great protection for crib matress,3
4,B001FCK32C,Jess Ramos,3102022,5,Must Have,My son spits up quiet often and this helps to ...,4
...,...,...,...,...,...,...,...
22372,B09WQZGGWV,Carolyn Estes,1142022,4,The sun,They are very good when there is sun,78589
22373,B09WQZGGWV,j. hodge,1142022,3,Poor,"not bright, haven’t had long enough to rate lo...",78590
22374,B09WQZGGWV,BKitimat,1142022,5,Lights are Awesome!,I was a bit sceptical of how good the lights w...,78591
22375,B09WQZGGWV,Lisa,1132022,5,Going strong for two months,Perfect addition to my backyard.,1482


** rating_df info **
<class 'pandas.core.frame.DataFrame'>
Int64Index: 116392 entries, 0 to 22376
Data columns (total 7 columns):
 #   Column       Non-Null Count   Dtype 
---  ------       --------------   ----- 
 0   Asin         116392 non-null  object
 1   ID           116392 non-null  object
 2   Date         116392 non-null  object
 3   Rating       116392 non-null  object
 4   Title        116372 non-null  object
 5   Body         115776 non-null  object
 6   User_Number  116392 non-null  int64 
dtypes: int64(1), object(6)
memory usage: 7.1+ MB
None


### rating_df 에서 Asin에 고유한 number 부여

In [23]:
asin_number_list2 = []

for asin in rating_df['Asin']:
    asin_number_list2.append(asin_number_dict.get(asin))
    
rating_df['Asin_Number'] = asin_number_list2

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  rating_df['Asin_Number'] = asin_number_list2


In [24]:
print('** rating_df Dataframe **')
display(rating_df)

print('\n** rating_df Information **')
display(rating_df.info())

** rating_df Dataframe **


Unnamed: 0,Asin,ID,Date,Rating,Title,Body,User_Number,Asin_Number
0,B001FCK32C,Chelsea Edwards,5162022,5,Works perfect!,Exactly what I needed! Works perfect! Mattress...,0,0
1,B001FCK32C,Atalanta,5102022,3,Fits well but not ideal,We bought an Ikea crib and Ikea has been out o...,1,0
2,B001FCK32C,Marcos Chavez,482022,5,Nice & Comfy Protection,Fits perfect for toddler mattress to add comfo...,2,0
3,B001FCK32C,Bonnie Barnette,3132022,5,Good protection for crib matress,Great protection for crib matress,3,0
4,B001FCK32C,Jess Ramos,3102022,5,Must Have,My son spits up quiet often and this helps to ...,4,0
...,...,...,...,...,...,...,...,...
22372,B09WQZGGWV,Carolyn Estes,1142022,4,The sun,They are very good when there is sun,78589,1592
22373,B09WQZGGWV,j. hodge,1142022,3,Poor,"not bright, haven’t had long enough to rate lo...",78590,1592
22374,B09WQZGGWV,BKitimat,1142022,5,Lights are Awesome!,I was a bit sceptical of how good the lights w...,78591,1592
22375,B09WQZGGWV,Lisa,1132022,5,Going strong for two months,Perfect addition to my backyard.,1482,1592



** rating_df Information **
<class 'pandas.core.frame.DataFrame'>
Int64Index: 116392 entries, 0 to 22376
Data columns (total 8 columns):
 #   Column       Non-Null Count   Dtype 
---  ------       --------------   ----- 
 0   Asin         116392 non-null  object
 1   ID           116392 non-null  object
 2   Date         116392 non-null  object
 3   Rating       116392 non-null  object
 4   Title        116372 non-null  object
 5   Body         115776 non-null  object
 6   User_Number  116392 non-null  int64 
 7   Asin_Number  116392 non-null  int64 
dtypes: int64(2), object(6)
memory usage: 8.0+ MB


None

# 추천 알고리즘

## 추천 제품 개수

In [38]:
topK = 5

### 인기 기반 구현 Popular Based
- 가중 등급 시스템(weighted rating)
- WR = ( (v / (v + m)) * R) + ( (m / (v + m)) * C )

- R: 아이템의 평균 rating

- v: 아이템에 투표한 수

- m: 인기 항목에 나열되는 데 필요한 최소 투표수

- C: 전체 데이터셋을 통해서 얻은 평균 rating

In [39]:
def weightedRating(v,m,R,C):
    return ( (v / (v + m)) * R) + ( (m / (v + m)) * C )

def assignPopularBasedScore(ratingDf, itemDf, userCol, itemCol, ratingCol):
    # pre processing
    voteCount = (
        ratingDf
        .groupby(itemCol,as_index=False)
        .agg( {userCol:'count', ratingCol:'mean'} )
        )
    voteCount.columns = [itemCol, 'voteCount', 'avg_rating']
    
    # calcuate input parameters
    C = np.mean(voteCount['avg_rating'])
    m = np.percentile(voteCount['voteCount'], 70)
    voteCount = voteCount[voteCount['voteCount'] >= m]
    R = voteCount['avg_rating']
    v = voteCount['voteCount']
    voteCount['weightedRating'] = weightedRating(v,m,R,C)
    
    # post processing
    voteCount = voteCount.merge(itemDf, on = [itemCol], how = 'left')
    popular_items = voteCount.loc[:,[itemCol, 'Category', 'voteCount', 'avg_rating', 'weightedRating']]
    
    return popular_items

# calcualte popularity based
pop_items = assignPopularBasedScore(rating_df, item_df, 'ID', 'Asin', 'Rating')
pop_items = pop_items.sort_values('weightedRating', ascending = False)

pop_items.reset_index(drop=True).head(10)

Unnamed: 0,Asin,Category,voteCount,avg_rating,weightedRating
0,B01LWQMM4V,bedding,200,4.92,4.619711
1,B09282QY2X,bedding,198,4.878788,4.590313
2,B0711QYPJD,Kids_Home_Store,200,4.78,4.526378
3,B09LLL8FHP,Kids_Home_Store,100,5.0,4.509566
4,B09Z2DJCML,bedding,200,4.75,4.506378
5,B08DDJ7BKP,bedding,200,4.73,4.493044
6,B003KRHDNC,Kitchen_Dining,100,4.93,4.474566
7,B07DJ53GZQ,Kitchen_Dining,100,4.92,4.469566
8,B07JKN43H6,bedding,200,4.68,4.459711
9,B08F5KQFXJ,Kids_Home_Store,100,4.89,4.454566


# Content-based
- 제품과 카테고리 기반 코사인 유사도를 계산해서 추천
- 사용자가 원하는 content에 대해서 벡터화 -> 벡터화된 데이터로 유사도만 계산해주면 사용 가능

- 근데 이거는 어차페 카테고리가 다 같아서 쓸모 없을 듯

In [40]:
item_df.head()

Unnamed: 0,Name,Asin,Price,Link,Img,Category,Asin_Number
0,Sealy Baby - Stain Protection Waterproof Fitte...,B001FCK32C,$15.60,https://www.amazon.com/Sealy-Protection-Waterp...,,bedding,0
1,"SafeRest Mattress Protector – Queen, Premium, ...",B003PWNH4Q,$37.99,https://www.amazon.com/SafeRest-Hypoallergenic...,,bedding,1
2,SafeRest Premium Hypoallergenic Bed Bug Proof ...,B003Q0U740,$11.11,https://www.amazon.com/SafeRest-Hypoallergenic...,,bedding,2
3,SafeRest Zippered Mattress Protector - Premium...,B004BAEF7E,$37.71,https://www.amazon.com/SafeRest-Premium-Zipper...,,bedding,3
4,Bed Band Not Made in China. 100% USA Worker As...,B004I3VDWY,$12.99,https://www.amazon.com/Bed-Band-Assembled-Susp...,,bedding,4


In [41]:
rating_df.head()

Unnamed: 0,Asin,ID,Date,Rating,Title,Body,User_Number,Asin_Number
0,B001FCK32C,Chelsea Edwards,5162022,5,Works perfect!,Exactly what I needed! Works perfect! Mattress...,0,0
1,B001FCK32C,Atalanta,5102022,3,Fits well but not ideal,We bought an Ikea crib and Ikea has been out o...,1,0
2,B001FCK32C,Marcos Chavez,482022,5,Nice & Comfy Protection,Fits perfect for toddler mattress to add comfo...,2,0
3,B001FCK32C,Bonnie Barnette,3132022,5,Good protection for crib matress,Great protection for crib matress,3,0
4,B001FCK32C,Jess Ramos,3102022,5,Must Have,My son spits up quiet often and this helps to ...,4,0


In [42]:
def topKItems(itemId, topK, corrMat, mapName):
    
    # 상관계수 정렬 &nd topK 제품 선택
    topItems = corrMat[itemId,:].argsort()[-topK:][::-1] 
    topItems = [mapName[e] for e in topItems] 

    return topItems

# preprocessing
ratedItems = item_df.loc[item_df['Asin'].isin(rating_df['Asin'])].copy()

# extract the Category
Category = ratedItems['Category'].str.split(",", expand=True)

# get all possible Category
allCategory = set()
for c in Category.columns:
    distinctCategory = Category[c].str.lower().str.strip().unique()
    allCategory.update(distinctCategory)

# create item-Category matrix
itemCategoryMat = ratedItems[['Asin', 'Category']].copy()
itemCategoryMat['Category'] = itemCategoryMat['Category'].str.lower().str.strip()

# OHE the genres column
for Category in allCategory:
    itemCategoryMat[Category] = np.where(itemCategoryMat['Category'].str.contains(Category), 1, 0)
itemCategoryMat = itemCategoryMat.drop(['Category'], axis=1)
itemCategoryMat = itemCategoryMat.set_index('Asin')

# compute similarity matix
corrMat = cosine_similarity(itemCategoryMat)


# get topK similar items
ind2name = {ind:name for ind,name in enumerate(itemCategoryMat.index)}
name2ind = {v:k for k,v in ind2name.items()}
similarItems = topKItems(name2ind['B001FCK32C'],
                            topK = topK,
                            corrMat = corrMat,
                            mapName = ind2name)

# display result
print(f"The {topK} similar product to Asin B001FCK32C")
display(item_df.loc[item_df['Asin'].isin(similarItems)])

del corrMat
gc.collect()

The 5 similar product to Asin B001FCK32C


Unnamed: 0,Name,Asin,Price,Link,Img,Category,Asin_Number
0,Sealy Baby - Stain Protection Waterproof Fitte...,B001FCK32C,$15.60,https://www.amazon.com/Sealy-Protection-Waterp...,,bedding,0
285,MIULEE Throw Pillow Insert Hypoallergenic Prem...,B083LMRXJC,$11.99,https://www.amazon.com/MIULEE-Hypoallergenic-P...,,bedding,285
294,NTBAY Microfiber Twin Extra-Long Bedding Flat ...,B088BK84JJ,$15.99,https://www.amazon.com/NTBAY-Microfiber-Extra-...,,bedding,294
295,MIULEE Pack of 4 Hypoallergenic Premium Pillow...,B088GXPHFC,$20.39,https://www.amazon.com/MIULEE-Hypoallergenic-P...,,bedding,295
296,FLEXTAILGEAR MAX Pump 2020 Portable Air Pump w...,B088NMQJSN,$27.99,https://www.amazon.com/FLEXTAILGEAR-Portable-R...,,bedding,296


101

In [160]:
# ind2name

In [159]:
# name2ind

# Collaborative filtering
- 아이템 x 유저 행렬 만들어서 진행

##  Memory-based

In [31]:
rating_df2 = rating_df[['ID', 'Asin', 'Rating', 'User_Number', 'Asin_Number']].copy()

In [32]:
item_df2 = item_df[['Asin','Category']].copy()

In [34]:
# preprocess data
row = rating_df2['User_Number']
col = rating_df2['Asin_Number']
data = rating_df2['Rating']

NUM_USERS = len(rating_df2)
NUM_ITEMS = len(item_df2)

# init user-item matrix
mat = csr_matrix((data, (row, col)), shape=(NUM_USERS, NUM_ITEMS), dtype=int)
mat.eliminate_zeros()

# calculate sparsity
sparsity = float(len(mat.nonzero()[0]))
sparsity /= (mat.shape[0] * mat.shape[1])
sparsity *= 100
print(f'Sparsity: {sparsity:4.2f}%. This means that {sparsity:4.2f}% of the user-item ratings have a value.')

# compute similarity
item_corr_mat = cosine_similarity(mat.T)

# get top k item
print(f"The {topK} similar product to Asin B001FCK32C")


similar_items = topKItems(name2ind['B001FCK32C'],
                            topK = topK,
                            corrMat = item_corr_mat,
                            mapName = ind2name)

display(item_df2.loc[item_df2['Asin'].isin(similar_items)])

Sparsity: 0.06%. This means that 0.06% of the user-item ratings have a value.
The 10 similar product to Asin B001FCK32C


KeyError: 1587

## Model based
### Matrix Factorization (MF)
#### TruncatedSVD (Sklearn)

<font color=red><U>**이거 돌릴때마다 값이 다름 이유 모름 그리고 가끔 에러남**</U></font>

In [35]:
epsilon = 1e-9
n_latent_factors = 10

# calculate item latent matrix
item_svd = TruncatedSVD(n_components = n_latent_factors)
item_features = item_svd.fit_transform(mat.transpose()) + epsilon

# calculate user latent matrix
user_svd = TruncatedSVD(n_components = n_latent_factors)
user_features = user_svd.fit_transform(mat) + epsilon

# compute similarity
item_corr_mat = cosine_similarity(item_features)

# get top k item
print(f"The {topK} similar product to Asin B001FCK32C")
similar_items = topKItems(name2ind['B001FCK32C'],
                            topK = topK,
                            corrMat = item_corr_mat,
                            mapName = ind2name)

display(item_df.loc[item_df['Asin'].isin(similar_items)])

del user_features
gc.collect();

The 10 similar product to Asin B001FCK32C


Unnamed: 0,Name,Asin,Price,Link,Img,Category,Asin_Number
0,Sealy Baby - Stain Protection Waterproof Fitte...,B001FCK32C,$15.60,https://www.amazon.com/Sealy-Protection-Waterp...,,bedding,0
66,Amazon Basics Hypoallergenic Quilted Mattress ...,B01KBKHZRO,$23.56,https://www.amazon.com/AmazonBasics-Hypoallerg...,,bedding,66
251,Ely's & Co. Baby Wearable Blanket│Sleep Bag 2-...,B07VF72455,$26.99,https://www.amazon.com/Elys-Baby-Wearable-Blan...,,bedding,251
277,"MOON PINE U Shaped Pregnancy Pillow, Maternity...",B082F3F2MJ,$28.99,https://www.amazon.com/MOON-PINE-Pregnancy-Mat...,https://m.media-amazon.com/images/I/51lH3zyj2L...,bedding,277
52,Tovla Jr. Knives for Kids 3-Piece Nylon Kitche...,B0711QYPJD,$9.95,https://www.amazon.com/Tovla-Knives-3-Piece-Ny...,https://m.media-amazon.com/images/I/61xLvWS5FE...,Kids_Home_Store,475
279,"All-in-ONE Over The Door Organizer, Super Behi...",B09BWQYNHC,$21.24,https://www.amazon.com/Organizer-Storage-Windo...,https://m.media-amazon.com/images/I/81Xyb1N8uZ...,Kids_Home_Store,676
54,Ninja BL610 Professional 72 Oz Countertop Blen...,B00NGV4506,$89.87,https://www.amazon.com/Ninja-Professional-Coun...,https://m.media-amazon.com/images/I/71iD5Ryhua...,Kitchen_Dining,750
90,"Ticent Ice Cube Trays (Set of 2), Silicone Sph...",B01J1BLYUS,$12.98,https://www.amazon.com/Ticent-Silicone-Whiskey...,https://m.media-amazon.com/images/I/41EX6w2D0i...,Kitchen_Dining,786
244,"Football Theme Tablecloth, Polyester 60x84in f...",B08W2XNH7V,$22.96,https://www.amazon.com/Tablecloth-Polyester-Ba...,https://m.media-amazon.com/images/I/61yq38U5rA...,Kitchen_Dining,939
36,Holiday Time 100 Clear Mini Lights - Green Wir...,B01N9SQIZO,$16.49,https://www.amazon.com/Holiday-Time-Clear-Mini...,https://m.media-amazon.com/images/I/71IRnW+Hkl...,Seasonal_Decor,1041


#### Funk MF (Surprise)

In [36]:
funk_rating_df = rating_df[['ID', 'Asin', 'Rating']]

In [37]:
def pred2dict(predictions, top_k=None):
    
    rec_dict = defaultdict(list)
    for user_id, item_id, actual_rating, pred_rating, _ in predictions:
        rec_dict[user_id].append((item_id, pred_rating))        
        
    return rec_dict

def get_top_k_recommendation(rec_dict, user_id, top_k, ind2name):
    
    pred_ratings = rec_dict[user_id]
    # sort descendingly by pred_rating
    pred_ratings = sorted(pred_ratings, key=lambda x: x[1], reverse=True)
    pred_ratings = pred_ratings[:top_k]
    recs = [ind2name[e[0]] for e in pred_ratings]
    
    return recs

# prepare train and test sets
reader = Reader(rating_scale=(1,10))
data = Dataset.load_from_df(funk_rating_df, reader)
train, test = train_test_split(data, test_size=.2, random_state=42)

# init and fit the funk mf model
algo = SVD(random_state = 42)
algo.fit(train)
pred = algo.test(test)

# evaluation the test set
accuracy.rmse(pred)

# extract the item features from algo
item_corr_mat = cosine_similarity(algo.qi)

print(f"The {topK} similar product to Asin B001FCK32C")
similar_items = topKItems(name2ind['B001FCK32C'],
                            topK = topK,
                            corrMat = item_corr_mat,
                            mapName = ind2name)

display(item_df.loc[item_df['Asin'].isin(similar_items)])

del item_corr_mat
gc.collect()

RMSE: 1.4164
The 10 similar product to Asin B001FCK32C


Unnamed: 0,Name,Asin,Price,Link,Img,Category,Asin_Number
0,Sealy Baby - Stain Protection Waterproof Fitte...,B001FCK32C,$15.60,https://www.amazon.com/Sealy-Protection-Waterp...,,bedding,0
15,Dreamtown Kids Toddler Pillow with Pillowcase ...,B00KWR2BCQ,$24.99,https://www.amazon.com/Dreamtown-Kids-Toddler-...,,bedding,15
185,Oubonun Premium Adjustable Loft Quilted Body P...,B07M7TLN6G,$38.99,https://www.amazon.com/Premium-Adjustable-Loft...,,bedding,185
239,2-Pack Cooling Bed Pillows for Sleeping Adjust...,B07T7W7VR3,$46.99,https://www.amazon.com/2-Pack-Cooling-Pillows-...,,bedding,239
268,Bedsure 100% Viscose from Bamboo Sheets Set 4P...,B07YKCZHGK,$38.24,https://www.amazon.com/Bedsure-Viscose-Bamboo-...,,bedding,268
307,Queen/Full/Full-XL Foam Mattress Vacuum Bag fo...,B08BPJ3XXL,$23.70,https://www.amazon.com/Full-XL-Mattress-Vacuum...,,bedding,307
354,Full Size Mattress Protector Waterproof Mattre...,B08ZKQ4NVB,$20.99,https://www.amazon.com/Mattress-Protector-Wate...,,bedding,354
129,"Pamo Babe Deluxe Nursery Center ,Portable Play...",B07RZQC1T1,$82.99,https://www.amazon.com/Portable-Playard-Comfor...,https://m.media-amazon.com/images/I/81GIJxASTh...,Kids_Home_Store,543
183,Regalo My Cot Pals Small Single Portable Toddl...,B087ZKRJ78,$36.99,https://www.amazon.com/Regalo-Cot-Pals-Portabl...,https://m.media-amazon.com/images/I/71uRAqwcwM...,Kids_Home_Store,592
271,Birthday Gifts For Women-Relaxing Spa Gift Box...,B09B9H6YB4,$28.88,https://www.amazon.com/Birthday-Women-Relaxing...,https://m.media-amazon.com/images/I/71JDivtz3U...,Kitchen_Dining,966


0