In [None]:
import pandas as pd
import numpy as np
import pickle

from sklearn.metrics.pairwise import cosine_similarity

### 데이터 불러오기

In [None]:
# Base directory for this notebook's data files (looks like a Colab Google Drive
# mount -- confirm for your environment). File names are appended with plain
# string concatenation, so the trailing slash is required.
path = "/content/drive/MyDrive/Colab Notebooks/recommendation_system/"

In [None]:
# Load the ratings table (ratings joined with book metadata) from the data folder.
rating_books = pd.read_csv(path + 'ratings_books.csv')

# Quick sanity check of the loaded table's dimensions (rows, columns).
print(f'Dataset shape: {rating_books.shape}')

Dataset shape: (100000, 8)


In [None]:
# Preview the first three rows to confirm the column layout.
rating_books.head(3)

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-M
0,276725,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...
1,2313,034545104X,5,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...
2,6543,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books,http://images.amazon.com/images/P/034545104X.0...


In [None]:
# Number of distinct book titles; this becomes the column count of the pivot table.
rating_books['Book-Title'].nunique()

2461

### 피벗 테이블 만들기

In [None]:
# Build the user x book rating matrix: one row per User-ID, one column per
# Book-Title. pivot_table aggregates duplicate (user, title) pairs with its
# default aggfunc (mean); cells with no rating are then filled with 0.
ratings_matrix = (
    rating_books
    .pivot_table(values='Book-Rating', index='User-ID', columns='Book-Title')
    .fillna(0)
)
ratings_matrix.shape

(27101, 2461)

In [None]:
# Flip to book x user orientation so that each row is one book's rating vector.
ratings_matrix_T = ratings_matrix.T
ratings_matrix_T.shape

(2461, 27101)

In [None]:
# Column-wise mean rating per book title. Unrated cells were filled with 0
# when the matrix was built, so they pull these averages toward zero.
ratings_matrix.mean()

Book-Title
007 El Mundo Nunca Es Suficiente                                    0.000000
01-01-00: A Novel of the Millennium                                 0.001181
01-01-00: The Novel of the Millennium                               0.002251
100 Best-Loved Poems (Dover Thrift Editions)                        0.002214
101 Experiments in the Philosophy of Everyday Life                  0.000775
                                                                      ...   
Zen Essence : The Science of Freedom (Shambhala Pocket Classics)    0.000000
Zentak, tome 1 : La Passe des Argonautes                            0.000000
Zuiderkruis: Roman                                                  0.000590
\ Lamb to the Slaughter and Other Stories (Penguin 60s S.)"         0.002841
flambeau@darkcorp.com                                               0.000406
Length: 2461, dtype: float64

### 아이템 간 유사도 산출

In [None]:
# Pairwise cosine similarity between book rating vectors (rows of the
# transposed matrix). With no second argument, scikit-learn compares the
# rows of the input against themselves.
item_sim = cosine_similarity(ratings_matrix_T)

In [None]:
# Wrap the raw cosine-similarity array in a DataFrame labelled by book title
# on both axes, so similarities can be looked up by title.
item_sim_df = pd.DataFrame(
    item_sim,
    index=ratings_matrix.columns,
    columns=ratings_matrix.columns,
)
item_sim_df.head(3)

Book-Title,007 El Mundo Nunca Es Suficiente,01-01-00: A Novel of the Millennium,01-01-00: The Novel of the Millennium,100 Best-Loved Poems (Dover Thrift Editions),101 Experiments in the Philosophy of Everyday Life,101 Great Resumes,1421 : The Year China Discovered America,1984,1st to Die: A Novel,"2002 World Press Photo (World Press Photo, 2002)",...,You and No Other (Topaz Historical Romance),"You're Not Elected, Charlie Brown",Your Family Will Love It!: Quick and Healthy Weekday Meals for the Hard-To-Please,Your Finances in Changing Times (The Christian Financial Concepts Series),"Your Life in Your Hands: Understanding, Preventing and Overcoming Breast Cancer",Zen Essence : The Science of Freedom (Shambhala Pocket Classics),"Zentak, tome 1 : La Passe des Argonautes",Zuiderkruis: Roman,"\ Lamb to the Slaughter and Other Stories (Penguin 60s S.)""",flambeau@darkcorp.com
Book-Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
007 El Mundo Nunca Es Suficiente,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
01-01-00: A Novel of the Millennium,0.0,1.0,0.153766,0.0,0.0,0.0,0.0,0.0,0.051675,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
01-01-00: The Novel of the Millennium,0.0,0.153766,1.0,0.0,0.0,0.0,0.0,0.0,0.027066,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Sanity check: the similarity matrix should be square (books x books).
item_sim_df.shape

(2461, 2461)

In [None]:
# Save the cosine-similarity DataFrame as a pickle file in the working directory.
# The context manager closes (and therefore flushes) the file handle even if
# pickle.dump raises; a bare open() here would leave the handle dangling.
with open('cosine_sim.pickle', 'wb') as f:
    pickle.dump(item_sim_df, f)

In [None]:
# Reload the similarity matrix from disk to verify the round trip.
# NOTE: pickle.load can execute arbitrary code -- only load pickle files that
# this notebook (or another trusted source) produced.
# The context manager ensures the file handle is closed after reading.
with open('cosine_sim.pickle', 'rb') as f:
    cosine_sim = pickle.load(f)
cosine_sim.head()

Book-Title,007 El Mundo Nunca Es Suficiente,01-01-00: A Novel of the Millennium,01-01-00: The Novel of the Millennium,100 Best-Loved Poems (Dover Thrift Editions),101 Experiments in the Philosophy of Everyday Life,101 Great Resumes,1421 : The Year China Discovered America,1984,1st to Die: A Novel,"2002 World Press Photo (World Press Photo, 2002)",...,You and No Other (Topaz Historical Romance),"You're Not Elected, Charlie Brown",Your Family Will Love It!: Quick and Healthy Weekday Meals for the Hard-To-Please,Your Finances in Changing Times (The Christian Financial Concepts Series),"Your Life in Your Hands: Understanding, Preventing and Overcoming Breast Cancer",Zen Essence : The Science of Freedom (Shambhala Pocket Classics),"Zentak, tome 1 : La Passe des Argonautes",Zuiderkruis: Roman,"\ Lamb to the Slaughter and Other Stories (Penguin 60s S.)""",flambeau@darkcorp.com
Book-Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
007 El Mundo Nunca Es Suficiente,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
01-01-00: A Novel of the Millennium,0.0,1.0,0.153766,0.0,0.0,0.0,0.0,0.0,0.051675,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
01-01-00: The Novel of the Millennium,0.0,0.153766,1.0,0.0,0.0,0.0,0.0,0.0,0.027066,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
100 Best-Loved Poems (Dover Thrift Editions),0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.020415,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
101 Experiments in the Philosophy of Everyday Life,0.0,0.0,0.0,0.0,1.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [None]:
# Disabled alternative export of the similarity matrix to CSV.
# NOTE(review): index=False would omit the Book-Title row labels from the file,
# leaving only the column headers -- confirm that is intended before re-enabling.
# item_sim_df.to_csv(path + 'item_sim_df.csv', index=False)