In [1]:
import pandas as pd
import numpy as np
import seaborn as sns
import matplotlib.pyplot as plt
import scipy

In [5]:
# dataset 불러오기 (local에서 할 경우 파일 경로 변경 필요)
ori_user_item_rating = pd.read_csv("/content/drive/MyDrive/machineLearing/ProcessedData2.csv", encoding='ISO-8859-1')
ori_user_item_rating.head()

Unnamed: 0,User-ID,ISBN,Rating,Title,Author,Year,Publisher
0,276725,034545104X,0,Flesh Tones: A Novel,M. J. Rose,2002,Ballantine Books
1,276726,0155061224,5,Rites of Passage,Judith Rae,2001,Heinle
2,276727,0446520802,0,The Notebook,Nicholas Sparks,1996,Warner Books
3,276729,052165615X,3,Help!: Level 1,Philip Prowse,1999,Cambridge University Press
4,276729,0521795028,6,The Amsterdam Connection : Level 4 (Cambridge ...,Sue Leather,2001,Cambridge University Press


In [9]:
from sklearn.model_selection import train_test_split

# Separated into (80% learning, 20% testing)
train_df, test_df = train_test_split(ori_user_item_rating, test_size=0.2, random_state=42)

In [7]:
ori_user_item_rating.pivot(index='User-ID', columns='ISBN', values='Rating')

ISBN,0002005018,0002240114,000225669X,0002558122,0002740230,0006276199,000636988X,0006379702,0006485294,000649840X,...,9722100718,9722509713,9724115380,9724119378,9726101794,9871138016,9995585227,9997522052,B00008NRHQ,B0000DAPP1
User-ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,,,,,,,,,,,...,,,,,,,,,,
8,5.0,,,,,,,,,,...,,,,,,,,,,
9,,,,,,,,,,,...,,,,,,,,,,
10,,,,,,,,,,,...,,,,,,,,,,
12,,,,,,,,,,,...,,,,,,,,,,
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
278846,,,,,,,,,,,...,,,,,,,,,,
278849,,,,,,,,,,,...,,,,,,,,,,
278851,,,,,,,,,,,...,,,,,,,,,,
278852,,,,,,,,,,,...,,,,,,,,,,


In [10]:
def create_user_item_matrix(ratings)->pd.DataFrame:
  mat = ratings.pivot(index='User-ID', columns='ISBN', values='Rating')
  mat[~mat.isna()] = 1

  # 사용자가 보지 않은 책 : Nan -> 0
  mat.fillna(0, inplace=True)
  return mat

In [11]:
user_item_matrix = create_user_item_matrix(train_df)
user_item_matrix

ISBN,0002005018,000225669X,0002740230,0006276199,000649840X,0006513409,0006542808,0006543545,0006546684,0006550789,...,9722100718,9722509713,9724115380,9724119378,9726101794,9871138016,9995585227,9997522052,B00008NRHQ,B0000DAPP1
User-ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
8,1.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
16,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
278846,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
278849,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
278851,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
278852,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [12]:
from scipy.sparse.linalg import svds

In [17]:
def get_svd_prediction(user_item_matrix, k):
  # get U, S(sigma), Vt
  u, s, vt = svds(user_item_matrix.to_numpy(), k=k)

  # 내적으로 원본 행렬 복원 / s -> np.diag 함수로 대각행렬로 변환
  preds = np.dot(np.dot(u, np.diag(s)), vt)

  preds = pd.DataFrame(preds,columns=user_item_matrix.columns, index=user_item_matrix.index)
  preds = (preds - preds.min()) / (preds.max() - preds.min())

  return preds


In [18]:
predictions = get_svd_prediction(user_item_matrix, 10)
predictions

ISBN,0002005018,000225669X,0002740230,0006276199,000649840X,0006513409,0006542808,0006543545,0006546684,0006550789,...,9722100718,9722509713,9724115380,9724119378,9726101794,9871138016,9995585227,9997522052,B00008NRHQ,B0000DAPP1
User-ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
2,0.435170,0.869558,0.032709,0.040435,0.040435,0.039498,0.000868,0.558353,0.002488,0.754162,...,0.480635,0.480635,0.606942,0.528568,0.480635,0.027156,0.008745,0.034688,0.039942,0.005940
8,0.435170,0.869558,0.032709,0.040435,0.040435,0.039498,0.000868,0.558353,0.002488,0.754162,...,0.480635,0.480635,0.606942,0.528568,0.480635,0.027156,0.008745,0.034688,0.039942,0.005940
9,0.435281,0.869395,0.032754,0.040463,0.040463,0.039515,0.001179,0.558173,0.002799,0.754164,...,0.480712,0.480712,0.606870,0.528599,0.480712,0.027171,0.008732,0.034709,0.039972,0.005937
14,0.434256,0.867895,0.034899,0.041469,0.041469,0.049129,0.001133,0.559404,0.002804,0.747370,...,0.480644,0.480644,0.614286,0.528865,0.480644,0.039309,0.016017,0.038874,0.040475,0.013935
16,0.435170,0.869558,0.032709,0.040435,0.040435,0.039498,0.000868,0.558353,0.002488,0.754162,...,0.480635,0.480635,0.606942,0.528568,0.480635,0.027156,0.008745,0.034688,0.039942,0.005940
...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...,...
278846,0.435170,0.869558,0.032709,0.040435,0.040435,0.039498,0.000868,0.558353,0.002488,0.754162,...,0.480635,0.480635,0.606942,0.528568,0.480635,0.027156,0.008745,0.034688,0.039942,0.005940
278849,0.435283,0.869396,0.032754,0.040421,0.040421,0.039479,0.001179,0.558173,0.002799,0.754177,...,0.480689,0.480689,0.606857,0.528600,0.480689,0.027126,0.008733,0.034667,0.039930,0.005938
278851,0.435169,0.869559,0.032709,0.040446,0.040446,0.039507,0.000868,0.558356,0.002488,0.754158,...,0.480640,0.480640,0.606947,0.528569,0.480640,0.027166,0.008745,0.034697,0.039950,0.005939
278852,0.435170,0.869558,0.032709,0.040435,0.040435,0.039498,0.000868,0.558353,0.002488,0.754162,...,0.480635,0.480635,0.606942,0.528568,0.480635,0.027156,0.008745,0.034688,0.039942,0.005940


In [79]:
random_user_id = test_df['User-ID'].sample(1).iloc[0]

In [80]:
user_ISBNs = train_df[train_df['User-ID'] == random_user_id]['ISBN'].tolist()

In [81]:
user_books = train_df[train_df['ISBN'].isin(user_ISBNs)]

In [82]:
print(random_user_id, "가 읽은 책")
print(len(user_books), user_books)

278418 가 읽은 책
3473       User-ID        ISBN  Rating  \
4412   278418  0345285549       0   
4808   278418  0373122772       5   
6070   278418  0517598183       0   
5312   278418  0380775263       0   
6062   278418  0517065258       0   
...       ...         ...     ...   
6265   278418  0590010891       0   
5734   278418  0440407613       0   
5191   278418  0373708963       0   
5390   278418  0394825527       0   
7270   278418  0816743274       0   

                                                  Title               Author  \
4412                              Elfstones of Shannara         Terry Brooks   
4808          The Disobedient Mistress  (Sister Brides)         Lynne Graham   
6070  Ageless Body, Timeless Mind: A Companion Guide...  Deepak, M.D. Chopra   
5312                                    Red Sky Warrior        Genell Dellin   
6062                                    Monster Stories      Jane Launchbury   
...                                                 ... 

In [83]:
user_predictions = predictions.loc[random_user_id].sort_values(ascending=False)

In [84]:
user_predictions = user_predictions[~user_predictions.index.isin(user_ISBNs)]

In [85]:
user_predictions = user_predictions.head(5)

In [86]:
user_recommendations = train_df[train_df['ISBN'].isin(user_predictions.index)]

In [87]:
user_recommendations["recommadation_score"] = user_predictions.values

A value is trying to be set on a copy of a slice from a DataFrame.
Try using .loc[row_indexer,col_indexer] = value instead

See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy
  user_recommendations["recommadation_score"] = user_predictions.values


In [88]:
print("recommended books")
print(user_recommendations)

recommended books
       User-ID        ISBN  Rating                                    Title  \
9399       505  067170124X       0      HIS LITTLE WOMEN : HIS LITTLE WOMEN   
2835    277982  0380718669       0    Tigers Burning: A Duffy House Mystery   
2770    277962  0380710846       0  Caesar's Women (Masters of Rome Series)   
10577     1063  0671721038       6             The New Hugo Winners, Vol. 2   
2541    277879  0671737643       0                         Whitney, My Love   

                   Author  Year                           Publisher  \
9399       Judith Rossner  1991                              Pocket   
2835         Crabbe Evers  1995  Harper Mass Market Paperbacks (Mm)   
2770   Colleen McCullough  1997                                Avon   
10577        Isaac Asimov  1991                                Baen   
2541      Judith McNaught  1991                              Pocket   

       recommadation_score  
9399                   1.0  
2835                  

In [89]:
len(user_books)

3473

In [90]:
user_recommendations

Unnamed: 0,User-ID,ISBN,Rating,Title,Author,Year,Publisher,recommadation_score
9399,505,067170124X,0,HIS LITTLE WOMEN : HIS LITTLE WOMEN,Judith Rossner,1991,Pocket,1.0
2835,277982,0380718669,0,Tigers Burning: A Duffy House Mystery,Crabbe Evers,1995,Harper Mass Market Paperbacks (Mm),1.0
2770,277962,0380710846,0,Caesar's Women (Masters of Rome Series),Colleen McCullough,1997,Avon,1.0
10577,1063,0671721038,6,"The New Hugo Winners, Vol. 2",Isaac Asimov,1991,Baen,1.0
2541,277879,0671737643,0,"Whitney, My Love",Judith McNaught,1991,Pocket,1.0
