In [1]:
from sklearn.decomposition import TruncatedSVD
from scipy.sparse.linalg import svds

import matplotlib.pyplot as plt
import seaborn as sns
import pandas as pd
import numpy as np
import warnings
warnings.filterwarnings("ignore")

In [2]:
log_data = pd.read_csv('ratings.csv')
book_data = pd.read_csv('books.csv')

In [3]:
log_data.head()

Unnamed: 0,book_id,user_id,rating
0,1,314,5
1,1,439,3
2,1,588,5
3,1,1169,4
4,1,1185,4


In [4]:
log_data = log_data[['user_id', 'book_id', 'rating']]
log_data.head()

Unnamed: 0,user_id,book_id,rating
0,314,1,5
1,439,1,3
2,588,1,5
3,1169,1,4
4,1185,1,4


In [5]:
log_data=log_data.drop_duplicates(['user_id','book_id'], keep='first')

In [6]:
user_book_log = log_data.pivot(index = 'user_id', columns='book_id', values='rating').fillna(0)
user_book_log.head()

book_id,1,2,3,4,5,6,7,8,9,10,...,9991,9992,9993,9994,9995,9996,9997,9998,9999,10000
user_id,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
5,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [7]:
matrix = user_book_log.values

In [8]:
user_log_mean = np.mean(matrix, axis = 1)

In [9]:
matrix_user_mean = matrix - user_log_mean.reshape(-1, 1)

In [10]:
matrix

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [11]:
matrix.shape

(53424, 10000)

In [12]:
user_log_mean.shape

(53424,)

In [13]:
matrix_user_mean.shape

(53424, 10000)

In [14]:
pd.DataFrame(matrix_user_mean, columns = user_book_log.columns).head()

book_id,1,2,3,4,5,6,7,8,9,10,...,9991,9992,9993,9994,9995,9996,9997,9998,9999,10000
0,-0.0011,-0.0011,-0.0011,-0.0011,-0.0011,-0.0011,-0.0011,-0.0011,-0.0011,-0.0011,...,-0.0011,-0.0011,-0.0011,-0.0011,-0.0011,-0.0011,-0.0011,-0.0011,-0.0011,-0.0011
1,-0.0013,-0.0013,-0.0013,-0.0013,-0.0013,-0.0013,-0.0013,-0.0013,-0.0013,-0.0013,...,-0.0013,-0.0013,-0.0013,-0.0013,-0.0013,-0.0013,-0.0013,-0.0013,-0.0013,-0.0013
2,-0.0002,-0.0002,-0.0002,-0.0002,-0.0002,-0.0002,-0.0002,-0.0002,-0.0002,-0.0002,...,-0.0002,-0.0002,-0.0002,-0.0002,-0.0002,-0.0002,-0.0002,-0.0002,-0.0002,-0.0002
3,-0.0012,-0.0012,-0.0012,-0.0012,-0.0012,-0.0012,-0.0012,-0.0012,-0.0012,-0.0012,...,-0.0012,-0.0012,-0.0012,-0.0012,-0.0012,-0.0012,-0.0012,-0.0012,-0.0012,-0.0012
4,-0.0021,-0.0021,-0.0021,-0.0021,-0.0021,-0.0021,-0.0021,-0.0021,-0.0021,-0.0021,...,-0.0021,-0.0021,-0.0021,-0.0021,-0.0021,-0.0021,-0.0021,-0.0021,-0.0021,-0.0021


In [15]:
U, sigma, Vt = svds(matrix_user_mean, k = 12)

In [16]:


print(U.shape)
print(sigma.shape)
print(Vt.shape)



(53424, 12)
(12,)
(12, 10000)


In [17]:
sigma = np.diag(sigma)

In [18]:
sigma.shape

(12, 12)

In [19]:
sigma[0]

array([182.42648687,   0.        ,   0.        ,   0.        ,
         0.        ,   0.        ,   0.        ,   0.        ,
         0.        ,   0.        ,   0.        ,   0.        ])

In [20]:
sigma[1]

array([  0.        , 187.08179553,   0.        ,   0.        ,
         0.        ,   0.        ,   0.        ,   0.        ,
         0.        ,   0.        ,   0.        ,   0.        ])

In [21]:
svd_user_predicted_log = np.dot(np.dot(U, sigma), Vt) + user_log_mean.reshape(-1, 1)

In [22]:
df_svd_preds = pd.DataFrame(svd_user_predicted_log, columns = user_book_log.columns)
df_svd_preds.head()

book_id,1,2,3,4,5,6,7,8,9,10,...,9991,9992,9993,9994,9995,9996,9997,9998,9999,10000
0,0.00015,0.000224,-0.00063,0.000683,0.000688,-0.001244,0.000233,-0.000239,0.000563,-0.000151,...,0.001925,0.001671,0.001796,0.001709,0.001715,0.001349,0.001859,0.001771,0.001852,0.001797
1,0.001039,0.001725,0.000468,0.001865,0.001764,-0.001533,0.001791,0.000911,0.001427,0.001171,...,0.002124,0.002126,0.002161,0.002275,0.001896,0.001702,0.002175,0.002152,0.002111,0.002189
2,1.3e-05,0.00015,4.3e-05,0.000173,0.000161,-0.000281,0.000172,0.000117,0.000117,0.000131,...,0.000347,0.000296,0.000346,0.000358,0.000328,0.000261,0.000356,0.000341,0.00034,0.000365
3,-0.002855,-0.003767,-0.00339,-0.003003,-0.002688,-0.001774,-0.003131,-0.003648,-0.001711,-0.004185,...,0.001498,0.001396,0.001303,0.00073,0.001595,0.000764,0.00145,0.001448,0.001376,0.001395
4,0.002192,0.002953,0.00149,0.003017,0.002885,-0.001369,0.002178,0.001878,0.002554,0.002302,...,0.003028,0.002948,0.003071,0.002937,0.002443,0.003622,0.003097,0.003094,0.003099,0.002972


In [23]:
df_svd_preds.shape

(53424, 10000)

In [33]:
def recommend_movies(df_svd_preds, user_Id, ori_books_df, ori_log_df, num_recommendations=5):
  user_row_number = user_Id - 1 
  sorted_user_predictions = df_svd_preds.iloc[user_row_number].sort_values(ascending=False)
  user_data = ori_log_df[ori_log_df.userId == user_Id]
  user_history = user_data.merge(ori_books_df, on = 'book_id').sort_values(['rating'], ascending=False)
  recommendations = ori_books_df[~ori_books_df['book_id'].isin(user_history['book_id'])]
  recommendations = recommendations.merge( pd.DataFrame(sorted_user_predictions).reset_index(), on = 'book_id')
  recommendations = recommendations.rename(columns = {user_row_number: 'Predictions'}).sort_values('Predictions', ascending = False).iloc[:num_recommendations, :]

  return user_history, recommendations

In [34]:
already_rated, predictions = recommend_movies(df_svd_preds, 330, book_data, log_data, 10)

In [35]:
already_rated.head(10)

Unnamed: 0,user_id,book_id,rating,id,best_book_id,work_id,books_count,isbn,isbn13,authors,...,ratings_count,work_ratings_count,work_text_reviews_count,ratings_1,ratings_2,ratings_3,ratings_4,ratings_5,image_url,small_image_url


In [36]:
predictions

Unnamed: 0,id,book_id,best_book_id,work_id,books_count,isbn,isbn13,authors,original_publication_year,original_title,...,work_ratings_count,work_text_reviews_count,ratings_1,ratings_2,ratings_3,ratings_4,ratings_5,image_url,small_image_url,Predictions
472,3284,5181,5181,2471173,23,671042572,9780671000000.0,Bret Lott,1991.0,Jewel (Oprah's Book Club),...,32731,587,763,2803,10719,11586,6860,https://s.gr-assets.com/assets/nophoto/book/11...,https://s.gr-assets.com/assets/nophoto/book/50...,0.001835
616,5537,5752,5752,9151,36,142003344,9780142000000.0,Steven Pinker,2002.0,The Blank Slate: The Modern Denial of Human Na...,...,16429,643,397,803,2781,5801,6647,https://s.gr-assets.com/assets/nophoto/book/11...,https://s.gr-assets.com/assets/nophoto/book/50...,0.001785
707,7453,4507,4507,1370491,32,553803077,9780554000000.0,Julie Gregory,2003.0,Sickened,...,13713,1079,271,1109,4097,4942,3294,https://s.gr-assets.com/assets/nophoto/book/11...,https://s.gr-assets.com/assets/nophoto/book/50...,0.001716
258,1145,5204,5204,420701,52,452282829,9780452000000.0,Joyce Carol Oates,1996.0,We Were the Mulvaneys,...,80906,2557,2679,7047,22287,29134,19759,https://images.gr-assets.com/books/1309282868m...,https://images.gr-assets.com/books/1309282868s...,0.001662
337,1789,4325,4325,7995,35,142401757,9780142000000.0,Sarah Dessen,2000.0,Dreamland,...,65731,3813,1556,4772,15338,20590,23475,https://images.gr-assets.com/books/1385861832m...,https://images.gr-assets.com/books/1385861832s...,0.001639
175,653,2526,2526,3213039,231,156007754,9780156000000.0,"José Saramago, Giovanni Pontiero",1995.0,Ensaio Sobre a Cegueira,...,133257,11061,2960,6745,21616,47839,54097,https://images.gr-assets.com/books/1327866409m...,https://images.gr-assets.com/books/1327866409s...,0.001634
270,1238,4952,4952,3271214,53,1932416641,9781932000000.0,Dave Eggers,2006.0,What Is the What: The Autobiography of Valenti...,...,64740,6741,684,2128,9938,25665,26325,https://images.gr-assets.com/books/1328837457m...,https://images.gr-assets.com/books/1328837457s...,0.001538
468,3247,3977,3977,1882574,58,553572946,9780554000000.0,Dan Simmons,1996.0,Endymion,...,33572,969,273,1201,5556,12997,13545,https://images.gr-assets.com/books/1329611385m...,https://images.gr-assets.com/books/1329611385s...,0.001487
138,483,5113,5113,3118417,125,316769029,9780317000000.0,J.D. Salinger,1961.0,Franny and Zooey,...,162697,5297,2841,9398,34950,56515,58993,https://images.gr-assets.com/books/1355037988m...,https://images.gr-assets.com/books/1355037988s...,0.001473
349,1928,3679,3679,910752,71,143037749,9780143000000.0,Zadie Smith,2005.0,On Beauty,...,49803,3904,1357,4393,13695,19496,10862,https://images.gr-assets.com/books/1495961870m...,https://images.gr-assets.com/books/1495961870s...,0.001468
