## Data

In [67]:
import pandas as pd
import numpy as np
from scipy.sparse.linalg import svds
book_ratings = pd.read_csv('BX-Book-Ratings.csv', sep=';', error_bad_lines=False, encoding='latin-1')

users = pd.read_csv('BX-Users.csv', sep=';', error_bad_lines=False, encoding='latin-1')

books = pd.read_csv('BX-Books.csv', sep=';', error_bad_lines=False, encoding='latin-1', usecols=['ISBN', 'Book-Title', 'Book-Author', 'Year-Of-Publication', 'Publisher'])

books.head()

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada
2,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial
3,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux
4,393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company


In [68]:
book_ratings = pd.merge(book_ratings, books, on='ISBN')
columns_unused = ['Book-Author', 'Year-Of-Publication', 'Publisher']
book_ratings = book_ratings.drop(columns_unused, axis=1)
book_ratings.head()

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title
0,276725,034545104X,0,Flesh Tones: A Novel
1,2313,034545104X,5,Flesh Tones: A Novel
2,6543,034545104X,0,Flesh Tones: A Novel
3,8680,034545104X,5,Flesh Tones: A Novel
4,10314,034545104X,9,Flesh Tones: A Novel


In [82]:
book_ratings.dropna(axis=0, subset=['Book-Title'])
popularity_books_threshold = 200
filter_books = book_ratings['Book-Title'].value_counts() > popularity_books_threshold
filter_books = filter_books[filter_books].index.tolist()

popularity_customer_threshold = 200
customer_filter = book_ratings['User-ID'].value_counts() > popularity_customer_threshold
customer_filter = customer_filter[customer_filter].index.tolist()


df = book_ratings[book_ratings['User-ID'].isin(filter_users)]
df = book_ratings[book_ratings['Book-Title'].isin(filter_books)]
print(df['User-ID'].value_counts().idxmax())
n_users = df['User-ID'].unique().shape[0]
n_books = df['ISBN'].unique().shape[0]
print('users = ' + str(n_users) + ' | Number books = ' + str(n_books))
df.head()


11676
users = 27719 | Number books = 1122


Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title
62,276727,446520802,0,The Notebook
63,278418,446520802,0,The Notebook
64,638,446520802,0,The Notebook
65,3363,446520802,0,The Notebook
66,7158,446520802,10,The Notebook


In [71]:
df_pivot = df.pivot(index = 'User-ID', columns ='ISBN', values = 'Book-Rating').fillna(0)
df_pivot.head()

ISBN,0001047973,0020697406,0060093102,0060094818,0060129565,0060158638,0060168013,006017322X,0060175400,0060198133,...,B00006JO6O,B00007CWQC,B00008WFXL,B00009EF82,B0000C7BNG,B0000E63CJ,B0000T6KHI,B0000VZEH8,B0001FZGPI,B0001PIOX4
User-ID,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
8,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
9,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
14,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
16,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
26,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


## Singular Value Decomposition

In [84]:
matrix = df_pivot.as_matrix()
user_ratings_mean = np.mean(matrix, axis = 1)
Ratings = matrix - user_ratings_mean.reshape(-1, 1)
print(Ratings.shape)
sparsity = round(1.0 - len(book_ratings) / float(n_users * n_books), 3)
print('Sparsity level is ' +  str(sparsity * 100) + '%')
U, sigma, Vt = svds(Ratings_demeaned, k = 25)

sigma = np.diag(sigma)

all_user_predicted_ratings = np.dot(np.dot(U, sigma), Vt) + user_ratings_mean.reshape(-1, 1)

preds = pd.DataFrame(all_user_predicted_ratings, columns = df_pivot.columns)
preds.head()

  """Entry point for launching an IPython kernel.


(27719, 1122)
Sparsity level is 96.7%


ISBN,0001047973,0020697406,0060093102,0060094818,0060129565,0060158638,0060168013,006017322X,0060175400,0060198133,...,B00006JO6O,B00007CWQC,B00008WFXL,B00009EF82,B0000C7BNG,B0000E63CJ,B0000T6KHI,B0000VZEH8,B0001FZGPI,B0001PIOX4
0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
2,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
3,0.009901,0.008025,0.008025,0.009132,0.007749,0.008156,0.007407,0.007385,-0.000329,0.004545,...,0.008025,0.008025,0.008025,0.007609,0.008149,0.008062,0.008926,0.007891,0.00809,0.008025
4,0.041087,-0.003569,-0.003569,0.035871,-0.002159,-0.008405,0.003479,-0.008726,0.110784,0.025275,...,-0.003569,-0.003569,-0.003569,-0.006599,-0.003953,-0.002787,-0.010797,0.000547,-0.002676,-0.003569


In [89]:
def what_is_recommend_books(predictions, userID, books, original_ratings, num_recommendations):
    
    user_row_number = userID - 1 
    sorted_user_predictions = preds.iloc[user_row_number].sort_values(ascending=False)
    
    user_data = original_ratings[original_ratings['User-ID'] == (userID)]
    user_full = (user_data.merge(books, how = 'left', left_on = 'ISBN', right_on = 'ISBN').sort_values(['Book-Rating'], ascending=False))

    print('User with id {0} has rated {1} books.'.format(userID, user_full.shape[0]))
  
    recommendations = (books[~books['ISBN'].isin(user_full['ISBN'])].
         merge(pd.DataFrame(sorted_user_predictions).reset_index(), how = 'left', left_on = 'ISBN', right_on = 'ISBN').
         rename(columns = {user_row_number: 'Predictions'}).
         sort_values('Predictions', ascending = False).iloc[:num_recommendations, :-1])

    return user_full, recommendations
already_rated, predictions = what_is_recommend_books(preds, 11676, books, book_ratings, 20)


User with id 11676 has rated 11144 books.


In [76]:
already_rated, predictions = what_is_recommend_books(preds, 11676, books, book_ratings, 20)

User 11676 has already rated 11144 books.


In [77]:
already_rated.head(20)

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title_x,Book-Title_y,Book-Author,Year-Of-Publication,Publisher
0,11676,0446520802,10,The Notebook,The Notebook,Nicholas Sparks,1996,Warner Books
4228,11676,0060083263,10,"Sleeping Beauty (Margolin, Phillip)","Sleeping Beauty (Margolin, Phillip)",Phillip Margolin,2004,HarperCollins
4379,11676,0060959274,10,The Justus Girls,The Justus Girls,"Evelyn \Slim\"" Lambright""",2002,Perennial
4375,11676,0060953691,10,La Cucina: A Novel of Rapture,La Cucina: A Novel of Rapture,Lily Prior,2001,Ecco
4351,11676,0060911395,10,Last Chance Garage: A System-By-System Guide t...,Last Chance Garage: A System-By-System Guide t...,Brad Sears,1984,HarperCollins Publishers
4350,11676,0060909994,10,Fables for Our Time and Famous Poems (Harper C...,Fables for Our Time and Famous Poems (Harper C...,James Thurber,1990,Perennial
4347,11676,0060809736,10,Death Be Not Proud: A Memoir,Death Be Not Proud: A Memoir,John Gunther,1989,Harpercollins
4344,11676,0060807792,10,Our Town: A Play in Three Acts,Our Town: A Play in Three Acts,Thornton Niven Wilder,1985,Harpercollins
4340,11676,0060801921,10,Think on These Things,Think on These Things,J. Krishnamurti,1975,HarperCollins Publishers
4337,11676,0060609176,10,Meeting Jesus Again for the First Time : The H...,Meeting Jesus Again for the First Time : The H...,Marcus J. Borg,1995,HarperSanFrancisco


In [78]:
predictions

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher
8715,451160525,"The Gunslinger (The Dark Tower, Book 1)",Stephen King,1994,New American Library
3905,60929871,Brave New World,Aldous Huxley,1998,Perennial
2251,679746048,"Girl, Interrupted",SUSANNA KAYSEN,1994,Vintage
3117,1573225517,High Fidelity,Nick Hornby,1996,Riverhead Books
21456,451169514,It,Stephen King,1997,Signet Book
5911,671727796,The Color Purple,Alice Walker,1990,Pocket
10131,618002235,"The Two Towers (The Lord of the Rings, Part 2)",J. R. R. Tolkien,1999,Houghton Mifflin Company
4578,451167317,The Dark Half,Stephen King,1994,Signet Book
2158,399141146,The Hundred Secret Senses,Amy Tan,1995,Putnam Pub Group
923,805063889,Nickel and Dimed: On (Not) Getting By in America,Barbara Ehrenreich,2001,Metropolitan Books
