In [60]:
import numpy as np
import pandas as pd
from math import sqrt
from sklearn.metrics import mean_squared_error
from sklearn.metrics.pairwise import pairwise_distances
from sklearn.preprocessing import LabelEncoder

In [90]:
# Read the ratings file (latin-1 encoded) and order the rows by user, then by
# ISBN, in a single chained expression instead of an in-place sort.
df = pd.read_csv(
    "BX-Book-Ratings.csv",
    encoding="latin1",
).sort_values(by=["user_id", "isbn"])

In [91]:
# Keep only the first 10,000 rows of the sorted frame (positional slice) so the
# dense user/book matrices built later stay small.
df = df.iloc[:10000]
df

Unnamed: 0,user_id,isbn,rating
9561,2,195153448,0
9562,7,34542252,0
9571,8,074322678X,5
9574,8,080652121X,0
9576,8,1552041778,5
...,...,...,...
19556,3728,553574671,9
19553,3728,60008776,7
19554,3728,61057819,0
19557,3728,761513779,0


In [92]:
# Number of distinct user ids in the truncated frame.
unique_users = len(df["user_id"].unique())
unique_users

1323

In [93]:
# Number of distinct ISBNs in the truncated frame.
unique_books = len(df["isbn"].unique())
unique_books

8742

In [94]:
# Dense matrix with one row per distinct user and one column per distinct book,
# initialised to zero.
data_matrix = np.zeros(shape=(unique_users, unique_books), dtype=np.float64)
data_matrix

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.]])

In [95]:
# Map every ISBN string to an integer code (LabelEncoder assigns 0-based codes)
# and attach it as a new trailing column.
df = df.assign(isbn_encoded=LabelEncoder().fit_transform(df["isbn"]))

In [96]:
# Preview the first five rows, including the new isbn_encoded column.
df.head(n=5)

Unnamed: 0,user_id,isbn,rating,isbn_encoded
9561,2,195153448,0,1467
9562,7,34542252,0,2490
9571,8,074322678X,5,443
9574,8,080652121X,0,475
9576,8,1552041778,5,877


In [97]:
# Populate the user/book rating matrix from EVERY rating row (not just the
# first five).
#
# Row index: user_id holds raw ids, not 0..n-1 positions, so the ids are mapped
# to contiguous 0-based row numbers; indexing with `user_id - 1` would run past
# the matrix's `unique_users` rows.
# Column index: isbn_encoded is already a 0-based code, so it is used as-is
# (subtracting 1 would shift every column and wrap code 0 to the last column).
# Value: the rating itself is what gets stored in the cell.
user_rows, _ = pd.factorize(df['user_id'], sort=True)
book_cols = df['isbn_encoded'].to_numpy()
data_matrix[user_rows, book_cols] = df['rating'].to_numpy()

In [98]:
# pairwise_distances(metric='cosine') returns cosine DISTANCE (0 for identical
# direction, larger for dissimilar rows). The matrices below are used as
# weights, so convert distance to similarity: similarity = 1 - distance.
# Without this, the most dissimilar users/books would receive the largest
# weights and each row would weight itself with 0.
user_similarity = 1 - pairwise_distances(data_matrix, metric='cosine')
# Transpose so that rows are books; yields a book-by-book similarity matrix.
item_similarity = 1 - pairwise_distances(data_matrix.T, metric='cosine')

In [99]:
def predict(ratings, similarity, type='user'):
    """Predict ratings as a similarity-weighted average of known ratings.

    Parameters
    ----------
    ratings : numpy.ndarray
        2-D rating matrix; rows are treated as users and columns as books.
    similarity : numpy.ndarray
        Square weight matrix. For ``type='user'`` it is multiplied from the
        left (``similarity.dot(...)``), so its size must match the number of
        rows of ``ratings``; for ``type='book'`` it is multiplied from the
        right (``ratings.dot(similarity)``), so its size must match the number
        of columns.
    type : {'user', 'book'}, default 'user'
        Which flavour of collaborative filtering to apply.

    Returns
    -------
    numpy.ndarray
        Matrix of predicted ratings with the same shape as ``ratings``.

    Raises
    ------
    ValueError
        If ``type`` is neither ``'user'`` nor ``'book'``. (Previously this
        surfaced as an ``UnboundLocalError`` on the return statement.)

    Notes
    -----
    The normaliser is the row-wise sum of absolute weights. If such a sum is
    zero the division yields ``nan``/``inf``, as NumPy division does.
    """
    # Row-wise total of absolute weights, shared by both branches.
    weight_totals = np.abs(similarity).sum(axis=1)

    if type == 'user':
        # Centre each user's ratings on that user's own mean so that users
        # with different rating scales are comparable, then add the mean back.
        mean_user_rating = ratings.mean(axis=1)
        ratings_diff = ratings - mean_user_rating[:, np.newaxis]
        pred = mean_user_rating[:, np.newaxis] + similarity.dot(
            ratings_diff) / weight_totals[:, np.newaxis]
    elif type == 'book':
        # No mean-centring here: weighted sum over books, normalised per
        # column by that book's total weight.
        pred = ratings.dot(similarity) / weight_totals[np.newaxis, :]
    else:
        raise ValueError(
            "type must be 'user' or 'book', got {!r}".format(type))
    return pred

In [116]:
# User-based predictions: weight other users' ratings by user-to-user weights.
user_prediction = predict(
    ratings=data_matrix,
    similarity=user_similarity,
    type='user',
)
# Book-based predictions: weight a user's own ratings by book-to-book weights.
book_prediction = predict(
    ratings=data_matrix,
    similarity=item_similarity,
    type='book',
)