# Collaborative recommendations

##  BX-Book Data

In [9]:
import pandas as pd
import numpy as np
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
book_ratings = pd.read_csv('BX-Book-Ratings.csv', sep=';', error_bad_lines=False, encoding='latin-1')

customers = pd.read_csv('BX-Users.csv', sep=';', error_bad_lines=False, encoding='latin-1')

books = pd.read_csv('BX-Books.csv', sep=';', error_bad_lines=False, encoding='latin-1')
print(books.shape)

#Remove duplicate rows for Book-Title
books = books.drop_duplicates(['Book-Title'], keep='last')
print(books.shape)
named_ratings = pd.merge(book_ratings, books, on='ISBN')
unused_columns = ['Book-Author', 'Year-Of-Publication', 'Publisher', 'Image-URL-S', 'Image-URL-M' , 'Image-URL-L']
named_ratings = named_ratings.drop(unused_columns, axis=1)
named_ratings.head()
# ratings_with_title = ratings_with_title.dropna(axis=0, subset=['Book-Title'])

b'Skipping line 6452: expected 8 fields, saw 9\nSkipping line 43667: expected 8 fields, saw 10\nSkipping line 51751: expected 8 fields, saw 9\n'
b'Skipping line 92038: expected 8 fields, saw 9\nSkipping line 104319: expected 8 fields, saw 9\nSkipping line 121768: expected 8 fields, saw 9\n'
b'Skipping line 144058: expected 8 fields, saw 9\nSkipping line 150789: expected 8 fields, saw 9\nSkipping line 157128: expected 8 fields, saw 9\nSkipping line 180189: expected 8 fields, saw 9\nSkipping line 185738: expected 8 fields, saw 9\n'
b'Skipping line 209388: expected 8 fields, saw 9\nSkipping line 220626: expected 8 fields, saw 9\nSkipping line 227933: expected 8 fields, saw 11\nSkipping line 228957: expected 8 fields, saw 10\nSkipping line 245933: expected 8 fields, saw 9\nSkipping line 251296: expected 8 fields, saw 9\nSkipping line 259941: expected 8 fields, saw 9\nSkipping line 261529: expected 8 fields, saw 9\n'


(271360, 8)
(242135, 8)


Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title
0,276725,034545104X,0,Flesh Tones: A Novel
1,2313,034545104X,5,Flesh Tones: A Novel
2,6543,034545104X,0,Flesh Tones: A Novel
3,8680,034545104X,5,Flesh Tones: A Novel
4,10314,034545104X,9,Flesh Tones: A Novel


In [21]:


best_known_books = 50
filter_books = named_ratings['Book-Title'].value_counts() > best_known_books
filter_books = filter_books[filter_books].index.tolist()

popularity_customer_threshold = 50
customer_filter = named_ratings['User-ID'].value_counts() > popularity_customer_threshold
customer_filter = customer_filter[customer_filter].index.tolist()


df = named_ratings[named_ratings['User-ID'].isin(customer_filter)]
df = named_ratings[named_ratings['Book-Title'].isin(filter_books)]
df.head()


Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title
0,276725,034545104X,0,Flesh Tones: A Novel
1,2313,034545104X,5,Flesh Tones: A Novel
2,6543,034545104X,0,Flesh Tones: A Novel
3,8680,034545104X,5,Flesh Tones: A Novel
4,10314,034545104X,9,Flesh Tones: A Novel


## kNN(k Nearest Neighbors)

In [5]:
df_pivot = df.pivot(index='Book-Title', columns='User-ID', values='Book-Rating').fillna(0)
df_pivot.head()

User-ID,9,14,44,75,99,165,178,193,228,242,...,278724,278732,278755,278771,278773,278798,278832,278843,278851,278854
Book-Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
16 Lighthouse Road,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
1st to Die: A Novel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
204 Rosewood Lane,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
4 Blondes,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A 2nd Helping of Chicken Soup for the Soul (Chicken Soup for the Soul Series (Paper)),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [24]:
generate_dataframe_matrix = csr_matrix(df_pivot.values)
model_knn = NearestNeighbors(metric = 'cosine', algorithm = 'brute')
print(model_knn.fit(generate_dataframe_matrix))
query_index = np.random.choice(df_pivot.shape[0])
distances, indices = model_knn.kneighbors(df_pivot.iloc[query_index, :].values.reshape(1, -1), n_neighbors=6)

for i in range (0, len(distances.flatten())):
    if i == 0:
        print('What is recomanded for \'{0}\':\n'.format(df_pivot.index[query_index]))
    else:
        print('{0}: \'{1}\''.format(i, df_pivot.index[indices.flatten()[i]]))


NearestNeighbors(algorithm='brute', leaf_size=30, metric='cosine',
         metric_params=None, n_jobs=None, n_neighbors=5, p=2, radius=1.0)
What is recomanded for 'A Rose For Her Grave &amp; Other True Cases (Ann Rule's Crime Files)':

1: 'Winter Moon'
2: 'Macgregor Brides (Macgregors)'
3: 'Purity in Death'
4: 'Time And Again'
5: 'The Magic of You (Malory Novels (Paperback))'
