# kNN Collaborative Recommendation

Finds clusters of similar users based on common book ratings, and make predictions using the average rating of top-k nearest neighbors. 


In [1]:
import warnings
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
%matplotlib inline
import seaborn as sns
warnings.filterwarnings('ignore')

books = pd.read_csv('Books.csv')
users = pd.read_csv('Users.csv')
ratings = pd.read_csv('Ratings.csv')

# Merge available datasets
books_rating = pd.merge(ratings, books, on='ISBN')
books_rating.drop(columns = ['Year-Of-Publication', 'Publisher', 'Book-Author', 'Image-URL-S', 'Image-URL-M', 'Image-URL-L'], axis = 1, inplace = True)
books_rating.head()

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title
0,276725,034545104X,0,Flesh Tones: A Novel
1,2313,034545104X,5,Flesh Tones: A Novel
2,6543,034545104X,0,Flesh Tones: A Novel
3,8680,034545104X,5,Flesh Tones: A Novel
4,10314,034545104X,9,Flesh Tones: A Novel


### Compute count ratings for recommendations

In [2]:
TotalRatingCount = (books_rating.
                    groupby(by = ['Book-Title'])['Book-Rating']
                    .count()
                    .reset_index()
                    .rename(columns = {'Book-Rating': 'TotalRatingCount'})
                     ).dropna(axis = 0, subset = ['Book-Title'])
TotalRatingCount.head()

Unnamed: 0,Book-Title,TotalRatingCount
0,A Light in the Storm: The Civil War Diary of ...,4
1,Always Have Popsicles,1
2,Apple Magic (The Collector's series),1
3,"Ask Lily (Young Women of Faith: Lily Series, ...",1
4,Beyond IBM: Leadership Marketing and Finance ...,1


In [3]:
TotalRatings = books_rating.merge(TotalRatingCount, left_on = 'Book-Title', right_on = 'Book-Title', how = 'left')
TotalRatings.head()

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,TotalRatingCount
0,276725,034545104X,0,Flesh Tones: A Novel,60
1,2313,034545104X,5,Flesh Tones: A Novel,60
2,6543,034545104X,0,Flesh Tones: A Novel,60
3,8680,034545104X,5,Flesh Tones: A Novel,60
4,10314,034545104X,9,Flesh Tones: A Novel,60


In [4]:
pd.set_option('display.float_format', lambda x: '%.3f' % x)
print(TotalRatings['TotalRatingCount'].describe())

count   1031136.000
mean         69.782
std         175.338
min           1.000
25%           3.000
50%          13.000
75%          61.000
max        2502.000
Name: TotalRatingCount, dtype: float64


### Filter Recommendations
#### Example
    - US and Canada users
    - by Popularity (50)

In [5]:
popularity_threshold = 50
Top_Popularity  = TotalRatings.query('TotalRatingCount >= @popularity_threshold')

Top_Location_Popular = Top_Popularity.merge(users, left_on = 'User-ID', right_on = 'User-ID', how = 'left')

us_canada_ratings = Top_Location_Popular[Top_Location_Popular['Location'].str.contains("usa|canada")].drop('Age', axis=1).drop_duplicates(['User-ID', 'Book-Title'])

us_canada_ratings.head()

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,TotalRatingCount,Location
0,276725,034545104X,0,Flesh Tones: A Novel,60,"tyler, texas, usa"
1,2313,034545104X,5,Flesh Tones: A Novel,60,"cincinnati, ohio, usa"
2,6543,034545104X,0,Flesh Tones: A Novel,60,"strafford, missouri, usa"
3,8680,034545104X,5,Flesh Tones: A Novel,60,"st. charles county, missouri, usa"
4,10314,034545104X,9,Flesh Tones: A Novel,60,"beaverton, oregon, usa"


### kNN Recommendation Model
- Create User-Product Matrix

In [6]:
from scipy.sparse import csr_matrix

us_canada_pivot = us_canada_ratings.pivot(index   = 'Book-Title', 
                                          columns = 'User-ID', 
                                          values  = 'Book-Rating').fillna(0)

us_canada_matrix = csr_matrix(us_canada_pivot.values)

- Specify kNN model

In [7]:
from sklearn.neighbors import NearestNeighbors

model_kNN = NearestNeighbors(n_neighbors = 10,
                             metric      = 'cosine', 
                             algorithm   = 'brute').fit(us_canada_matrix)

- Random Query Book recommendations (examples)

In [8]:
random_query_index = np.random.choice(us_canada_pivot.shape[0])

distances, indices = model_kNN.kneighbors(us_canada_pivot.iloc[random_query_index,:].values.reshape(1,-1))

In [9]:
for i in range(0, len(distances.flatten())):
    if i == 0:
        print('Recommendations for {0}:\n'.format(us_canada_pivot.index[random_query_index]))
    else:
        print('{0}: {1}, with distance of {2}:'.format(i, us_canada_pivot.index[indices.flatten()[i]], distances.flatten()[i]))

Recommendations for Night:

1: Anne of Windy Poplars (Anne of Green Gables Novels (Paperback)), with distance of 0.8771425615762302:
2: Anne of Ingleside (Anne of Green Gables Novels (Paperback)), with distance of 0.8892375155485829:
3: Anne of Green Gables (Anne of Green Gables Novels (Paperback)), with distance of 0.9067074319034085:
4: A 2nd Helping of Chicken Soup for the Soul (Chicken Soup for the Soul Series (Paper)), with distance of 0.9079341961235701:
5: Anne's House of Dreams, with distance of 0.9132363153415785:
6: The Chosen, with distance of 0.9153579836752306:
7: All That Glitters (Landry), with distance of 0.9159943898130107:
8: Rain, with distance of 0.9209070488896678:
9: Degree of Guilt, with distance of 0.9219484887787073:
