In [1]:
#### Import required packages
import pandas as pd
import numpy as np 
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors

In [2]:
#### Load the book-rating records from CSV and preview the first 5 rows.
csv_path = './Data/book_recom.csv'
data = pd.read_csv(csv_path)
data.head(5)

Unnamed: 0,userID,ISBN,bookRating,bookTitle,totalRatingCount,Location
0,276725,034545104X,0,Flesh Tones: A Novel,60,"tyler, texas, usa"
1,2313,034545104X,5,Flesh Tones: A Novel,60,"cincinnati, ohio, usa"
2,6543,034545104X,0,Flesh Tones: A Novel,60,"strafford, missouri, usa"
3,8680,034545104X,5,Flesh Tones: A Novel,60,"st. charles county, missouri, usa"
4,10314,034545104X,9,Flesh Tones: A Novel,60,"beaverton, oregon, usa"


In [3]:
#### Check the shape of the dataset as (number of rows, number of columns)
data.shape

(26500, 6)

### CREATING A PIVOT TABLE 

In [4]:
#### Build a wide rating matrix: one row per bookTitle, one column per userID.
# Cells hold bookRating; (book, user) pairs with no record are filled with 0.
ratings_wide = data.pivot(index='bookTitle', columns='userID', values='bookRating')
data_pivot = ratings_wide.fillna(0)
data_pivot.head()

userID,14,23,26,51,67,99,135,243,254,256,...,278483,278514,278535,278552,278582,278633,278740,278769,278773,278843
bookTitle,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1st to Die: A Novel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"A Child Called \It\"": One Child's Courage to Survive""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A Cold Heart: An Alex Delaware Novel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A Kiss of Shadows (Meredith Gentry Novels (Paperback)),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A Man in Full,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [5]:
#### Check the shape of the pivot table: (number of book titles, number of users)
data_pivot.shape

(198, 11337)

In [6]:
#### Store the (mostly zero) pivot table in Compressed Sparse Row format
dense_ratings = data_pivot.to_numpy()
book_data_matrix = csr_matrix(dense_ratings)
book_data_matrix

<198x11337 sparse matrix of type '<class 'numpy.float64'>'
	with 11110 stored elements in Compressed Sparse Row format>

### COMPUTING ITEM SIMILARITIES USING COSINE METRICS

In [7]:
#### Fit a nearest-neighbours model on the book vectors using cosine distance
# fit() returns the estimator itself, so construction and fitting can be chained.
model_knn = NearestNeighbors(metric='cosine').fit(book_data_matrix)
model_knn

NearestNeighbors(metric='cosine')

# Recommendation for reading:
* Find the 5 books most similar (maximum similarity means lowest distance) to the first book, '1st to Die: A Novel'.
* The function kneighbors() is used to find k number of neighbors of a point.
* We need to request 6 neighbors for book1 rather than 5. The shortest possible distance is 0, which is the distance from book1 to itself, so book1 is returned as its own nearest neighbor and has to be skipped, leaving 5 actual recommendations.

In [8]:
#### Query 6 nearest neighbours for book-1 (6 are requested so that 5 remain after skipping book-1 itself)
query_index1 = 0   # row position of the 1st book in data_pivot
# Selecting with a list keeps the row 2-D, shape (1, number of users), as kneighbors expects.
query_vector = data_pivot.iloc[[query_index1]].values
distances, indices = model_knn.kneighbors(query_vector, n_neighbors=6)

In [9]:
#### Display the cosine distances of the neighbours found for book-1
distances

array([[0.        , 0.85554748, 0.89964868, 0.90149976, 0.90925394,
        0.92285717]])

In [10]:
#### Display the row positions (in data_pivot) of the neighbours found for book-1
indices

array([[  0, 110,  12, 188,  26,  78]], dtype=int64)

In [11]:
#### Print the books most similar to book-1, excluding book-1 itself
print('Recommendations for {0}\n'.format(data_pivot.index[query_index1]))
# Do not assume the query book is always the first neighbour: another book with
# an identical rating vector also has distance 0 and may be ranked ahead of it.
neighbour_ids = [i for i in indices.flatten() if i != query_index1]
# One of the requested neighbours was reserved for the query book, so show at
# most (number of neighbours - 1) titles even if the query was not returned.
for i in neighbour_ids[:indices.size - 1]:
    print('\t',data_pivot.index[i])

Recommendations for 1st to Die: A Novel

	 Roses Are Red (Alex Cross Novels)
	 Along Came a Spider (Alex Cross Novels)
	 Violets Are Blue
	 Cat &amp; Mouse (Alex Cross Novels)
	 Jack &amp; Jill (Alex Cross Novels)


In [12]:
#### Collect the users who rated book-1 higher than 5
# Boolean-mask the first row of the pivot table instead of looping over a
# hard-coded column count, so the cell keeps working when the set of users changes.
book1_ratings = data_pivot.iloc[0]
list1 = [str(user_id) for user_id in book1_ratings.index[book1_ratings > 5]]
print(','.join(list1))

6074,6543,9178,9417,9492,16795,17229,20445,27399,27472,28899,30276,33124,35859,37377,37874,43246,43626,45284,46417,48732,50784,51207,51450,52159,53220,55187,55487,57833,58224,62542,67288,69389,69512,70414,75825,77480,77856,81216,82497,83287,84024,93363,94965,95359,96843,99204,101041,104144,104636,107853,110912,115435,115948,123115,124048,125519,128085,128915,134761,136382,138232,142579,143175,143294,144727,144953,146386,148344,152651,154944,161744,163134,164858,167934,170652,171697,174367,177458,178035,178834,180658,180927,184152,184513,187262,187624,189516,189558,190925,191178,192428,194719,196202,196886,199515,204591,204753,207750,211359,213150,215820,216442,218286,219726,222035,222220,223644,225763,225810,227428,228764,230505,230949,232131,232945,234174,235282,236426,236606,236782,237089,240207,242143,243100,246513,250947,251378,254201,256915,256989,257419,258185,258534,261829,266056,266109,267642,273820,278535


In [13]:
#### Another way to filter the users who rated book-1 higher than 5
# After reset_index() the rating column is named after the row label (the book
# title), so look the title up instead of hard-coding it.
book1_title = data_pivot.index[0]
data_1stnovel = data_pivot.iloc[0].reset_index()
data_1stnovel[data_1stnovel[book1_title] > 5]

Unnamed: 0,userID,1st to Die: A Novel
188,6074,8.0
211,6543,9.0
313,9178,8.0
324,9417,7.0
327,9492,10.0
...,...,...
10826,266056,7.0
10829,266109,10.0
10893,267642,8.0
11114,273820,6.0
