In [1]:
import numpy as np
import pandas as pd
from scipy.sparse import csr_matrix
from sklearn.neighbors import NearestNeighbors
import sklearn

#### Note:
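In a recommendation system, it is advisable (and often necessary) to store the ratings as a sparse matrix in a compressed format, because most user–item cells are empty.
- csr_matrix(): Compressed Sparse Row matrix.
- With the cosine metric, the shorter the distance, the greater the similarity (cosine distance = 1 - cosine similarity).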

In [2]:
# Load the book-ratings table from CSV and preview its first five rows.
# NOTE(review): this is an absolute, machine-specific path; the notebook only
# runs where this exact file exists.
book_csv_path='C:/Users/Subhadri/Desktop/Data Science/Term 1/Machine Learning/Datasets/book_recom.csv'
book=pd.read_csv(book_csv_path)
book.head(5)

Unnamed: 0,userID,ISBN,bookRating,bookTitle,totalRatingCount,Location
0,276725,034545104X,0,Flesh Tones: A Novel,60,"tyler, texas, usa"
1,2313,034545104X,5,Flesh Tones: A Novel,60,"cincinnati, ohio, usa"
2,6543,034545104X,0,Flesh Tones: A Novel,60,"strafford, missouri, usa"
3,8680,034545104X,5,Flesh Tones: A Novel,60,"st. charles county, missouri, usa"
4,10314,034545104X,9,Flesh Tones: A Novel,60,"beaverton, oregon, usa"


In [3]:
# Dimensions of the ratings table as (n_rows, n_columns).
book.shape

(26500, 6)

In [4]:
# Column labels available in the ratings table.
book.columns

Index(['userID', 'ISBN', 'bookRating', 'bookTitle', 'totalRatingCount',
       'Location'],
      dtype='object')

In [5]:
# Number of distinct book titles in the ratings table.
book['bookTitle'].nunique()

198

In [6]:
# Number of distinct users in the ratings table.
book['userID'].nunique()

11337

In [7]:
# Reshape the long ratings table into a title x user matrix: one row per
# bookTitle, one column per userID, each cell holding that user's bookRating.
ratings_wide=book.pivot(index='bookTitle',columns='userID',values='bookRating')
# (title, user) pairs with no row in `book` come out as NaN; replace them with 0.
# Note that a recorded rating of 0 is then indistinguishable from "no rating".
data_pivot=ratings_wide.fillna(0)

In [8]:
# Shape of the pivoted matrix as (n_titles, n_users).
data_pivot.shape

(198, 11337)

In [9]:
# Preview the first rows of the title x user rating matrix.
data_pivot.head()

userID,14,23,26,51,67,99,135,243,254,256,...,278483,278514,278535,278552,278582,278633,278740,278769,278773,278843
bookTitle,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
1st to Die: A Novel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,10.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"A Child Called \It\"": One Child's Courage to Survive""",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A Cold Heart: An Alex Delaware Novel,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A Kiss of Shadows (Meredith Gentry Novels (Paperback)),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
A Man in Full,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [10]:
# Convert the dense title x user array to Compressed Sparse Row form, which
# stores only the non-zero ratings instead of every cell of the matrix.
rating_values=data_pivot.values
book_matrix=csr_matrix(rating_values)
# The repr reports the matrix shape and how many non-zero entries are stored.
book_matrix

<198x11337 sparse matrix of type '<class 'numpy.float64'>'
	with 11110 stored elements in Compressed Sparse Row format>

In [11]:
# metric='cosine' compares books by the angle between their rating vectors
# (cosine distance = 1 - cosine similarity).
# algorithm='brute' computes the distance from the query to every fitted row.
# It is chosen because scikit-learn's tree-based indexes (KD-tree / ball tree)
# do not accept sparse input or the cosine metric; it does not skip any rows.
model_knn=NearestNeighbors(algorithm='brute',metric='cosine')
model_knn.fit(book_matrix)

NearestNeighbors(algorithm='brute', leaf_size=30, metric='cosine',
                 metric_params=None, n_jobs=None, n_neighbors=5, p=2,
                 radius=1.0)

### Recommendation for reading:
- Find the 5 books most similar (maximum similarity, i.e. lowest distance) to the first book, '1st to Die: A Novel'.
- The function kneighbors() is used to find the k nearest neighbors of a point.
- We need to compute 6 neighbors of book1 among all the books. Note that the shortest possible distance is 0, which is the distance from book1 to itself. Hence, we need to request 6 neighbors instead of 5.

In [12]:
query_index1=0 #1st book
# Take the selected book's rating row and reshape it to a single-row 2-D array,
# since kneighbors expects a 2-D array of query points.
query_ratings=data_pivot.iloc[query_index1].values
query_point=query_ratings.reshape(1,-1)
# Ask for 6 neighbors: the closest hit is the query book itself, leaving 5 others.
distances,indices=model_knn.kneighbors(query_point,n_neighbors=6)

In [13]:
# Cosine distances to the 6 nearest books, in ascending order (closest first).
# The leading entry is the query book matched against itself.
distances

array([[0.        , 0.85554748, 0.89964868, 0.90149976, 0.90925394,
        0.92285717]])

In [14]:
# Row positions (within the fitted matrix / data_pivot) of those neighbors,
# aligned element-by-element with `distances`.
indices

array([[  0, 110,  12, 188,  26,  78]], dtype=int64)

In [15]:
# Collapse the 2-D result into a 1-D array of row positions for easy iteration.
indices.flatten()

array([  0, 110,  12, 188,  26,  78], dtype=int64)

In [16]:
# Header line naming the book the recommendations are for.
query_title=data_pivot.index[query_index1]
print('Recommendations for {0}\n'.format(query_title))
# Skip the first hit (the query book itself, at distance 0) and print the
# title found at each remaining neighbor position.
for i in indices.ravel()[1:]:
    neighbor_title=data_pivot.index[i]
    print('For',[i],'th position, the book is:',neighbor_title)

Recommendations for 1st to Die: A Novel

For [110] th position, the book is: Roses Are Red (Alex Cross Novels)
For [12] th position, the book is: Along Came a Spider (Alex Cross Novels)
For [188] th position, the book is: Violets Are Blue
For [26] th position, the book is: Cat &amp; Mouse (Alex Cross Novels)
For [78] th position, the book is: Jack &amp; Jill (Alex Cross Novels)
