## <div align="center"> Labsheet 03 - Collaborative Filtering for Book Recommendation System </div>

In [1]:
import pandas as pd 
from sklearn.neighbors import NearestNeighbors
import numpy as np
from scipy.sparse import csr_matrix

### Load the book dataset into data frame and build a Utility matrix

In [2]:
df = pd.read_csv('book_dataset.csv')
print(df.head())

#count the total rows
total_record_count = df['book_id'].count()

# Count of unique books
unique_books_count = df['book'].nunique()
print(f"\n Number of Unique Books: {unique_books_count} out of total books : {total_record_count}")

# Count of unique users
total_users_count = df['user'].count()
unique_users_count = df['user'].nunique()
print(f"\n Number of Unique Users: {unique_users_count} out of total user : {total_record_count}")


   rating  book_id                           user_id  user  book
0       5       21  75a3f1cd17ac5f6f2635756805fe7046     0     0
1       4     4671  75a3f1cd17ac5f6f2635756805fe7046     0     1
2       4    19543  75a3f1cd17ac5f6f2635756805fe7046     0     2
3       4    30119  75a3f1cd17ac5f6f2635756805fe7046     0     3
4       3     5470  75a3f1cd17ac5f6f2635756805fe7046     0     4

 Number of Unique Books: 7855 out of total books : 374108

 Number of Unique Users: 623 out of total user : 374108


### Build a Utility matrix table using pandas pivot function and output first 5 records

In [3]:
# Create a utility matrix using pandas and display the first 5 row
utility_matrix_table = pd.pivot_table(df, values='rating', index='user', columns='book', fill_value=0)

#utility_matrix_table.to_csv('utility_matrix.csv') #write to csv
utility_matrix_table.head(5) 

book,0,1,2,3,4,5,6,7,8,9,...,7845,7846,7847,7848,7849,7850,7851,7852,7853,7854
user,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
0,5,4,4,4,3,5,5,1,4,4,...,0,0,0,0,0,0,0,0,0,0
1,0,3,0,5,5,0,3,0,0,0,...,0,0,0,0,0,0,0,0,0,0
2,0,0,0,0,0,5,0,0,3,3,...,0,0,0,0,0,0,0,0,0,0
3,0,4,0,4,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0
4,0,0,0,0,0,0,0,0,0,0,...,0,0,0,0,0,0,0,0,0,0


#### Build an item based collaborative filtering recommendation system.

In [4]:
query_index = np.random.choice(utility_matrix_table.shape[0])
print(f"The chosen user from the matrix is : {query_index}")

# Transpose the matrix to get a user-item matrix
user_item_matrix = utility_matrix_table.T
#print(user_item_matrix)

user_item_matrix_table = csr_matrix(user_item_matrix.values)

#print(user_item_matrix_table)

# Fit Nearest Neighbors model
model = NearestNeighbors(metric='cosine', algorithm='brute')
model.fit(user_item_matrix_table)

distances, indices = model.kneighbors(user_item_matrix.iloc[query_index,:].values.reshape(1,-1),n_neighbors = 4)
#print(distances, indices)

# Extract user indices and distances for those indices
user = [user_item_matrix.index[i] for i in indices.flatten()[1:]]
distance = distances.flatten()[1:]

#print(user, distance)

#creating another datafame for storing the new data
m = pd.Series(user, name='user')
d = pd.Series(distance, name='distance')
recommend = pd.concat([m, d], axis=1)
recommend = recommend.sort_values('distance', ascending=True)

# prionting recommendation for the user which has smaller, as we are using cosine simmilarity the smaller the better
print(f'\nOther Users having simmilar taste like User {user_item_matrix.index[query_index]}:\n')
for i in range(recommend.shape[0]):
    print(f'{recommend["user"].iloc[i]}, with distance of {recommend["distance"].iloc[i]}')    

The chosen user from the matrix is : 17

Other Users having simmilar taste like User 17:

418, with distance of 0.3163062858524196
356, with distance of 0.329773624016255
355, with distance of 0.33274113627096336
