In [None]:
# A basic collaborative-filtering-based book recommendation system.

In [None]:
#Mandatory Imports
import os

%matplotlib inline
import pandas as pd
import numpy as np
from numpy import int64

import requests
import IPython.display as Disp

import sklearn
from sklearn.decomposition import TruncatedSVD

In [None]:
# Load the book catalogue and the user ratings from the working directory.
books = pd.read_csv("books.csv")
# Parse the three ratings columns as integers.
ratings = pd.read_csv(
    "ratings.csv",
    encoding='UTF-8',
    dtype=dict.fromkeys(('user_id', 'book_id', 'rating'), int),
)

In [None]:
# Preview the first rows of both tables using the notebook's rich rendering
# instead of plain-text print(), which wraps and truncates wide frames.
Disp.display(books.head(), ratings.head())

In [None]:
# Report (rows, columns) for each table, one tuple per line.
print(books.shape, ratings.shape, sep="\n")

In [None]:
# DataFrame.info() writes its summary to stdout itself and returns None, so
# wrapping it in print() only appended a stray "None" line after each summary.
books.info()
ratings.info()

In [None]:
# Keep only the descriptive columns of interest from the book catalogue.
books_df = books.loc[:, [
    'book_id',
    'books_count',
    'original_publication_year',
    'average_rating',
    'original_title',
    'image_url',
    'authors',
]]
books_df.head()

In [None]:
# Distribution of rating values, split into 5 bins.
ratings['rating'].hist(bins=5)

In [None]:
# Same distribution with a finer 10-bin split.
ratings['rating'].hist(bins=10)

In [None]:
# Summary statistics for the numeric columns of the ratings table.
ratings.describe()

In [None]:
# Number of (non-null) ratings recorded for each user_id.
ratings.groupby('user_id')['rating'].count()

In [None]:
# Attach each rating to its book's metadata by joining on the shared book_id column.
books_ratings = ratings.merge(books, on='book_id')
books_ratings.head()

In [None]:
# (rows, columns) of the merged ratings + book metadata table.
books_ratings.shape

In [None]:
# Number of ratings per book, most-rated books first.
(
    books_ratings
    .groupby('book_id')['rating']
    .count()
    .sort_values(ascending=False)
)

In [None]:
# Build the user x title utility matrix: one row per user_id, one column per
# original_title. Combinations with no rating are filled with 0; repeated
# (user, title) pairs are combined by pivot_table's default aggregation (mean).
books_ratings_ct = books_ratings.pivot_table(
    index='user_id',
    columns='original_title',
    values='rating',
    fill_value=0,
)
books_ratings_ct.head()

In [None]:
# Shape recorded by the original author: (28554, 794), i.e. users x book titles.
books_ratings_ct.shape

In [None]:
# books_ratings_ct has user_ids as rows and book titles as columns
# (28554 x 794 when this notebook was written).
# Computing a Pearson correlation matrix between books over the full-length
# user vectors would be computationally expensive, so the user dimension is
# compressed first with Singular Value Decomposition (SVD).
# TruncatedSVD reduces the number of columns (features) and keeps the rows.
# The book titles must be retained for recommending, while the user_ids can be
# reduced for efficiency, so the matrix is transposed: titles become the rows
# and user_ids become the columns that SVD will compress.

# Sanity check that the transpose of the pivot table is still a DataFrame.
print(type(books_ratings_ct.T))
# Plain NumPy array with one row per book title and one column per user.
X = books_ratings_ct.values.T
X.shape

In [None]:
# Compress the dataset by applying Singular Value Decomposition (SVD).
# SVD is a dimensionality-reduction technique: at its core it is a linear-algebra
# method that decomposes a sparse utility matrix into three smaller matrices,
# reducing the number of dimensions while trying to keep the significance of the
# important features.
# Each row of X is reduced to 20 components; random_state pins the seed so the
# result is reproducible across runs.
SVD = TruncatedSVD(n_components=20,random_state=17)
result_matrix = SVD.fit_transform(X)
result_matrix.shape

In [None]:
# Create the Pearson correlation matrix. np.corrcoef treats each row of
# result_matrix as one variable, so entry [i, j] is the correlation between
# row i and row j.
corr_mat = np.corrcoef(result_matrix)
corr_mat.shape

In [None]:
# Confirm the container type of the correlation matrix.
type(corr_mat)

In [None]:
# Correlations of the first row of result_matrix against every row.
corr_mat[0]

In [None]:
# Correlations of the second row against every row (the trailing comma still
# selects a single row).
corr_mat[1,]

In [None]:
# The column labels of the utility matrix are the book titles; keep them both as
# the original Index and as a plain Python list, then show the list.
book_names = books_ratings_ct.columns
book_list = book_names.tolist()
book_list

In [None]:
def printRecommendedBooks(bookName, threshold=0.8):
    """Return the titles whose correlation with ``bookName`` exceeds ``threshold``.

    Despite the name, nothing is printed; the matching titles are returned.

    Relies on three notebook-level globals: ``book_list`` (titles as a list),
    ``book_names`` (the same titles as an indexable sequence supporting boolean
    masks) and ``corr_mat`` (correlation matrix indexed in the same order).

    Parameters
    ----------
    bookName : str
        Title to look up; must match an entry of ``book_list`` exactly.
    threshold : float, default 0.8
        Correlation a title must strictly exceed to be recommended.

    Returns
    -------
    list
        Matching titles in ``book_names`` order, never including ``bookName``.

    Raises
    ------
    ValueError
        If ``bookName`` is not present in ``book_list``.
    """
    try:
        bookNameIndex = book_list.index(bookName)
    except ValueError:
        raise ValueError(f"Unknown book title: {bookName!r}") from None

    corrBookName = corr_mat[bookNameIndex]
    # The comparison allocates a fresh boolean array, so corr_mat is not mutated.
    filterThreshold = corrBookName > threshold
    # Exclude the queried book by position rather than by testing `< 1.0`:
    # floating-point rounding can leave its self-correlation just below 1.0
    # (so it would recommend itself), and a `< 1.0` test would also discard
    # other titles that correlate perfectly.
    filterThreshold[bookNameIndex] = False
    return list(book_names[filterThreshold])

In [None]:
# Example: titles that pass the correlation filter for "Plum Lovin'".
printRecommendedBooks("Plum Lovin'")

In [None]:
# Example: titles that pass the correlation filter for 'A Christmas Carol'.
printRecommendedBooks('A Christmas Carol')

In [None]:
# Example: titles that pass the correlation filter for 'The Fountainhead'.
printRecommendedBooks('The Fountainhead')