In [None]:
"""
Created on 14th Sept 2020
@author: Shilpa Chavan
Algorithm : Recommender System Algorithm
Dataset : book1.csv
"""

In [1]:
import pandas as pd

# Read the book-ratings CSV. Every column is loaded as text (dtype=str) and
# the file is decoded as latin-1.
book = pd.read_csv(
    "D:\\Shilpa\\Datascience\\Assignments\\Recommendation System\\book1.csv",
    encoding='latin-1',
    dtype=str,
)
# Preview the first ten rows.
book.head(10)

Unnamed: 0.1,Unnamed: 0,User.ID,Book.Title,Book.Rating
0,1,276726,Classical Mythology,5
1,2,276729,Clara Callan,3
2,3,276729,Decision in Normandy,6
3,4,276736,Flu: The Story of the Great Influenza Pandemic...,8
4,5,276737,The Mummies of Urumchi,6
5,6,276744,The Kitchen God's Wife,7
6,7,276745,What If?: The World's Foremost Military Histor...,10
7,8,276747,PLEADING GUILTY,9
8,9,276747,Under the Black Flag: The Romance and the Real...,9
9,10,276747,Where You'll Find Me: And Other Stories,8


In [2]:
# Count the missing values in each column.
print(book.isnull().sum())

Unnamed: 0     0
User.ID        0
Book.Title     0
Book.Rating    0
dtype: int64


In [3]:
book.shape # (number of rows, number of columns)

(10000, 4)

In [4]:
# List the column labels of the loaded frame.
book.columns

Index(['Unnamed: 0', 'User.ID', 'Book.Title', 'Book.Rating'], dtype='object')

# Content-based Recommender System

In [5]:
from sklearn.feature_extraction.text import TfidfVectorizer # TF-IDF (term frequency - inverse document frequency) is a numerical
# statistic intended to reflect how important a word is to a document in a collection or corpus
from sklearn.feature_extraction.text import CountVectorizer

# Create a TF-IDF vectorizer that drops English stop words.
# It is used below to convert the text column "Book.Title" into a numeric matrix.
tfidf = TfidfVectorizer(stop_words="english")    # "english" selects scikit-learn's built-in stop-word list


In [6]:
# Replace any missing titles in the "Book.Title" column with a blank string
# so the vectorizer only ever receives text.
book["Book.Title"] = book["Book.Title"].fillna(" ")

# Learn the vocabulary and IDF weights from the titles and build the TF-IDF
# matrix: one row per book, one column per vocabulary term.
tfidf_matrix = tfidf.fit_transform(book["Book.Title"])

# The learned vocabulary can be inspected with tfidf.get_feature_names_out()
# (scikit-learn >= 1.0). The older get_feature_names() was removed in
# scikit-learn 1.2, and its result was never used here, so it is not called.
tfidf_matrix.shape
# (10000, 11436) in the recorded run

(10000, 11436)

In [7]:
# with the above matrix we need to find the 
# similarity score
# There are several metrics for this
# such as the euclidean, the Pearson and 
# the cosine similarity scores

# For now we will be using cosine similarity matrix
# A numeric quantity to represent the similarity
# between 2 books
# Cosine similarity - metric is independent of 
# magnitude and easy to calculate 

# cosine(x,y)= (x.y⊺)/(||x||.||y||)

from sklearn.metrics.pairwise import linear_kernel

# Compute the pairwise cosine similarity between all titles.
# TfidfVectorizer L2-normalises each row by default, so the plain dot product
# returned by linear_kernel equals the cosine similarity.
# NOTE: the result is a dense (n_books x n_books) array.
cosine_sim_matrix = linear_kernel(tfidf_matrix,tfidf_matrix)

# Map each book title to the row number of its first occurrence.
# Series.drop_duplicates() compares the *values* (the row numbers, which are
# all distinct), so it never removed repeated titles. Filter on the index
# instead, so that looking up a title always yields a single integer.
book_index = pd.Series(book.index,index=book['Book.Title'])
book_index = book_index[~book_index.index.duplicated(keep="first")]
book_index["Twilight"]


9998

In [9]:
def get_book_recommendations(Name,topN):
    """Return the books whose titles are most similar to ``Name``.

    Similarity is read from the module-level ``cosine_sim_matrix``; titles
    are resolved to row numbers through ``book_index`` and turned back into
    text through ``book``.

    Parameters
    ----------
    Name : str
        Book title to look up in ``book_index``.
    topN : int
        Number of rows wanted in addition to the top-ranked one; the result
        holds the ``topN + 1`` highest-scoring rows. The top-ranked row is
        usually the queried book itself, with a score of 1.0.

    Returns
    -------
    pandas.DataFrame
        Columns ``Name`` and ``Score`` with a fresh 0..n-1 index, ordered by
        descending score. Rows with equal scores keep their dataset order.

    Raises
    ------
    ValueError
        If ``topN`` is negative.
    KeyError
        If ``Name`` is not a title in ``book_index``.
    """
    if topN < 0:
        raise ValueError("topN must be non-negative")
    if Name not in book_index.index:
        raise KeyError(f"Book title not found in dataset: {Name!r}")

    # Resolve the title to its row number. When the same title occurs more
    # than once the lookup yields a Series; use the first occurrence so the
    # similarity row below is always one-dimensional.
    book_id = book_index[Name]
    if isinstance(book_id, pd.Series):
        book_id = book_id.iloc[0]

    # Similarity of the chosen book against every book in the dataset.
    similarity_row = cosine_sim_matrix[book_id]

    # Rank row numbers by descending score. sorted() is stable, so ties stay
    # in dataset order.
    ranked = sorted(
        range(len(similarity_row)),
        key=lambda i: similarity_row[i],
        reverse=True,
    )
    top = ranked[:topN + 1]

    # Build the result directly; to_numpy() drops the original row labels so
    # the frame gets a clean 0..n-1 index.
    return pd.DataFrame({
        "Name": book.loc[top, "Book.Title"].to_numpy(),
        "Score": [similarity_row[i] for i in top],
    })


In [10]:
# Pass a book title and the number of books to be recommended.
get_book_recommendations("Twilight",topN=10)

Unnamed: 0,Name,Score
0,Twilight,1.0
1,Edge of Twilight,0.743196
2,Embrace the Twilight,0.677633
3,Dragons of Autumn Twilight (Dragonlance Chroni...,0.414123
4,Twilight (Star Trek Deep Space Nine: Mission G...,0.408478
5,Twilight over Burma: My Life As a Shan Princes...,0.379453
6,Moreau Omnibus: Forests of the Night/Emperors ...,0.341455
7,Classical Mythology,0.0
8,Clara Callan,0.0
9,Decision in Normandy,0.0
