# Importing the required libraries

In [1]:
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt

# Loading the book dataset

In [2]:
books = pd.read_csv(r'C:\Users\Binita Mandal\Desktop\finity\Recommondation rules\book.csv',encoding ='latin1',index_col=0)

In [3]:
# Lets find out first 5 rows
books.head()

Unnamed: 0,User.ID,Book.Title,Book.Rating
1,276726,Classical Mythology,5
2,276729,Clara Callan,3
3,276729,Decision in Normandy,6
4,276736,Flu: The Story of the Great Influenza Pandemic...,8
5,276737,The Mummies of Urumchi,6


In [4]:
# Now the last 5 rows
books.tail()

Unnamed: 0,User.ID,Book.Title,Book.Rating
9996,162121,American Fried: Adventures of a Happy Eater.,7
9997,162121,Cannibal In Manhattan,9
9998,162121,How to Flirt: A Practical Guide,7
9999,162121,Twilight,8
10000,162129,Kids Say the Darndest Things,6


In [5]:
# Shape of the data
books.shape

(10000, 3)

In [6]:
# Size of the data
books.size

30000

In [7]:
# Columns of the data
books.columns

Index(['User.ID', 'Book.Title', 'Book.Rating'], dtype='object')

In [8]:
# Lets rename the columns
books1=books.rename(columns = {"User.ID": "userid", 
                                  "Book.Title":"title", 
                                  "Book.Rating": "rating"})

In [9]:
# Lets see if get the output correct
books1.columns

Index(['userid', 'title', 'rating'], dtype='object')

In [10]:
# Yes we did, now first 5 rows again
books1.head()

Unnamed: 0,userid,title,rating
1,276726,Classical Mythology,5
2,276729,Clara Callan,3
3,276729,Decision in Normandy,6
4,276736,Flu: The Story of the Great Influenza Pandemic...,8
5,276737,The Mummies of Urumchi,6


In [12]:
books1.describe()

Unnamed: 0,userid,rating
count,10000.0,10000.0
mean,95321.2498,7.5663
std,117645.703609,1.82152
min,8.0,1.0
25%,2103.0,7.0
50%,3757.0,8.0
75%,162052.0,9.0
max,278854.0,10.0


In [13]:
from sklearn.feature_extraction.text import TfidfVectorizer 

In [14]:
# Creating a Tfidf Vectorizer to remove all stop words
tfidf = TfidfVectorizer(stop_words="english")

In [15]:
# Replacing the NaN values in overview column with empty strings

books1["title"].isnull().sum() 
books1["title"] = books1["title"].fillna(" ")

In [16]:
# Preparing the Tfidf matrix by fitting and transforming

tfidf_matrix = tfidf.fit_transform(books1.title)   

In [17]:
# Shape of this will be
tfidf_matrix.shape 

(10000, 11435)

In [18]:
from sklearn.metrics.pairwise import linear_kernel

# Lets compute the cosine similarity on Tfidf matrix
cosine_sim_matrix = linear_kernel(tfidf_matrix,tfidf_matrix)

In [19]:
# Creating a mapping of  name to index number 
books_index = pd.Series(books1.index,index=books1['title'])

In [22]:
# Now lets see what we have got so far
books_index

title
Classical Mythology                                                                                       1
Clara Callan                                                                                              2
Decision in Normandy                                                                                      3
Flu: The Story of the Great Influenza Pandemic of 1918 and the Search for the Virus That Caused It        4
The Mummies of Urumchi                                                                                    5
                                                                                                      ...  
American Fried: Adventures of a Happy Eater.                                                           9996
Cannibal In Manhattan                                                                                  9997
How to Flirt: A Practical Guide                                                                        9998
Twilight              

In [23]:
books_index["Classical Mythology"]

1

# Lets get final book recommendation with following codes

In [24]:
from sklearn.metrics.pairwise import linear_kernel

# Computing the cosine similarity on Tfidf matrix
cosine_sim_matrix = linear_kernel(tfidf_matrix,tfidf_matrix)

# creating a mapping of book name to index number 
books1_index = pd.Series(books1.index,index=books1['title'])


books_index["Classical Mythology"]

def get_books1_recommendations(title,topN):
    
   
    #topN = 10
    # Getting the books index using its title 
    userid = books1_index[title]
    
    # Getting the pair wise similarity score 
    cosine_scores = list(enumerate(cosine_sim_matrix[userid]))
    
    # Sorting the cosine_similarity scores based on scores 
    cosine_scores = sorted(cosine_scores,key=lambda x:x[1],reverse = True)
    
    # Get the scores of top 10 most similar books 
    cosine_scores_10 = cosine_scores[0:topN+1]
    
    # Getting the books index 
    book_idx  =  [i[0] for i in cosine_scores_10]
    book_scores =  [i[1] for i in cosine_scores_10]
    
    # Similar titles and scores
    book_similar_show = pd.DataFrame(columns=["title","Score"])
    book_similar_show["title"] = books1.loc[book_idx,"title"]
    book_similar_show["Score"] = book_scores
    book_similar_show.reset_index(inplace=True)  
    book_similar_show.drop(["index"],axis=1,inplace=True)
    print (book_similar_show)
  

    
 
get_books1_recommendations
get_books1_recommendations('Decision in Normandy',topN=3)

                                               title     Score
0                               Decision in Normandy  1.000000
1                                Mine Eyes Have Seen  0.230309
2                                          Eve's Rib  0.218970
3  Who Will Tell The People? : The Betrayal Of Am...  0.204658


### So here we have got 'Decision in Normandy' as a book recommendation