In [3]:
import sqlite3
import pandas as pd
import numpy as np
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.metrics import mean_squared_error
from sklearn.model_selection import train_test_split
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import linear_kernel


conn2 = sqlite3.connect('library.db')
query = "SELECT title,author FROM library ;"

book_df = pd.read_sql_query(query, conn2)
books_tfidf = TfidfVectorizer(stop_words='english')

book_df['title'] = book_df['title'].fillna('')
book_df['author'] = book_df['author'].fillna('')
'''
Tf-idf Vectorizer Object removes all English stop words such as ‘the’, ‘a’ etc.
We are replacing the Null(empty) values with an empty string so that it doesn’t return an error message when training them.
Lastly, we are constructing the required Tf-idf matrix by fitting and transforming the data
'''

'\nTf-idf Vectorizer Object removes all English stop words such as ‘the’, ‘a’ etc.\nWe are replacing the Null(empty) values with an empty string so that it doesn’t return an error message when training them.\nLastly, we are constructing the required Tf-idf matrix by fitting and transforming the data\n'

In [4]:
book_Name_matrix = books_tfidf.fit_transform(book_df['title'])

In [5]:
cosine_similarity = linear_kernel(book_Name_matrix, book_Name_matrix)

In [6]:
indices = pd.Series(book_df['title'].index)
def recommendBasedOnName(index, cosine_sim=cosine_similarity):
    id = indices[index]
    # Get the pairwsie similarity scores of all books compared to that book, 
    # sorting them and getting top 5
    similarity_scores = list(enumerate(cosine_sim[id]))
    similarity_scores = sorted(similarity_scores, key=lambda x: x[1], reverse=True)
    similarity_scores = similarity_scores[1:4]

    # Get the books index
    books_index = [i[0] for i in similarity_scores]

    # Return the top 5 most similar books using integer-location based indexing (iloc)
    return book_df['title'].iloc[books_index]

In [11]:
book_author_matrix = books_tfidf.fit_transform(book_df['author'])
cosine_similarity_author = linear_kernel(book_author_matrix, book_author_matrix)
indices_2 = pd.Series(book_df['author'].index)
def recommendBasedOnAuthor(index, cosine_sim=cosine_similarity_author):
    id = indices_2[index]
    # Get the pairwsie similarity scores of all books compared to that book, 
    # sorting them and getting top 5
    similarity_scores = list(enumerate(cosine_sim[id]))
    similarity_scores = sorted(similarity_scores, key=lambda x: x[1], reverse=True)
    similarity_scores = similarity_scores[1:4]

    # Get the books index
    books_index = [i[0] for i in similarity_scores]

    # Return the top 5 most similar books using integer-location based indexing (iloc)
    return book_df['author'].iloc[books_index]


0    0
1    1
2    2
3    3
dtype: int64


In [12]:
def finalRecommend(index):
    return recommendBasedOnAuthor(index) + '\n' + recommendBasedOnName(index)
    

In [13]:
finalRecommend(2)

0                                                  NaN
1    J.K Rowling\nHarry Potter and the Philosophers...
2                                                  NaN
3                                                  NaN
dtype: object