# LOADING DATASET

In [34]:
import numpy as np 
import pandas as pd
import matplotlib.pyplot as plt
import os
from sklearn.model_selection import train_test_split

In [35]:
books = pd.read_csv('Books.csv')
ratings = pd.read_csv('Ratings.csv')
users = pd.read_csv('Users.csv')

  exec(code_obj, self.user_global_ns, self.user_ns)


# EDA

In [36]:
books.info()
ratings.info()
users.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 271360 entries, 0 to 271359
Data columns (total 8 columns):
 #   Column               Non-Null Count   Dtype 
---  ------               --------------   ----- 
 0   ISBN                 271360 non-null  object
 1   Book-Title           271360 non-null  object
 2   Book-Author          271359 non-null  object
 3   Year-Of-Publication  271360 non-null  object
 4   Publisher            271358 non-null  object
 5   Image-URL-S          271360 non-null  object
 6   Image-URL-M          271360 non-null  object
 7   Image-URL-L          271357 non-null  object
dtypes: object(8)
memory usage: 16.6+ MB
<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1149780 entries, 0 to 1149779
Data columns (total 3 columns):
 #   Column       Non-Null Count    Dtype 
---  ------       --------------    ----- 
 0   User-ID      1149780 non-null  int64 
 1   ISBN         1149780 non-null  object
 2   Book-Rating  1149780 non-null  int64 
dtypes: int64(2), obje

In [37]:
books.head()

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,195153448,Classical Mythology,Mark P. O. Morford,2002,Oxford University Press,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...,http://images.amazon.com/images/P/0195153448.0...
1,2005018,Clara Callan,Richard Bruce Wright,2001,HarperFlamingo Canada,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...,http://images.amazon.com/images/P/0002005018.0...
2,60973129,Decision in Normandy,Carlo D'Este,1991,HarperPerennial,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...,http://images.amazon.com/images/P/0060973129.0...
3,374157065,Flu: The Story of the Great Influenza Pandemic...,Gina Bari Kolata,1999,Farrar Straus Giroux,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...,http://images.amazon.com/images/P/0374157065.0...
4,393045218,The Mummies of Urumchi,E. J. W. Barber,1999,W. W. Norton &amp; Company,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...,http://images.amazon.com/images/P/0393045218.0...


In [38]:
ratings.head()

Unnamed: 0,User-ID,ISBN,Book-Rating
0,276725,034545104X,0
1,276726,0155061224,5
2,276727,0446520802,0
3,276729,052165615X,3
4,276729,0521795028,6


In [39]:
users.head()

Unnamed: 0,User-ID,Location,Age
0,1,"nyc, new york, usa",
1,2,"stockton, california, usa",18.0
2,3,"moscow, yukon territory, russia",
3,4,"porto, v.n.gaia, portugal",17.0
4,5,"farnborough, hants, united kingdom",


# PREPROCESSING

In [40]:
#checking for invalid entries
try:
    books['Year-Of-Publication']  = books['Year-Of-Publication'].astype(int)
except Exception as e:
    print(e)

invalid literal for int() with base 10: 'DK Publishing Inc'


In [41]:
books[books['Year-Of-Publication'] == 'DK Publishing Inc']

Unnamed: 0,ISBN,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
209538,078946697X,"DK Readers: Creating the X-Men, How It All Beg...",2000,DK Publishing Inc,http://images.amazon.com/images/P/078946697X.0...,http://images.amazon.com/images/P/078946697X.0...,http://images.amazon.com/images/P/078946697X.0...,
221678,0789466953,"DK Readers: Creating the X-Men, How Comic Book...",2000,DK Publishing Inc,http://images.amazon.com/images/P/0789466953.0...,http://images.amazon.com/images/P/0789466953.0...,http://images.amazon.com/images/P/0789466953.0...,


In [42]:
#deleting the entries with errors
books['Year-Of-Publication'] = pd.to_numeric(books['Year-Of-Publication'],errors='coerce')
books = books.dropna()
books['Year-Of-Publication'] = books['Year-Of-Publication'].astype(int)

In [43]:
try:
    books['Year-Of-Publication']  = books['Year-Of-Publication'].astype(int)
except Exception as e:
    print(e)

In [44]:
#checking for duplicate values
print(books.duplicated().sum())
print(users.duplicated().sum())
print(ratings.duplicated().sum())

0
0
0


In [45]:
#checking for null values
print(books.isnull().sum())
print(users.isnull().sum())
print(ratings.isnull().sum())

ISBN                   0
Book-Title             0
Book-Author            0
Year-Of-Publication    0
Publisher              0
Image-URL-S            0
Image-URL-M            0
Image-URL-L            0
dtype: int64
User-ID          0
Location         0
Age         110762
dtype: int64
User-ID        0
ISBN           0
Book-Rating    0
dtype: int64


In [46]:
print('Number of data before cleaning : {}'.format(len(ratings)))
ratings = ratings[ratings['ISBN'].isin(books['ISBN'])]
print('Number of data after cleaning : {}'.format(len(ratings)))

Number of data before cleaning : 1149780
Number of data after cleaning : 1031129


In [47]:
f = ['count','mean']

df_books_summary = ratings.groupby('ISBN')['Book-Rating'].agg(f)
df_books_summary.index = df_books_summary.index.map(str)

drop_book_list = df_books_summary[df_books_summary['count'] < 10].index

df_cust_summary = ratings.groupby('User-ID')['Book-Rating'].agg(f)
df_cust_summary.index = df_cust_summary.index.map(int)

drop_cust_list = df_cust_summary[df_cust_summary['count'] < 10].index

###### The books that have less that 10 ratings have been deleted

In [48]:
print('Before Filtering: {}'.format(ratings.shape))
ratings = ratings[~ratings['ISBN'].isin(drop_book_list)]
ratings = ratings[~ratings['User-ID'].isin(drop_cust_list)]
print('After Filtering: {}'.format(ratings.shape))

Before Filtering: (1031129, 3)
After Filtering: (428085, 3)


In [49]:
ratings_with_name = ratings.merge(books,on='ISBN')
ratings_with_name.drop(columns=["ISBN","Image-URL-S","Image-URL-M"],axis=1,inplace=True)
complete_df = ratings_with_name.merge(users.drop("Age", axis=1), on="User-ID")
complete_df.head()

Unnamed: 0,User-ID,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-L,Location
0,276762,0,Southampton Row (Charlotte &amp; Thomas Pitt N...,Anne Perry,2002,Ballantine Books,http://images.amazon.com/images/P/034544003X.0...,"duisburg, nordrhein-westfalen, germany"
1,276762,5,See Jane Run,Joy Fielding,1992,Avon,http://images.amazon.com/images/P/0380711524.0...,"duisburg, nordrhein-westfalen, germany"
2,276762,0,The Dark Half,Stephen King,1994,Signet Book,http://images.amazon.com/images/P/0451167317.0...,"duisburg, nordrhein-westfalen, germany"
3,29259,0,Southampton Row (Charlotte &amp; Thomas Pitt N...,Anne Perry,2002,Ballantine Books,http://images.amazon.com/images/P/034544003X.0...,"mt. airy, , usa"
4,29259,0,"Artemis Fowl (Artemis Fowl, Book 1)",Eoin Colfer,2002,Miramax Kids,http://images.amazon.com/images/P/0786817070.0...,"mt. airy, , usa"


In [50]:
num_rating_df = complete_df.groupby('Book-Title').count()['Book-Rating'].reset_index()
num_rating_df.rename(columns={'Book-Rating': 'num_ratings'}, inplace=True)
avg_rating_df = complete_df.groupby('Book-Title').mean()['Book-Rating'].reset_index()
avg_rating_df.rename(columns={'Book-Rating': 'avg_ratings'}, inplace=True)
popularity_df = num_rating_df.merge(avg_rating_df, on='Book-Title')
top_10 = popularity_df[popularity_df['num_ratings']>=250].sort_values("avg_ratings",ascending=False).head(10)
final_top_10 = top_10.merge(books, on='Book-Title').drop_duplicates('Book-Title')[['Book-Title','Book-Author','Publisher','num_ratings','avg_ratings','Image-URL-M']]
final_top_10.head(10)

Unnamed: 0,Book-Title,Book-Author,Publisher,num_ratings,avg_ratings,Image-URL-M
0,Harry Potter and the Prisoner of Azkaban (Book 3),J. K. Rowling,Scholastic,341,5.750733,http://images.amazon.com/images/P/0439136350.0...
3,Harry Potter and the Goblet of Fire (Book 4),J. K. Rowling,Scholastic,316,5.724684,http://images.amazon.com/images/P/0439139597.0...
5,Harry Potter and the Order of the Phoenix (Boo...,J. K. Rowling,Scholastic,278,5.377698,http://images.amazon.com/images/P/043935806X.0...
9,Harry Potter and the Chamber of Secrets (Book 2),J. K. Rowling,Scholastic,457,5.013129,http://images.amazon.com/images/P/0439064872.0...
12,The Fellowship of the Ring (The Lord of the Ri...,J.R.R. TOLKIEN,Del Rey,259,4.571429,http://images.amazon.com/images/P/0345339703.0...
21,Harry Potter and the Sorcerer's Stone (Harry P...,J. K. Rowling,Arthur A. Levine Books,420,4.369048,http://images.amazon.com/images/P/059035342X.0...
23,To Kill a Mockingbird,Harper Lee,Little Brown &amp; Company,383,4.274151,http://images.amazon.com/images/P/0446310786.0...
31,The Da Vinci Code,Dan Brown,Doubleday,626,4.271565,http://images.amazon.com/images/P/0385504209.0...
37,The Catcher in the Rye,J.D. Salinger,"Little, Brown",292,4.10274,http://images.amazon.com/images/P/0316769487.0...
44,The Five People You Meet in Heaven,Mitch Albom,Hyperion,317,4.031546,http://images.amazon.com/images/P/0786868716.0...


In [51]:
train_ratings, test_ratings = train_test_split(ratings, test_size=0.2, random_state=42)
#merging training data
df = pd.merge(train_ratings, books, on='ISBN')
df.head(3)

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,187065,812540336,0,"The Visitant (The Anasazi Mysteries, Book 1)",Kathleen O'Neal Gear,2000,Tor Books,http://images.amazon.com/images/P/0812540336.0...,http://images.amazon.com/images/P/0812540336.0...,http://images.amazon.com/images/P/0812540336.0...
1,204864,812540336,0,"The Visitant (The Anasazi Mysteries, Book 1)",Kathleen O'Neal Gear,2000,Tor Books,http://images.amazon.com/images/P/0812540336.0...,http://images.amazon.com/images/P/0812540336.0...,http://images.amazon.com/images/P/0812540336.0...
2,44845,812540336,0,"The Visitant (The Anasazi Mysteries, Book 1)",Kathleen O'Neal Gear,2000,Tor Books,http://images.amazon.com/images/P/0812540336.0...,http://images.amazon.com/images/P/0812540336.0...,http://images.amazon.com/images/P/0812540336.0...


In [52]:
#merging testing data
testdf = pd.merge(test_ratings, books, on='ISBN')
testdf = testdf[:-1]
testdf.head(3)

Unnamed: 0,User-ID,ISBN,Book-Rating,Book-Title,Book-Author,Year-Of-Publication,Publisher,Image-URL-S,Image-URL-M,Image-URL-L
0,43619,440221315,9,The Gift,DANIELLE STEEL,1996,Dell,http://images.amazon.com/images/P/0440221315.0...,http://images.amazon.com/images/P/0440221315.0...,http://images.amazon.com/images/P/0440221315.0...
1,201674,440221315,0,The Gift,DANIELLE STEEL,1996,Dell,http://images.amazon.com/images/P/0440221315.0...,http://images.amazon.com/images/P/0440221315.0...,http://images.amazon.com/images/P/0440221315.0...
2,112026,440221315,0,The Gift,DANIELLE STEEL,1996,Dell,http://images.amazon.com/images/P/0440221315.0...,http://images.amazon.com/images/P/0440221315.0...,http://images.amazon.com/images/P/0440221315.0...


# ITEM BASED COLLABORATIVE FILTERING

In [53]:
#creating a pivot table with the books as rows and the ratings as columns
pivot_rating = df.pivot_table(values = 'Book-Rating',index='Book-Title', columns = 'User-ID' )
pivot_rating.head()

User-ID,8,99,242,243,254,383,388,408,424,446,...,278522,278535,278554,278563,278582,278633,278637,278771,278843,278851
Book-Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Earth Prayers From around the World: 365 Prayers, Poems, and Invocations for Honoring the Earth",,,,,,,,,,,...,,,,,,,,,,
"Good Wives: Image and Reality in the Lives of Women in Northern New England, 1650-1750",,,,,,,,,,,...,,,,,,,,,,
Murder of a Sleeping Beauty (Scumble River Mysteries (Paperback)),,,,,,,,,,,...,,,,,,,,,,
"Q-Space (Star Trek The Next Generation, Book 47)",,,,,,,,,,,...,,,,,,,,,,
"Q-Zone (Star Trek The Next Generation, Book 48)",,,,,,,,,,,...,,,,,,,,,,


In [54]:
testpivot_rating = testdf.pivot_table(values = 'Book-Rating',index='Book-Title', columns = 'User-ID' )
testpivot_rating.head()

User-ID,8,99,242,243,254,383,388,408,424,446,...,278418,278506,278522,278535,278554,278563,278582,278633,278771,278843
Book-Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Earth Prayers From around the World: 365 Prayers, Poems, and Invocations for Honoring the Earth",,,,,,,,,,,...,,,,,,,,,,
Murder of a Sleeping Beauty (Scumble River Mysteries (Paperback)),,,,,,,,,,,...,,,,,,,,,,
"Q-Space (Star Trek The Next Generation, Book 47)",,,,,,,,,,,...,,,,,,,,,,
"Q-Zone (Star Trek The Next Generation, Book 48)",,,,,,,,,,,...,,,,,,,,,,
!Yo!,,,,,,,,,,,...,,,,,,,,,,


In [55]:
pivot_rating = pivot_rating.fillna(0)
pivot_rating.head()

User-ID,8,99,242,243,254,383,388,408,424,446,...,278522,278535,278554,278563,278582,278633,278637,278771,278843,278851
Book-Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Earth Prayers From around the World: 365 Prayers, Poems, and Invocations for Honoring the Earth",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Good Wives: Image and Reality in the Lives of Women in Northern New England, 1650-1750",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Murder of a Sleeping Beauty (Scumble River Mysteries (Paperback)),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Q-Space (Star Trek The Next Generation, Book 47)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Q-Zone (Star Trek The Next Generation, Book 48)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [56]:
testpivot_rating = testpivot_rating.fillna(0)
testpivot_rating.head()

User-ID,8,99,242,243,254,383,388,408,424,446,...,278418,278506,278522,278535,278554,278563,278582,278633,278771,278843
Book-Title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1,Unnamed: 9_level_1,Unnamed: 10_level_1,Unnamed: 11_level_1,Unnamed: 12_level_1,Unnamed: 13_level_1,Unnamed: 14_level_1,Unnamed: 15_level_1,Unnamed: 16_level_1,Unnamed: 17_level_1,Unnamed: 18_level_1,Unnamed: 19_level_1,Unnamed: 20_level_1,Unnamed: 21_level_1
"Earth Prayers From around the World: 365 Prayers, Poems, and Invocations for Honoring the Earth",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
Murder of a Sleeping Beauty (Scumble River Mysteries (Paperback)),0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Q-Space (Star Trek The Next Generation, Book 47)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
"Q-Zone (Star Trek The Next Generation, Book 48)",0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0
!Yo!,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,...,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0,0.0


In [57]:
#finding the similarity matrix using cosine similarity
from sklearn.metrics.pairwise import cosine_similarity
similarity_score = cosine_similarity(pivot_rating)
similarity_score.shape
print(similarity_score)

[[1. 0. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 [0. 0. 1. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 1. 0. 0.]
 [0. 0. 0. ... 0. 1. 0.]
 [0. 0. 0. ... 0. 0. 1.]]


In [58]:
testsimilarity_score = cosine_similarity(testpivot_rating)
testsimilarity_score.shape
print(testsimilarity_score)

[[1. 0. 0. ... 0. 0. 0.]
 [0. 1. 0. ... 0. 0. 0.]
 [0. 0. 1. ... 0. 0. 0.]
 ...
 [0. 0. 0. ... 1. 0. 0.]
 [0. 0. 0. ... 0. 1. 0.]
 [0. 0. 0. ... 0. 0. 0.]]


In [59]:
def recommend(book_name):
  try:
    index = np.where(pivot_rating.index==book_name)[0][0]
    similar_books = sorted(list(enumerate(similarity_score[index])),key=lambda x:x[1], reverse=True)[1:6]
    
    data_train = []
    
    for i in similar_books:
        item = []
        temp_df = books[books['Book-Title'] == pivot_rating.index[i[0]]]
        item.extend(list(temp_df.drop_duplicates('Book-Title')['Book-Title'].values))
        item.extend(list(temp_df.drop_duplicates('Book-Title')['Book-Author'].values))
        
        data_train.append(item)
    return data_train
  except:
    print('similar books not found due to insuffecient user ratings')
    

In [60]:
def testrecommend(book_name):
  try:
    index = np.where(testpivot_rating.index==book_name)[0][0]
    similar_books = sorted(list(enumerate(testsimilarity_score[index])),key=lambda x:x[1], reverse=True)[1:6]
    
    data_test = []
    
    for i in similar_books:
        item = []
        temp_df = books[books['Book-Title'] == pivot_rating.index[i[0]]]
        item.extend(list(temp_df.drop_duplicates('Book-Title')['Book-Title'].values))
        item.extend(list(temp_df.drop_duplicates('Book-Title')['Book-Author'].values))
        
        data_test.append(item)
    return data_test
  except:
    print('similar books not found due to insuffecient user ratings')

# TESTING

In [61]:
recommend('Pride and Prejudice')

[['Adventures of Tom Sawyer (Signet Classic Series)', 'Mark Twain'],
 ['The Republic of Love', 'Carol Shields'],
 ['Canal Dreams', 'Iain Banks'],
 ['Halloween Party', 'Agatha Christie'],
 ['Middlemarch', 'George Eliot']]

In [62]:
testrecommend('Pride and Prejudice')

[['A Spirited Seduction (Haunting Hearts)', 'Casey Claybourne'],
 ['Dark Enchantment', 'KAREN HARBAUGH'],
 ['Possessing the Secret of Joy', 'Alice Walker'],
 ['The Dolphins of Pern', 'Anne McCaffrey'],
 ['The Summer I Dared: A Novel', 'Barbara Delinsky']]

# CHATBOT

In [65]:
from chatterbot import ChatBot
import time
time.clock = time.time

chatbot = ChatBot("Chatpot")


exit_conditions = (":q", "quit", "exit")

while True:

    query = input("> ")

    if query in exit_conditions:

        break

    else:

        print(f"🪴 {chatbot.get_response(query)}")

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\Dhivyaharshini\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Dhivyaharshini\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Dhivyaharshini\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


> quit


In [66]:
def recommend(book_name):
  try:
    index = np.where(pivot_rating.index==book_name)[0][0]
    similar_books = sorted(list(enumerate(similarity_score[index])),key=lambda x:x[1], reverse=True)[1:6]
    
    data = []
    
    for i in similar_books:
        item = []
        temp_df = books[books['Book-Title'] == pivot_rating.index[i[0]]]
        item.extend(list(temp_df.drop_duplicates('Book-Title')['Book-Title'].values))
        item.extend(list(temp_df.drop_duplicates('Book-Title')['Book-Author'].values))
        
        data.append(item)
    return data
  except:
    print('similar books not found due to insuffecient user ratings')
    
from chatterbot import ChatBot
from chatterbot.trainers import ListTrainer
import time
time.clock = time.time

chatbot = ChatBot("Chatpot")

trainer = ListTrainer(chatbot)
trainer.train([
    "Hi",
    "Welcome to your very own book recommendor channel"])
trainer.train([
    "I am looking for a book",
    "Tell me something you have read before and I'll give you a few options",
])
trainer.train([
    "Thank you",
    "You are welcome",
])

exit_conditions = (":q", "quit", "exit")
while True:
    query = input("> ")
    if query in exit_conditions:
        break
    elif query=="I am looking for a book":
        print("Tell me something you have read before and I'll give you a few options")
        booktitle=input("Book Title: ")
        data = recommend(booktitle)
        for i in data:
            print(i)
    else:
        print(f" 📚 {chatbot.get_response(query)}")


List Trainer: [####################] 100%
List Trainer: [####################] 100%
List Trainer: [####################] 100%

[nltk_data] Downloading package averaged_perceptron_tagger to
[nltk_data]     C:\Users\Dhivyaharshini\AppData\Roaming\nltk_data...
[nltk_data]   Package averaged_perceptron_tagger is already up-to-
[nltk_data]       date!
[nltk_data] Downloading package punkt to
[nltk_data]     C:\Users\Dhivyaharshini\AppData\Roaming\nltk_data...
[nltk_data]   Package punkt is already up-to-date!
[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\Dhivyaharshini\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!



> Hi
 📚 Welcome to your very own book recommndor channel
> I am looking for a book
Tell me something you have read before and I'll give you a few options
Book Title: Pride and Prejudice
['Adventures of Tom Sawyer (Signet Classic Series)', 'Mark Twain']
['The Republic of Love', 'Carol Shields']
['Canal Dreams', 'Iain Banks']
['Halloween Party', 'Agatha Christie']
['Middlemarch', 'George Eliot']
> Thank you
 📚 You are welcome
> quit
