In [52]:
import getpass
import pandas as pd
from pymongo import MongoClient

In [53]:
password = getpass.getpass("MongoDB password: ")

MongoDB password:  ········


In [54]:
client = MongoClient(f'mongodb://book_group:{password}@macragge.reika.io:47017/?authSource=books')

In [55]:
db = client['books']
collection = db['books']

In [56]:
# Fetch data from MongoDB
query = {'random': {'$lte': 40}}
data = list(collection.find(query))  # Retrieve documents as a list of dictionaries 

In [57]:
# Create DataFrame and preview
df = pd.DataFrame(data)

# Filter the DataFrame to only include rows where 'language_code' is 'eng'
df = df[df['language_code'] == "eng"]

# Display the first few rows of the filtered DataFrame
df.head()

Unnamed: 0,_id,isbn,text_reviews_count,series,country_code,language_code,popular_shelves,asin,is_ebook,average_rating,...,publication_year,url,image_url,book_id,ratings_count,work_id,title,title_without_series,genre,random
1,66df9d075a178eb80fbd4d51,970190719.0,11,[],US,eng,"[{'count': '66', 'name': 'to-read'}, {'count':...",,False,4.06,...,2002,https://www.goodreads.com/book/show/1006140.Th...,https://s.gr-assets.com/assets/nophoto/book/11...,1006140,27,652659,The Tree,The Tree,children,26
3,66df9d075a178eb80fbd4d8f,590421921.0,9,[155231],US,eng,"[{'count': '38', 'name': 'to-read'}, {'count':...",,False,4.07,...,1989,https://www.goodreads.com/book/show/1151567.Dr...,https://images.gr-assets.com/books/1326125976m...,1151567,229,1139050,"Drina's Dancing Year (Drina, #2)","Drina's Dancing Year (Drina, #2)",children,24
4,66df9d075a178eb80fbd4d9b,399216154.0,2,[361869],US,eng,"[{'count': '25', 'name': 'to-read'}, {'count':...",,False,4.3,...,1989,https://www.goodreads.com/book/show/1076044.An...,https://images.gr-assets.com/books/1303608064m...,1076044,25,583367,Anno's Math Games II,Anno's Math Games II,children,38
10,66df9d085a178eb80fbd4e50,1851520198.0,2,[],US,eng,"[{'count': '93', 'name': 'to-read'}, {'count':...",,False,4.38,...,1986,https://www.goodreads.com/book/show/2974706-th...,https://s.gr-assets.com/assets/nophoto/book/11...,2974706,10,114888,The Arthur Rackham Fairy Book,The Arthur Rackham Fairy Book,children,18
12,66df9d085a178eb80fbd4e9c,,3,[],US,eng,"[{'count': '10', 'name': 'to-read'}, {'count':...",,False,4.17,...,2016,https://www.goodreads.com/book/show/29356054-a...,https://images.gr-assets.com/books/1456640073m...,29356054,18,49599175,"A Soldier, a Dog and a Boy","A Soldier, a Dog and a Boy",children,36


In [58]:
df['average_rating'] = df['average_rating'].astype(float)

In [59]:
#Dataframe/pivot table with index of title and genre as user id values of book ratings
pt = df.pivot_table(index='title', columns='genre', values='average_rating', aggfunc='mean')
pt.fillna(0,inplace=True)

In [60]:
pt.tail(20)

genre,children,comics_graphic,fantasy_paranormal,history_biography,mystery_thriller_crime,poetry,romance,young_adult
title,Unnamed: 1_level_1,Unnamed: 2_level_1,Unnamed: 3_level_1,Unnamed: 4_level_1,Unnamed: 5_level_1,Unnamed: 6_level_1,Unnamed: 7_level_1,Unnamed: 8_level_1
pollen and the storm,0.0,0.0,0.0,0.0,0.0,4.5,0.0,0.0
tea's aftertaste,0.0,0.0,0.0,0.0,0.0,5.0,0.0,0.0
the Beastly Pirates,3.56,0.0,0.0,0.0,0.0,0.0,0.0,0.0
the marahani pearls,0.0,0.0,0.0,3.66,0.0,0.0,0.0,0.0
the single girls to do list,0.0,0.0,0.0,0.0,0.0,0.0,3.94,0.0
the throne of tara,0.0,0.0,3.54,0.0,0.0,0.0,0.0,0.0
the very helpful monsters,3.96,0.0,0.0,0.0,0.0,0.0,0.0,0.0
the wonderfull wizard of oz,3.98,0.0,0.0,0.0,0.0,0.0,0.0,0.0
unvamped,0.0,0.0,3.38,0.0,0.0,0.0,0.0,0.0
"xxxHolic, Vol. 18 (xxxHOLiC, #18)",0.0,4.32,0.0,0.0,0.0,0.0,0.0,0.0


In [62]:
from sklearn.metrics.pairwise import cosine_similarity

In [63]:
similarity_scores = cosine_similarity(pt)

In [64]:
similarity_scores.shape

(21633, 21633)

In [65]:
import numpy as np
def recommendation(book_name):
    # Fetching Index
    if book_name in list(pt.index):
        index = np.where(np.array(list(pt.index)) == book_name)[0][0]
        
        # Retrieve similarity scores for the specific book
        similarity_scores_for_book = similarity_scores[index]
        
        # Sort similar items based on the specific book's similarity scores
        similar_items = sorted(list(enumerate(similarity_scores_for_book)), reverse=True, key=lambda x: x[1])[1:9]
        
        data = []
        for i in similar_items:
            title = pt.index[i[0]]
            temp_df = df[df['title'] == title]
            item = list(temp_df.drop_duplicates('title')['title'].values)
            data.append(item)
        
    else:
        print(f"The book '{book_name}' is not found in the index.")

    return data

In [69]:
recommendation("the marahani pearls")

[['11.22.63'],
 ['11/22/63'],
 ['1177 B.C.: The Year Civilization Collapsed'],
 ['11th Hour Rose (Langston Brothers #3)'],
 ['1776'],
 ['1929 (The 1929 Series, #1)'],
 ['1940s: Decades of the 20th Century'],
 ['23 Years: A Study of the Prophetic Career of Mohammad']]

In [50]:
client.close()