In [22]:
import pandas as pd
import numpy as np
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_huggingface import HuggingFaceEmbeddings
from langchain_chroma import Chroma


books = pd.read_csv('books_cleaned.csv')

In [23]:
books

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_and_subtitle,tagged_description
0,9780002005883,0002005883,Gilead,Marilynne Robinson,Fiction,http://books.google.com/books/content?id=KQZCP...,A NOVEL THAT READERS and critics have been eag...,2004.0,3.85,247.0,361.0,Gilead,9780002005883 A NOVEL THAT READERS and critics...
1,9780002261982,0002261987,Spider's Web,Charles Osborne;Agatha Christie,Detective and mystery stories,http://books.google.com/books/content?id=gA5GP...,A new 'Christie for Christmas' -- a full-lengt...,2000.0,3.83,241.0,5164.0,Spider's Web: A Novel,9780002261982 A new 'Christie for Christmas' -...
2,9780006178736,0006178731,Rage of angels,Sidney Sheldon,Fiction,http://books.google.com/books/content?id=FKo2T...,"A memorable, mesmerizing heroine Jennifer -- b...",1993.0,3.93,512.0,29532.0,Rage of angels,"9780006178736 A memorable, mesmerizing heroine..."
3,9780006280897,0006280897,The Four Loves,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=XhQ5X...,Lewis' work on the nature of love divides love...,2002.0,4.15,170.0,33684.0,The Four Loves,9780006280897 Lewis' work on the nature of lov...
4,9780006280934,0006280935,The Problem of Pain,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=Kk-uV...,"""In The Problem of Pain, C.S. Lewis, one of th...",2002.0,4.09,176.0,37569.0,The Problem of Pain,"9780006280934 ""In The Problem of Pain, C.S. Le..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5192,9788172235222,8172235224,Mistaken Identity,Nayantara Sahgal,Indic fiction (English),http://books.google.com/books/content?id=q-tKP...,On A Train Journey Home To North India After L...,2003.0,2.93,324.0,0.0,Mistaken Identity,9788172235222 On A Train Journey Home To North...
5193,9788173031014,8173031010,Journey to the East,Hermann Hesse,Adventure stories,http://books.google.com/books/content?id=rq6JP...,This book tells the tale of a man who goes on ...,2002.0,3.70,175.0,24.0,Journey to the East,9788173031014 This book tells the tale of a ma...
5194,9788179921623,817992162X,The Monk Who Sold His Ferrari: A Fable About F...,Robin Sharma,Health & Fitness,http://books.google.com/books/content?id=c_7mf...,"Wisdom to Create a Life of Passion, Purpose, a...",2003.0,3.82,198.0,1568.0,The Monk Who Sold His Ferrari: A Fable About F...,9788179921623 Wisdom to Create a Life of Passi...
5195,9788185300535,8185300534,I Am that,Sri Nisargadatta Maharaj;Sudhakar S. Dikshit,Philosophy,http://books.google.com/books/content?id=Fv_JP...,This collection of the timeless teachings of o...,1999.0,4.51,531.0,104.0,I Am that: Talks with Sri Nisargadatta Maharaj,9788185300535 This collection of the timeless ...


**take the tagged description and make a textfile**

In [25]:
books["tagged_description"].to_csv("tagged_description.txt", index=False, header=False)

In [26]:
raw_documents = TextLoader("tagged_description.txt").load()
text_splitter = CharacterTextSplitter(chunk_size=0, chunk_overlap=0, separator="\n")
documents = text_splitter.split_documents(raw_documents)

Created a chunk of size 1170, which is longer than the specified 0
Created a chunk of size 1216, which is longer than the specified 0
Created a chunk of size 375, which is longer than the specified 0
Created a chunk of size 311, which is longer than the specified 0
Created a chunk of size 483, which is longer than the specified 0
Created a chunk of size 484, which is longer than the specified 0
Created a chunk of size 962, which is longer than the specified 0
Created a chunk of size 188, which is longer than the specified 0
Created a chunk of size 845, which is longer than the specified 0
Created a chunk of size 296, which is longer than the specified 0
Created a chunk of size 197, which is longer than the specified 0
Created a chunk of size 881, which is longer than the specified 0
Created a chunk of size 1090, which is longer than the specified 0
Created a chunk of size 1191, which is longer than the specified 0
Created a chunk of size 306, which is longer than the specified 0
Create

**make the chroma database**

In [27]:
huggingface_embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")

db_books = Chroma.from_documents(documents, huggingface_embeddings)


**Now that we have made the database , we can now make predictions**

In [30]:
query = "Evil king"
docs = db_books.similarity_search(query, k= 10)
docs

[Document(id='a18c81c8-3d93-42c9-bc08-b73244ef62fd', metadata={'source': 'tagged_description.txt'}, page_content='"9780751504385 Unrivalled monarch of the macabre Stephen King again takes the unsuspecting reader on a fantastic journey through the dark, shadowy areas of our innermost fears. In a bumper collection of truly chilling tales, we meet Gramma, who only wants to hug little George, even after she was dead; The Raft - a primeval sea creature with an insatiable appetite; The Monkey - an innocent-looking toy with sinister powers; the unspeakable horror of The Mist. And there is a gruesome host of other stories, each with the distinctive blend of unimaginable terror and realism that typifies King\'s writing."'),
 Document(id='8d929fcf-544d-4e87-a03c-5215abd76a37', metadata={'source': 'tagged_description.txt'}, page_content='"9780007137336 When a dangerous necromancer threatens to unleash a long-buried evil, Lirael and Prince Sameth are drawn into a battle to save the Old Kingdom and

**Return the books title and all other info**

In [31]:
books[books["isbn13"] == int(docs[0].page_content.strip('"').split()[0])]

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_and_subtitle,tagged_description
3494,9780751504385,751504386,Skeleton Crew,Stephen King,English fiction,http://books.google.com/books/content?id=WcrCQ...,Unrivalled monarch of the macabre Stephen King...,1993.0,3.95,612.0,93593.0,Skeleton Crew,9780751504385 Unrivalled monarch of the macabr...


**Now make a function to give top 10 suggestion**

In [34]:
def retrieve_semantic_recommendation(query, top_k=10)-> pd.DataFrame:
    recs = db_books.similarity_search(query, k=50)

    books_list = []

    for i in range(0, len(recs)):
        books_list += [int(recs[i].page_content.strip('"').split()[0])]

    return books[books["isbn13"].isin(books_list)].head(top_k)

In [35]:
retrieve_semantic_recommendation("Ghost King")

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_and_subtitle,tagged_description
53,9780007135653,0007135653,The Lord of the Rings,Gary Russell,Characters and characteristics in motion pictures,http://books.google.com/books/content?id=oEZ6A...,With complete access to artwork created over a...,2004.0,4.54,224.0,28.0,The Lord of the Rings: The Art of The Return o...,9780007135653 With complete access to artwork ...
56,9780007137336,0007137338,Lirael,Garth Nix,Fantasy fiction,http://books.google.com/books/content?id=sDzU8...,When a dangerous necromancer threatens to unle...,2004.0,4.3,527.0,1339.0,Lirael: Daughter of the Clayr,9780007137336 When a dangerous necromancer thr...
491,9780099446729,0099446723,Blackwood Farm,Anne Rice,Horror,http://books.google.com/books/content?id=cIn8T...,"Lestat Is Back, Saviour And Demon, Presiding O...",2003.0,3.86,774.0,26145.0,Blackwood Farm,"9780099446729 Lestat Is Back, Saviour And Demo..."
501,9780099456452,0099456451,The Spook's Apprentice,Joseph Delaney,Exorcism,http://books.google.com/books/content?id=ZA0yS...,A wonderful and terrifying series by a new wri...,2005.0,3.98,325.0,2733.0,The Spook's Apprentice,9780099456452 A wonderful and terrifying serie...
503,9780099460176,0099460173,Blood Canticle,Anne Rice,Horror tales,http://books.google.com/books/content?id=iQjGw...,Lestat is back with a vengeance and in thrall ...,2004.0,3.72,400.0,18646.0,Blood Canticle,9780099460176 Lestat is back with a vengeance ...
558,9780140071788,0140071784,Roald Dahl's Book of Ghost Stories,Roald Dahl,"Ghost stories, American",http://books.google.com/books/content?id=zGt6R...,Who better to investigate the literary spirit ...,1985.0,3.69,249.0,2403.0,Roald Dahl's Book of Ghost Stories,9780140071788 Who better to investigate the li...
712,9780140714548,0140714545,Hamlet,William Shakespeare,Drama,http://books.google.com/books/content?id=4RbYu...,"Hamlet, Prince of Denmark is faced by a ghost ...",2001.0,4.01,148.0,1605.0,Hamlet,"9780140714548 Hamlet, Prince of Denmark is fac..."
1062,9780231082815,0231082819,Three Plays by Kobo Abe,Kōbō Abe,Fiction,http://books.google.com/books/content?id=D9Ggn...,Three plays by one of contemporary Japan's mos...,1993.0,3.91,233.0,76.0,Three Plays by Kobo Abe,9780231082815 Three plays by one of contempora...
1100,9780262740159,026274015X,Looking Awry,Slavoj Žižek,Performing Arts,http://books.google.com/books/content?id=COT8n...,"Slavoj Zizek, a leading intellectual in the ne...",1992.0,4.05,188.0,1276.0,Looking Awry: An Introduction to Jacques Lacan...,"9780262740159 Slavoj Zizek, a leading intellec..."
1466,9780345419620,0345419626,The Queen of the Damned,Anne Rice,Fiction,http://books.google.com/books/content?id=LKOyy...,Intertwines the stories of rock star and vampi...,1997.0,3.88,448.0,118857.0,The Queen of the Damned,9780345419620 Intertwines the stories of rock ...
