In [1]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_chroma import Chroma

In [2]:
import pandas as pd

In [3]:
data = pd.read_csv('data_cleaned.csv')

In [4]:
data.head()

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_subtitle,tagged_description
0,9780002005883,2005883,Gilead,Marilynne Robinson,Fiction,http://books.google.com/books/content?id=KQZCP...,A NOVEL THAT READERS and critics have been eag...,2004.0,3.85,247.0,361.0,Gilead,9780002005883 Gilead A NOVEL THAT READERS and ...
1,9780002261982,2261987,Spider's Web,Charles Osborne;Agatha Christie,Detective and mystery stories,http://books.google.com/books/content?id=gA5GP...,A new 'Christie for Christmas' -- a full-lengt...,2000.0,3.83,241.0,5164.0,Spider's Web A Novel,9780002261982 Spider's Web A Novel A new 'Chri...
2,9780006178736,6178731,Rage of angels,Sidney Sheldon,Fiction,http://books.google.com/books/content?id=FKo2T...,"A memorable, mesmerizing heroine Jennifer -- b...",1993.0,3.93,512.0,29532.0,Rage of angels,"9780006178736 Rage of angels A memorable, mesm..."
3,9780006280897,6280897,The Four Loves,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=XhQ5X...,Lewis' work on the nature of love divides love...,2002.0,4.15,170.0,33684.0,The Four Loves,9780006280897 The Four Loves Lewis' work on th...
4,9780006280934,6280935,The Problem of Pain,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=Kk-uV...,"""In The Problem of Pain, C.S. Lewis, one of th...",2002.0,4.09,176.0,37569.0,The Problem of Pain,"9780006280934 The Problem of Pain ""In The Prob..."


In [5]:
data['tagged_description'].to_csv(
    'tagged_description.txt',
    sep = '\n',
    index=False,
    header=False
)

In [6]:
raw_documents = TextLoader('tagged_description.txt',encoding='utf-8').load()
text_splitter = CharacterTextSplitter(chunk_size=0,chunk_overlap=0,separator='\n')

In [7]:
documents = text_splitter.split_documents(raw_documents)

Created a chunk of size 1175, which is longer than the specified 0
Created a chunk of size 1235, which is longer than the specified 0
Created a chunk of size 388, which is longer than the specified 0
Created a chunk of size 324, which is longer than the specified 0
Created a chunk of size 503, which is longer than the specified 0
Created a chunk of size 552, which is longer than the specified 0
Created a chunk of size 997, which is longer than the specified 0
Created a chunk of size 207, which is longer than the specified 0
Created a chunk of size 863, which is longer than the specified 0
Created a chunk of size 321, which is longer than the specified 0
Created a chunk of size 213, which is longer than the specified 0
Created a chunk of size 896, which is longer than the specified 0
Created a chunk of size 1105, which is longer than the specified 0
Created a chunk of size 1234, which is longer than the specified 0
Created a chunk of size 321, which is longer than the specified 0
Create

In [8]:
huggingface_embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")
db_books = Chroma.from_documents(
    documents,
    embedding=huggingface_embeddings
)

  huggingface_embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2")


In [34]:
query = 'Religious book'
docs= db_books.similarity_search(query,k=10)
docs

[Document(id='234464af-8876-4be8-9f16-2d339b528704', metadata={'source': 'tagged_description.txt'}, page_content='"9780300000894 Psychoanalysis and Religion A noted psychoanalyst assesses the modern issue between traditional religion and a philosophy that takes as the sole aim in life the satisfaction of instinctive and material values. ""A daring book to have cast into the midst of the world\'s excitements, for it will itself breed new excitements...it is not a book to be missed by those interested in man\'s spiritual growth.""--New York Times"'),
 Document(id='f7f4aef6-7d46-4885-ac7f-faebdebf8fec', metadata={'source': 'tagged_description.txt'}, page_content='"9780300000894 Psychoanalysis and Religion A noted psychoanalyst assesses the modern issue between traditional religion and a philosophy that takes as the sole aim in life the satisfaction of instinctive and material values. ""A daring book to have cast into the midst of the world\'s excitements, for it will itself breed new exci

In [37]:
data[data['isbn13']==int(docs[2].page_content.split()[0].strip())]  

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_subtitle,tagged_description
4904,9781594481574,1594481571,Plan B,Anne Lamott,Biography & Autobiography,http://books.google.com/books/content?id=__EKD...,A spiritual guide by the author of the best-se...,2006.0,4.06,352.0,18609.0,Plan B Further Thoughts on Faith,9781594481574 Plan B Further Thoughts on Faith...


In [47]:
import re
def retrieve_recommendation(
        query:str,
        top_k:int=10,
        
)->pd.DataFrame:
    recs = db_books.similarity_search(query,k=50)
    book_list=[]
    
    for i in range(0, len(recs)):
        match = re.match(r'["]?(\d+)', recs[i].page_content) 
        if match:
            isbn = match.group(1)  
            book_list.append(isbn)
    return data[data['isbn13'].astype(str).isin(book_list)].head(top_k)

In [53]:
retrieve_recommendation('Nature',10)

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_subtitle,tagged_description
59,9780007151240,0007151241,The Family Way,Tony Parsons,Parenthood,http://books.google.com/books/content?id=dJEIx...,It should be the most natural thing in the wor...,2005.0,3.51,400.0,2095.0,The Family Way,9780007151240 The Family Way It should be the ...
297,9780060929596,0060929596,As Nature Made Him,John Colapinto,Medical,http://books.google.com/books/content?id=92jVV...,Tells the story of a man whose botched circumc...,2001.0,3.97,320.0,5282.0,As Nature Made Him The Boy Who Was Raised as a...,9780060929596 As Nature Made Him The Boy Who W...
388,9780061196676,0061196673,Smithsonian Intimate Guide to Human Origins,Carl Zimmer,Social Science,http://books.google.com/books/content?id=xufuS...,From the savannas of Africa to modern-day labs...,2007.0,4.0,176.0,167.0,Smithsonian Intimate Guide to Human Origins,9780061196676 Smithsonian Intimate Guide to Hu...
460,9780099273844,0099273845,An Imaginary Life,David Malouf,Australian fiction,http://books.google.com/books/content?id=6TBts...,"""In the first century A. D. , Publius Ovidius ...",1999.0,3.87,156.0,1469.0,An Imaginary Life,"9780099273844 An Imaginary Life ""In the first ..."
799,9780142003343,0142003344,The Blank Slate,Steven Pinker,Psychology,http://books.google.com/books/content?id=7rJ5g...,In a study of the nature versus nurture debate...,2003.0,4.08,528.0,17851.0,The Blank Slate The Modern Denial of Human Nature,9780142003343 The Blank Slate The Modern Denia...
991,9780192862099,019286209X,The Origins of Life,John Maynard Smith;Eörs Szathmáry,Science,http://books.google.com/books/content?id=nHDbB...,'I can recommend this book as a thoroughly int...,2000.0,4.11,192.0,41.0,The Origins of Life From the Birth of Life to ...,9780192862099 The Origins of Life From the Bir...
1391,9780330351690,0330351699,Into the Wild,Jon Krakauer,True Crime,http://books.google.com/books/content?id=X7M1D...,"Using the true story of a young man, who in 19...",1998.0,3.97,206.0,1268.0,Into the Wild,9780330351690 Into the Wild Using the true sto...
1642,9780374522599,0374522596,The Control of Nature,John McPhee,Nature,http://books.google.com/books/content?id=p1qKQ...,The Control of Nature is John McPhee's bestsel...,1990.0,4.24,288.0,3365.0,The Control of Nature,9780374522599 The Control of Nature The Contro...
1734,9780375760396,0375760393,The Botany of Desire,Michael Pollan,Gardening,http://books.google.com/books/content?id=tsx9x...,Focusing on the human relationship with plants...,2001.0,4.06,297.0,39426.0,The Botany of Desire A Plant's-eye View of the...,9780375760396 The Botany of Desire A Plant's-e...
1885,9780387987200,0387987207,Leaps of Faith,Nicholas Humphrey,Medical,http://books.google.com/books/content?id=V-9-I...,"""Elegant and literate"" -THE TIMES OF LONDON ""T...",1999.0,3.76,244.0,20.0,"Leaps of Faith Science, Miracles, and the Sear...","9780387987200 Leaps of Faith Science, Miracles..."
