In [3]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma

In [4]:
from dotenv import load_dotenv

load_dotenv()

True

In [5]:
import pandas as pd

books = pd.read_csv('book_cleaned.csv')

In [6]:
books["tagged_description"]

0       9780002005883 A NOVEL THAT READERS and critics...
1       9780002261982 A new 'Christie for Christmas' -...
2       9780006178736 A memorable, mesmerizing heroine...
3       9780006280897 Lewis' work on the nature of lov...
4       9780006280934 "In The Problem of Pain, C.S. Le...
                              ...                        
5192    9788172235222 On A Train Journey Home To North...
5193    9788173031014 This book tells the tale of a ma...
5194    9788179921623 Wisdom to Create a Life of Passi...
5195    9788185300535 This collection of the timeless ...
5196    9789027712059 Since the three volume edition o...
Name: tagged_description, Length: 5197, dtype: object

In [7]:
books["tagged_description"].to_csv('tagged_description.txt',
                                   sep="\n",
                                   index=False,
                                   header=False)

In [8]:
raw_documents = TextLoader("tagged_description.txt", encoding="utf-8").load()
text_splitter = CharacterTextSplitter(chunk_size=0, chunk_overlap=0, separator="\n")
documents = text_splitter.split_documents(raw_documents)

Created a chunk of size 1168, which is longer than the specified 0
Created a chunk of size 1214, which is longer than the specified 0
Created a chunk of size 373, which is longer than the specified 0
Created a chunk of size 309, which is longer than the specified 0
Created a chunk of size 483, which is longer than the specified 0
Created a chunk of size 482, which is longer than the specified 0
Created a chunk of size 960, which is longer than the specified 0
Created a chunk of size 188, which is longer than the specified 0
Created a chunk of size 843, which is longer than the specified 0
Created a chunk of size 296, which is longer than the specified 0
Created a chunk of size 197, which is longer than the specified 0
Created a chunk of size 881, which is longer than the specified 0
Created a chunk of size 1088, which is longer than the specified 0
Created a chunk of size 1189, which is longer than the specified 0
Created a chunk of size 304, which is longer than the specified 0
Create

In [8]:
documents[0]

Document(metadata={'source': 'tagged_description.txt'}, page_content='9780002005883 A NOVEL THAT READERS and critics have been eagerly anticipating for over a decade, Gilead is an astonishingly imagined story of remarkable lives. John Ames is a preacher, the son of a preacher and the grandson (both maternal and paternal) of preachers. It’s 1956 in Gilead, Iowa, towards the end of the Reverend Ames’s life, and he is absorbed in recording his family’s story, a legacy for the young son he will never see grow up. Haunted by his grandfather’s presence, John tells of the rift between his grandfather and his father: the elder, an angry visionary who fought for the abolitionist cause, and his son, an ardent pacifist. He is troubled, too, by his prodigal namesake, Jack (John Ames) Boughton, his best friend’s lost son who returns to Gilead searching for forgiveness and redemption. Told in John Ames’s joyous, rambling voice that finds beauty, humour and truth in the smallest of life’s details, Gi

In [12]:
db_books = Chroma.from_documents(documents, embedding=OpenAIEmbeddings())

In [13]:
query = "A book about motivation"
docs = db_books.similarity_search(query, k = 10)
docs

[Document(id='54f29bd9-54c5-4523-86c4-b3e785dddc9e', metadata={'source': 'tagged_description.txt'}, page_content='9781400078394 An authority on cognitive psychology and motivation documents the positive effects of optimism on the quality of life and provides a program of specific exercises designed to break the pessimism habit while developing an optimistic outlook. Reprint. 25,000 first printing.'),
 Document(id='0ad2182e-f308-4524-9674-208438429bcf', metadata={'source': 'tagged_description.txt'}, page_content='9780743297301 The son of the author of the best-selling 7 Habits series explains how trust is a key catalyst for personal and organizational success in the twenty-first century, in a guide for businesspeople that demonstrates how to inspire trust while overcoming bureaucratic obstacles. 100,000 first printing.'),
 Document(id='617c38c8-b3e3-4877-a783-ba02d7d3524e', metadata={'source': 'tagged_description.txt'}, page_content='9780749919443 Diana Cooper believes we can all transf

In [14]:
books[books["isbn13"] == int(docs[0].page_content.split()[0].strip())]

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_and_subtitle,tagged_description
4385,9781400078394,1400078393,Learned Optimism,Martin E. P. Seligman,Psychology,http://books.google.com/books/content?id=JYxID...,An authority on cognitive psychology and motiv...,2006.0,4.0,319.0,13231.0,How to Change Your Mind and Your Life,9781400078394 An authority on cognitive psycho...


In [15]:
def retrevie_semantic_recommendations(
        query: str,
        top_k: int = 10,
) -> pd.DataFrame:
    recs = db_books.similarity_search(query, k = 50)

    books_list = []

    for i in range(0, len(recs)):
        books_list += [int(recs[i].page_content.strip('"').split()[0])]

    return books[books["isbn13"].isin(books_list)].head(top_k)

In [20]:
retrevie_semantic_recommendations("A book to teach childrens about computer")

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_and_subtitle,tagged_description
69,9780007179817,0007179812,Microserfs,Douglas Coupland,Computer programmers,http://books.google.com/books/content?id=N3Amm...,"At computer giant Microsoft, Dan, Susan, Abe, ...",2004.0,3.88,371.0,436.0,Microserfs,"9780007179817 At computer giant Microsoft, Dan..."
545,9780130887016,0130887013,C++ Programmer's Notebook,James Edward Keogh;John Shapley Gray,Computers,http://books.google.com/books/content?id=tmgPA...,Covers all the C++ concepts a programmer needs...,2002.0,3.0,528.0,3.0,C++ Programmer's Notebook,9780130887016 Covers all the C++ concepts a pr...
546,9780131492028,0131492020,Java,Walter J. Savitch,Computers,http://books.google.com/books/content?id=FlU_A...,Introduces object-oriented programming and imp...,2005.0,3.86,1060.0,23.0,An Introduction to Problem Solving & Programming,9780131492028 Introduces object-oriented progr...
707,9780140568196,0140568190,The Giraffe and the Pelly and Me,Roald Dahl;Quentin Blake,Candy,http://books.google.com/books/content?id=J7FdI...,"A Dahl story in which the giraffe, the pelican...",2001.0,3.81,32.0,16265.0,The Giraffe and the Pelly and Me,9780140568196 A Dahl story in which the giraff...
711,9780140621624,0140621628,The Railway Children,E. Nesbit,Fiction,http://books.google.com/books/content?id=fFesd...,"When their father is sent away to prison, thre...",1995.0,4.0,212.0,178.0,The Railway Children,9780140621624 When their father is sent away t...
855,9780143037392,0143037390,The Read-aloud Handbook,Jim Trelease,Language Arts & Disciplines,http://books.google.com/books/content?id=B2_yU...,Explains the importance of reading aloud to ch...,2006.0,4.4,432.0,4122.0,The Read-aloud Handbook,9780143037392 Explains the importance of readi...
917,9780156032971,015603297X,Foucault's Pendulum,Umberto Eco,Fiction,http://books.google.com/books/content?id=Ubkvm...,"Three editors, inspired by an extraordinary fa...",2007.0,3.9,623.0,44658.0,Foucault's Pendulum,"9780156032971 Three editors, inspired by an ex..."
1027,9780201748673,0201748673,Robin Williams Web Design Workshop,John Tollett;Robin Williams;David Rohr,Computers,http://books.google.com/books/content?id=lhatg...,Illustrating her ideas with hundreds of exampl...,2002.0,3.78,384.0,51.0,Robin Williams Web Design Workshop,9780201748673 Illustrating her ideas with hund...
1059,9780226817415,0226817415,From Counterculture to Cyberculture,Fred Turner,Social Science,http://books.google.com/books/content?id=wz5Em...,"In the early 1960s, computers haunted the Amer...",2006.0,3.96,327.0,323.0,"Stewart Brand, the Whole Earth Network, and th...","9780226817415 In the early 1960s, computers ha..."
1078,9780241003008,0241003008,The Very Hungry Caterpillar,Eric Carle,Babytime resource,http://books.google.com/books/content?id=DpGEQ...,Eric Carle's children's classic is the story o...,1994.0,4.29,26.0,340101.0,The Very Hungry Caterpillar,9780241003008 Eric Carle's children's classic ...
