In [13]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma

In [14]:
from dotenv import load_dotenv

load_dotenv()

True

In [15]:
import pandas as pd

In [16]:
books = pd.read_csv('books_cleaned.csv')

In [17]:
books["tagged_description"].to_csv('tagged_description.txt', sep='\n', index=False, header=False)

In [18]:
raw_documents = TextLoader("tagged_description.txt").load()
text_splitter = CharacterTextSplitter(chunk_size=0, chunk_overlap=0, separator="\n")
documents = text_splitter.split_documents(raw_documents)

Created a chunk of size 1170, which is longer than the specified 0
Created a chunk of size 1216, which is longer than the specified 0
Created a chunk of size 375, which is longer than the specified 0
Created a chunk of size 311, which is longer than the specified 0
Created a chunk of size 485, which is longer than the specified 0
Created a chunk of size 484, which is longer than the specified 0
Created a chunk of size 962, which is longer than the specified 0
Created a chunk of size 190, which is longer than the specified 0
Created a chunk of size 845, which is longer than the specified 0
Created a chunk of size 298, which is longer than the specified 0
Created a chunk of size 199, which is longer than the specified 0
Created a chunk of size 883, which is longer than the specified 0
Created a chunk of size 1090, which is longer than the specified 0
Created a chunk of size 1191, which is longer than the specified 0
Created a chunk of size 306, which is longer than the specified 0
Create

In [19]:
documents[0]

Document(metadata={'source': 'tagged_description.txt'}, page_content='9780002005883 : A NOVEL THAT READERS and critics have been eagerly anticipating for over a decade, Gilead is an astonishingly imagined story of remarkable lives. John Ames is a preacher, the son of a preacher and the grandson (both maternal and paternal) of preachers. It’s 1956 in Gilead, Iowa, towards the end of the Reverend Ames’s life, and he is absorbed in recording his family’s story, a legacy for the young son he will never see grow up. Haunted by his grandfather’s presence, John tells of the rift between his grandfather and his father: the elder, an angry visionary who fought for the abolitionist cause, and his son, an ardent pacifist. He is troubled, too, by his prodigal namesake, Jack (John Ames) Boughton, his best friend’s lost son who returns to Gilead searching for forgiveness and redemption. Told in John Ames’s joyous, rambling voice that finds beauty, humour and truth in the smallest of life’s details, 

In [20]:
db_books = Chroma.from_documents(
    documents,
    embedding=OpenAIEmbeddings(),
)

In [21]:
query = "A book to teach children about nature"

results = db_books.similarity_search(query, k=10)
results

[Document(id='bfb2c8f5-ca89-471b-8d40-1174ca89b1ec', metadata={'source': 'tagged_description.txt'}, page_content='9780786808069: Children will discover the exciting world of their own backyard in this introduction to familiar animals from cats and dogs to bugs and frogs. The combination of photographs, illustrations, and fun facts make this an accessible and delightful learning experience.'),
 Document(id='dbec9b45-b4f3-4ee6-b102-28cd09fb80dd', metadata={'source': 'tagged_description.txt'}, page_content='9780786808069 : Children will discover the exciting world of their own backyard in this introduction to familiar animals from cats and dogs to bugs and frogs. The combination of photographs, illustrations, and fun facts make this an accessible and delightful learning experience.'),
 Document(id='7a76a5fc-ed19-4f53-9950-0f497c85fb11', metadata={'source': 'tagged_description.txt'}, page_content="9780786808380: Introduce your babies to birds, cats, dogs, and babies through fine art, illus

In [23]:
books[books["isbn13"] == int(results[0].page_content.split()[0].strip(':'))]

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_and_subtitle,tagged_description
3747,9780786808069,786808063,Baby Einstein: Neighborhood Animals,Marilyn Singer;Julie Aigner-Clark,Juvenile Fiction,http://books.google.com/books/content?id=X9a4P...,Children will discover the exciting world of t...,2001.0,3.89,16.0,180.0,Baby Einstein: Neighborhood Animals,9780786808069 : Children will discover the exc...


In [33]:
def retrieve_semantic_recommendations(query:str, top_k:int) -> pd.DataFrame:
    recs = db_books.similarity_search(query, k=50)

    book_lists = []

    for i in range(0, len(recs)):
        book_lists += [int(recs[i].page_content.strip('"').split()[0].strip(':'))]

    return books[books["isbn13"].isin(book_lists)].head(top_k)

In [34]:
retrieve_semantic_recommendations(query, top_k=10)

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_and_subtitle,tagged_description
31,9780007105045,0007105045,Tree and Leaf,John Ronald Reuel Tolkien,Literary Collections,http://books.google.com/books/content?id=aPb_A...,"""The two works 'On fairy-stories' and 'Leaf by...",2001.0,4.09,176.0,2245.0,Tree and Leaf: The Homecoming of Beorhtnoth : ...,"9780007105045 : ""The two works 'On fairy-stori..."
429,9780064434980,0064434982,The Deer in the Wood,Laura Ingalls Wilder,Juvenile Fiction,http://books.google.com/books/content?id=V7YDW...,Even the youngest child can enjoy a special ad...,1999.0,4.17,32.0,302.0,The Deer in the Wood,9780064434980 : Even the youngest child can en...
442,9780067575208,006757520X,The Sense of Wonder,Rachel Carson,Nature,http://books.google.com/books/content?id=Zee5S...,"First published more than three decades ago, t...",1998.0,4.39,112.0,1160.0,The Sense of Wonder,9780067575208 : First published more than thre...
692,9780140448009,0140448004,Three Tales,Gustave Flaubert;Roger Whitehouse;Geoffrey Wall,Fiction,http://books.google.com/books/content?id=XFzga...,Features short fiction by the French naturalis...,2005.0,3.71,110.0,3050.0,Three Tales,9780140448009 : Features short fiction by the ...
707,9780140568196,0140568190,The Giraffe and the Pelly and Me,Roald Dahl;Quentin Blake,Candy,http://books.google.com/books/content?id=J7FdI...,"A Dahl story in which the giraffe, the pelican...",2001.0,3.81,32.0,16265.0,The Giraffe and the Pelly and Me,9780140568196 : A Dahl story in which the gira...
1078,9780241003008,0241003008,The Very Hungry Caterpillar,Eric Carle,Babytime resource,http://books.google.com/books/content?id=DpGEQ...,Eric Carle's children's classic is the story o...,1994.0,4.29,26.0,340101.0,The Very Hungry Caterpillar,9780241003008 : Eric Carle's children's classi...
1639,9780374422080,0374422087,Everything on a Waffle,Polly Horvath,Juvenile Fiction,http://books.google.com/books/content?id=NimVJ...,This Newbery Honor Book tells the story of 11 ...,2004.0,3.71,150.0,9631.0,Everything on a Waffle,9780374422080 : This Newbery Honor Book tells ...
2201,9780440362050,0440362059,My Name is Aram,William Saroyan,Fiction,http://books.google.com/books/content?id=2P3XA...,A small boy finds excitement and promise in th...,1966.0,4.15,151.0,975.0,My Name is Aram,9780440362050 : A small boy finds excitement a...
2942,9780618711666,061871166X,The Hungry Tide,Amitav Ghosh,Fiction,http://books.google.com/books/content?id=LN16i...,"Presents a novel of life in modern India, chro...",2006.0,3.94,333.0,11220.0,The Hungry Tide,9780618711666 : Presents a novel of life in mo...
3061,9780679451211,0679451218,The Sibley Field Guide to Birds of Western Nor...,,Nature,http://books.google.com/books/content?id=Fd0PA...,A guide for bird enthusiasts specifically desi...,2003.0,4.7,473.0,709.0,The Sibley Field Guide to Birds of Western Nor...,9780679451211 : A guide for bird enthusiasts s...
