In [1]:
from langchain_community.document_loaders import TextLoader # takes in text from docs and convert into a format that Langchain can work with
from langchain_text_splitters import CharacterTextSplitter # chunking
from langchain_openai import OpenAIEmbeddings # document embeddings
from langchain_chroma import Chroma # vector DB

In [2]:
from dotenv import load_dotenv

load_dotenv()

True

In [3]:
import pandas as pd

books = pd.read_csv("books_cleaned.csv")

In [4]:
books.head()

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_and_subtitle,tagged_description
0,9780002005883,2005883,Gilead,Marilynne Robinson,Fiction,http://books.google.com/books/content?id=KQZCP...,A NOVEL THAT READERS and critics have been eag...,2004.0,3.85,247.0,361.0,Gilead,9780002005883 A NOVEL THAT READERS and critics...
1,9780002261982,2261987,Spider's Web,Charles Osborne;Agatha Christie,Detective and mystery stories,http://books.google.com/books/content?id=gA5GP...,A new 'Christie for Christmas' -- a full-lengt...,2000.0,3.83,241.0,5164.0,Spider's Web: A Novel,9780002261982 A new 'Christie for Christmas' -...
2,9780006178736,6178731,Rage of angels,Sidney Sheldon,Fiction,http://books.google.com/books/content?id=FKo2T...,"A memorable, mesmerizing heroine Jennifer -- b...",1993.0,3.93,512.0,29532.0,Rage of angels,"9780006178736 A memorable, mesmerizing heroine..."
3,9780006280897,6280897,The Four Loves,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=XhQ5X...,Lewis' work on the nature of love divides love...,2002.0,4.15,170.0,33684.0,The Four Loves,9780006280897 Lewis' work on the nature of lov...
4,9780006280934,6280935,The Problem of Pain,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=Kk-uV...,"""In The Problem of Pain, C.S. Lewis, one of th...",2002.0,4.09,176.0,37569.0,The Problem of Pain,"9780006280934 ""In The Problem of Pain, C.S. Le..."


In [5]:
books["tagged_description"]


0       9780002005883 A NOVEL THAT READERS and critics...
1       9780002261982 A new 'Christie for Christmas' -...
2       9780006178736 A memorable, mesmerizing heroine...
3       9780006280897 Lewis' work on the nature of lov...
4       9780006280934 "In The Problem of Pain, C.S. Le...
                              ...                        
5192    9788172235222 On A Train Journey Home To North...
5193    9788173031014 This book tells the tale of a ma...
5194    9788179921623 Wisdom to Create a Life of Passi...
5195    9788185300535 This collection of the timeless ...
5196    9789027712059 Since the three volume edition o...
Name: tagged_description, Length: 5197, dtype: object

In [32]:
books["tagged_description"].to_csv("tagged_description.txt",
                                   sep = "\n",
                                   index = False,
                                   header = False)

ValueError: bad delimiter value

In [28]:
raw_documents = TextLoader("tagged_description.txt",encoding="utf-8").load()
text_splitter = CharacterTextSplitter(chunk_size=0, chunk_overlap=0, separator="\n")
documents = text_splitter.split_documents(raw_documents)

Created a chunk of size 1168, which is longer than the specified 0
Created a chunk of size 1214, which is longer than the specified 0
Created a chunk of size 373, which is longer than the specified 0
Created a chunk of size 309, which is longer than the specified 0
Created a chunk of size 483, which is longer than the specified 0
Created a chunk of size 482, which is longer than the specified 0
Created a chunk of size 960, which is longer than the specified 0
Created a chunk of size 188, which is longer than the specified 0
Created a chunk of size 843, which is longer than the specified 0
Created a chunk of size 296, which is longer than the specified 0
Created a chunk of size 197, which is longer than the specified 0
Created a chunk of size 881, which is longer than the specified 0
Created a chunk of size 1088, which is longer than the specified 0
Created a chunk of size 1189, which is longer than the specified 0
Created a chunk of size 304, which is longer than the specified 0
Create

In [29]:
documents[0]

Document(metadata={'source': 'tagged_description.txt'}, page_content='9780002005883 A NOVEL THAT READERS and critics have been eagerly anticipating for over a decade, Gilead is an astonishingly imagined story of remarkable lives. John Ames is a preacher, the son of a preacher and the grandson (both maternal and paternal) of preachers. It’s 1956 in Gilead, Iowa, towards the end of the Reverend Ames’s life, and he is absorbed in recording his family’s story, a legacy for the young son he will never see grow up. Haunted by his grandfather’s presence, John tells of the rift between his grandfather and his father: the elder, an angry visionary who fought for the abolitionist cause, and his son, an ardent pacifist. He is troubled, too, by his prodigal namesake, Jack (John Ames) Boughton, his best friend’s lost son who returns to Gilead searching for forgiveness and redemption. Told in John Ames’s joyous, rambling voice that finds beauty, humour and truth in the smallest of life’s details, Gi

In [30]:
db_books = Chroma.from_documents(
    documents,
    embedding=OpenAIEmbeddings())

In [34]:
query = "A book to teach children about nature"
docs = db_books.similarity_search(query, k = 10)
docs

[Document(id='2b1bfeb9-c9c8-4a18-9838-adbc2b34ecba', metadata={'source': 'tagged_description.txt'}, page_content='"9780786808069 Children will discover the exciting world of their own backyard in this introduction to familiar animals from cats and dogs to bugs and frogs. The combination of photographs, illustrations, and fun facts make this an accessible and delightful learning experience."'),
 Document(id='66859765-0dbc-463c-a020-8b41e6bb1e33', metadata={'source': 'tagged_description.txt'}, page_content='9780786808069 Children will discover the exciting world of their own backyard in this introduction to familiar animals from cats and dogs to bugs and frogs. The combination of photographs, illustrations, and fun facts make this an accessible and delightful learning experience.'),
 Document(id='12bd4e91-6791-49cd-916a-3baeccebd8d0', metadata={'source': 'tagged_description.txt'}, page_content="9780786808380 Introduce your babies to birds, cats, dogs, and babies through fine art, illustr

In [37]:
int(docs[0].page_content.split()[0].strip('"'))

9780786808069

In [38]:
books[books["isbn13"] == int(docs[0].page_content.split()[0].strip('"'))]


Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_and_subtitle,tagged_description
3747,9780786808069,786808063,Baby Einstein: Neighborhood Animals,Marilyn Singer;Julie Aigner-Clark,Juvenile Fiction,http://books.google.com/books/content?id=X9a4P...,Children will discover the exciting world of t...,2001.0,3.89,16.0,180.0,Baby Einstein: Neighborhood Animals,9780786808069 Children will discover the excit...


In [39]:
def retrieve_semantic_recommendations(
        query: str,
        top_k: int = 10,
) -> pd.DataFrame:
    recs = db_books.similarity_search(query, k = 50)

    books_list = []

    for i in range(0, len(recs)):
        books_list += [int(recs[i].page_content.strip('"').split()[0])]

    return books[books["isbn13"].isin(books_list)]

In [42]:
retrieve_semantic_recommendations("Murder Stories")


Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_and_subtitle,tagged_description
45,9780007121007,7121008,Mrs McGinty's Dead,Agatha Christie,Belgians,http://books.google.com/books/content?id=gcS0d...,An old widow is brutally killed in the parlour...,2002.0,3.83,328.0,525.0,Mrs McGinty's Dead,9780007121007 An old widow is brutally killed ...
506,9780099462255,99462257,Like a Charm,Karin Slaughter,Bracelets,http://books.google.com/books/content?id=HVWED...,The cream of British and American crime writer...,2004.0,3.5,384.0,1241.0,Like a Charm,9780099462255 The cream of British and America...
854,9780143037231,143037234,The Big Over Easy,Jasper Fforde,Fiction,http://books.google.com/books/content?id=tGNdr...,Unconvinced that a former convict and milliona...,2006.0,3.93,383.0,26532.0,The Big Over Easy: A Nursery Crime,9780143037231 Unconvinced that a former convic...
856,9780143037439,143037439,Tooth and Claw,T. Coraghessan Boyle,Fiction,http://books.google.com/books/content?id=ebK4H...,"A collection of fourteen stories includes ""The...",2006.0,3.82,304.0,1165.0,Tooth and Claw,9780143037439 A collection of fourteen stories...
1348,9780316168816,316168815,The Lovely Bones,Alice Sebold,Fiction,http://books.google.com/books/content?id=o-Qfn...,The spirit of fourteen-year-old Susie Salmon d...,2004.0,3.79,328.0,5776.0,The Lovely Bones,9780316168816 The spirit of fourteen-year-old ...
1404,9780330419123,330419129,Under the Banner of Heaven,Jon Krakauer,Mormon fundamentalism,http://books.google.com/books/content?id=pv-q6...,Krakauer shifts his focus from extremes of phy...,2004.0,3.99,369.0,126672.0,Under the Banner of Heaven: A Story of Violent...,9780330419123 Krakauer shifts his focus from e...
1726,9780375756603,375756604,Pipe Dream,Solomon Jones,Fiction,http://books.google.com/books/content?id=95RBD...,The murder of a Philadelphia politician with a...,2001.0,3.98,341.0,136.0,Pipe Dream: A Novel,9780375756603 The murder of a Philadelphia pol...
2256,9780446359207,446359203,Carrion Comfort,Dan Simmons,Fiction,http://books.google.com/books/content?id=37wWp...,"Three elderly friends, who possess supernatura...",1990.0,3.9,884.0,15056.0,Carrion Comfort,"9780446359207 Three elderly friends, who posse..."
2266,9780446578424,446578428,The Serial Killers Club,Jeff Povey,Fiction,http://books.google.com/books/content?id=3ZBml...,Killing a serial murderer while defending hims...,2006.0,3.21,279.0,105.0,The Serial Killers Club: A Novel,9780446578424 Killing a serial murderer while ...
2335,9780449149911,449149919,"Lost Boy, Lost Girl",Peter Straub,Fiction,http://books.google.com/books/content?id=YWrcm...,The bizarre suicide of a woman and the disappe...,2004.0,3.48,368.0,3940.0,"Lost Boy, Lost Girl",9780449149911 The bizarre suicide of a woman a...
