In [1]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma

In [None]:
from dotenv import load_dotenv

load_dotenv()

True

In [2]:
import pandas as pd

books = pd.read_csv("books_cleaned.csv")

In [3]:
books["tagged_description"]

0       9780002005883 A NOVEL THAT READERS and critics...
1       9780002261982 A new 'Christie for Christmas' -...
2       9780006178736 A memorable, mesmerizing heroine...
3       9780006280897 Lewis' work on the nature of lov...
4       9780006280934 "In The Problem of Pain, C.S. Le...
                              ...                        
5192    9788172235222 On A Train Journey Home To North...
5193    9788173031014 This book tells the tale of a ma...
5194    9788179921623 Wisdom to Create a Life of Passi...
5195    9788185300535 This collection of the timeless ...
5196    9789027712059 Since the three volume edition o...
Name: tagged_description, Length: 5197, dtype: object

In [11]:
books["tagged_description"].to_csv("tagged_description.txt",
                                   sep = "\n",
                                   index=False,
                                   header=False)

In [25]:
raw_document = TextLoader("tagged_description.txt", encoding="utf-8").load()
text_splitter = CharacterTextSplitter(chunk_size=500, chunk_overlap=0, separator="\n")
documents = text_splitter.split_documents(raw_document)

Created a chunk of size 1168, which is longer than the specified 500
Created a chunk of size 1214, which is longer than the specified 500
Created a chunk of size 960, which is longer than the specified 500
Created a chunk of size 843, which is longer than the specified 500
Created a chunk of size 881, which is longer than the specified 500
Created a chunk of size 1088, which is longer than the specified 500
Created a chunk of size 1189, which is longer than the specified 500
Created a chunk of size 513, which is longer than the specified 500
Created a chunk of size 752, which is longer than the specified 500
Created a chunk of size 728, which is longer than the specified 500
Created a chunk of size 721, which is longer than the specified 500
Created a chunk of size 1267, which is longer than the specified 500
Created a chunk of size 681, which is longer than the specified 500
Created a chunk of size 553, which is longer than the specified 500
Created a chunk of size 521, which is longe

In [27]:
documents[0]

Document(metadata={'source': 'tagged_description.txt'}, page_content='9780002005883 A NOVEL THAT READERS and critics have been eagerly anticipating for over a decade, Gilead is an astonishingly imagined story of remarkable lives. John Ames is a preacher, the son of a preacher and the grandson (both maternal and paternal) of preachers. It’s 1956 in Gilead, Iowa, towards the end of the Reverend Ames’s life, and he is absorbed in recording his family’s story, a legacy for the young son he will never see grow up. Haunted by his grandfather’s presence, John tells of the rift between his grandfather and his father: the elder, an angry visionary who fought for the abolitionist cause, and his son, an ardent pacifist. He is troubled, too, by his prodigal namesake, Jack (John Ames) Boughton, his best friend’s lost son who returns to Gilead searching for forgiveness and redemption. Told in John Ames’s joyous, rambling voice that finds beauty, humour and truth in the smallest of life’s details, Gi

In [33]:
db_books = Chroma.from_documents(
    documents,
    embedding=OpenAIEmbeddings()
)

In [35]:
query = "A book to teach children about nature"
docs = db_books.similarity_search(query, k = 10)
docs

[Document(id='f5189034-6691-45fc-bad0-2b9d4a3084f5', metadata={'source': 'tagged_description.txt'}, page_content='9780786808069 Children will discover the exciting world of their own backyard in this introduction to familiar animals from cats and dogs to bugs and frogs. The combination of photographs, illustrations, and fun facts make this an accessible and delightful learning experience.'),
 Document(id='fe82d6de-1c7d-493f-83d9-3b80727b1196', metadata={'source': 'tagged_description.txt'}, page_content="9780786808380 Introduce your babies to birds, cats, dogs, and babies through fine art, illustration, and photographs. These books are a rare opportunity to expose little ones to a range of images on a single subject, from simple child's drawings and abstract art to playful photos. A brief text accompanies each image, introducing the baby to some basic -- and sometimes playful -- information about the subjects."),
 Document(id='73f907b1-07ff-4c82-869b-1326e3522550', metadata={'source': '

In [66]:
books[books['isbn13'] == int(docs[0].page_content.split()[0].strip())]

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_and_subtitle,tagged_description
3747,9780786808069,786808063,Baby Einstein: Neighborhood Animals,Marilyn Singer;Julie Aigner-Clark,Juvenile Fiction,http://books.google.com/books/content?id=X9a4P...,Children will discover the exciting world of t...,2001.0,3.89,16.0,180.0,Baby Einstein: Neighborhood Animals,9780786808069 Children will discover the excit...


In [109]:
def retrieve_semantic_recommendations(
        query: str,
        top_k: int = 10,
) -> pd.DataFrame:
    recs = db_books.similarity_search(query, k= 20)

    books_list = []

    for i in range(0, len(recs)):
        books_list += [int(recs[i].page_content.strip('"').split()[0])]

    return books[books['isbn13'].isin(books_list)].head(top_k)

In [110]:
retrieve_semantic_recommendations("A book to teach children about nature")

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_and_subtitle,tagged_description
31,9780007105045,0007105045,Tree and Leaf,John Ronald Reuel Tolkien,Literary Collections,http://books.google.com/books/content?id=aPb_A...,"""The two works 'On fairy-stories' and 'Leaf by...",2001.0,4.09,176.0,2245.0,Tree and Leaf: The Homecoming of Beorhtnoth : ...,"9780007105045 ""The two works 'On fairy-stories..."
429,9780064434980,0064434982,The Deer in the Wood,Laura Ingalls Wilder,Juvenile Fiction,http://books.google.com/books/content?id=V7YDW...,Even the youngest child can enjoy a special ad...,1999.0,4.17,32.0,302.0,The Deer in the Wood,9780064434980 Even the youngest child can enjo...
442,9780067575208,006757520X,The Sense of Wonder,Rachel Carson,Nature,http://books.google.com/books/content?id=Zee5S...,"First published more than three decades ago, t...",1998.0,4.39,112.0,1160.0,The Sense of Wonder,9780067575208 First published more than three ...
692,9780140448009,0140448004,Three Tales,Gustave Flaubert;Roger Whitehouse;Geoffrey Wall,Fiction,http://books.google.com/books/content?id=XFzga...,Features short fiction by the French naturalis...,2005.0,3.71,110.0,3050.0,Three Tales,9780140448009 Features short fiction by the Fr...
1077,9780240806082,0240806085,Directing the Documentary,Michael Rabiger,Performing Arts,http://books.google.com/books/content?id=uoKli...,Michael Rabiger guides the reader through the ...,2004.0,4.23,648.0,173.0,Directing the Documentary,9780240806082 Michael Rabiger guides the reade...
1639,9780374422080,0374422087,Everything on a Waffle,Polly Horvath,Juvenile Fiction,http://books.google.com/books/content?id=NimVJ...,This Newbery Honor Book tells the story of 11 ...,2004.0,3.71,150.0,9631.0,Everything on a Waffle,9780374422080 This Newbery Honor Book tells th...
2410,9780451457592,0451457595,The River's Gift,Mercedes Lackey,Fiction,http://books.google.com/books/content?id=qLonv...,Fifteen-year-old Ariella spends her days in th...,1999.0,3.88,128.0,1029.0,The River's Gift,9780451457592 Fifteen-year-old Ariella spends ...
2805,9780571084838,0571084834,Lord of the Flies,William Golding,Airplane crash survival,,After a plane crash strands them on a tropical...,1954.0,3.67,223.0,4498.0,Lord of the Flies,9780571084838 After a plane crash strands them...
2942,9780618711666,061871166X,The Hungry Tide,Amitav Ghosh,Fiction,http://books.google.com/books/content?id=LN16i...,"Presents a novel of life in modern India, chro...",2006.0,3.94,333.0,11220.0,The Hungry Tide,9780618711666 Presents a novel of life in mode...
3061,9780679451211,0679451218,The Sibley Field Guide to Birds of Western Nor...,,Nature,http://books.google.com/books/content?id=Fd0PA...,A guide for bird enthusiasts specifically desi...,2003.0,4.7,473.0,709.0,The Sibley Field Guide to Birds of Western Nor...,9780679451211 A guide for bird enthusiasts spe...
