In [1]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain_openai import OpenAIEmbeddings
from langchain_chroma import Chroma
from langchain_huggingface import HuggingFaceEmbeddings

In [2]:
import pandas as pd

In [3]:
raw_df = pd.read_csv("books_cleaned.csv")

raw_df

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_and_subtitle,tagged_description
0,9780002005883,0002005883,Gilead,Marilynne Robinson,Fiction,http://books.google.com/books/content?id=KQZCP...,A NOVEL THAT READERS and critics have been eag...,2004.0,3.85,247.0,361.0,Gilead,9780002005883 A NOVEL THAT READERS and critics...
1,9780002261982,0002261987,Spider's Web,Charles Osborne;Agatha Christie,Detective and mystery stories,http://books.google.com/books/content?id=gA5GP...,A new 'Christie for Christmas' -- a full-lengt...,2000.0,3.83,241.0,5164.0,Spider's Web: A Novel,9780002261982 A new 'Christie for Christmas' -...
2,9780006178736,0006178731,Rage of angels,Sidney Sheldon,Fiction,http://books.google.com/books/content?id=FKo2T...,"A memorable, mesmerizing heroine Jennifer -- b...",1993.0,3.93,512.0,29532.0,Rage of angels,"9780006178736 A memorable, mesmerizing heroine..."
3,9780006280897,0006280897,The Four Loves,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=XhQ5X...,Lewis' work on the nature of love divides love...,2002.0,4.15,170.0,33684.0,The Four Loves,9780006280897 Lewis' work on the nature of lov...
4,9780006280934,0006280935,The Problem of Pain,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=Kk-uV...,"""In The Problem of Pain, C.S. Lewis, one of th...",2002.0,4.09,176.0,37569.0,The Problem of Pain,"9780006280934 ""In The Problem of Pain, C.S. Le..."
...,...,...,...,...,...,...,...,...,...,...,...,...,...
5192,9788172235222,8172235224,Mistaken Identity,Nayantara Sahgal,Indic fiction (English),http://books.google.com/books/content?id=q-tKP...,On A Train Journey Home To North India After L...,2003.0,2.93,324.0,0.0,Mistaken Identity,9788172235222 On A Train Journey Home To North...
5193,9788173031014,8173031010,Journey to the East,Hermann Hesse,Adventure stories,http://books.google.com/books/content?id=rq6JP...,This book tells the tale of a man who goes on ...,2002.0,3.70,175.0,24.0,Journey to the East,9788173031014 This book tells the tale of a ma...
5194,9788179921623,817992162X,The Monk Who Sold His Ferrari: A Fable About F...,Robin Sharma,Health & Fitness,http://books.google.com/books/content?id=c_7mf...,"Wisdom to Create a Life of Passion, Purpose, a...",2003.0,3.82,198.0,1568.0,The Monk Who Sold His Ferrari: A Fable About F...,9788179921623 Wisdom to Create a Life of Passi...
5195,9788185300535,8185300534,I Am that,Sri Nisargadatta Maharaj;Sudhakar S. Dikshit,Philosophy,http://books.google.com/books/content?id=Fv_JP...,This collection of the timeless teachings of o...,1999.0,4.51,531.0,104.0,I Am that: Talks with Sri Nisargadatta Maharaj,9788185300535 This collection of the timeless ...


In [10]:
raw_df["tagged_description"].to_csv("tagged_description.txt",
                                    index=False,
                                    header=False)

In [12]:
raw_documents = TextLoader("tagged_description.txt").load()
text_splitter = CharacterTextSplitter(
    separator = "\n",
    chunk_size = 10000,
    chunk_overlap = 0,
)
documents = text_splitter.split_documents(raw_documents)

In [13]:
documents[0]

Document(metadata={'source': 'tagged_description.txt'}, page_content='"9780002005883 A NOVEL THAT READERS and critics have been eagerly anticipating for over a decade, Gilead is an astonishingly imagined story of remarkable lives. John Ames is a preacher, the son of a preacher and the grandson (both maternal and paternal) of preachers. It’s 1956 in Gilead, Iowa, towards the end of the Reverend Ames’s life, and he is absorbed in recording his family’s story, a legacy for the young son he will never see grow up. Haunted by his grandfather’s presence, John tells of the rift between his grandfather and his father: the elder, an angry visionary who fought for the abolitionist cause, and his son, an ardent pacifist. He is troubled, too, by his prodigal namesake, Jack (John Ames) Boughton, his best friend’s lost son who returns to Gilead searching for forgiveness and redemption. Told in John Ames’s joyous, rambling voice that finds beauty, humour and truth in the smallest of life’s details, G

In [15]:
db_raw_df = Chroma.from_documents(documents,
                            HuggingFaceEmbeddings(model_name="sentence-transformers/all-MiniLM-L6-v2"))

In [17]:
query = "A book to teach children about nature"
docs = db_raw_df.similarity_search(query, k = 10)
docs

[Document(id='a3ff8bd6-24cc-4d83-9626-7baa0a9f9f41', metadata={'source': 'tagged_description.txt'}, page_content='"9780067575208 First published more than three decades ago, this reissue of Rachel Carson\'s award-winning classic brings her unique vision to a new generation of readers. Stunning new photographs by Nick Kelsh beautifully complement Carson\'s intimate account of adventures with her young nephew, Roger, as they enjoy walks along the rocky coast of Maine and through dense forests and open fields, observing wildlife, strange plants, moonlight and storm clouds, and listening to the ""living music"" of insects in the underbrush. ""If a child is to keep alive his inborn sense of wonder."" Writes Carson, ""he needs the companionship of at least one adult who can share it, rediscovering with him the joy, excitement and mystery of the world we live in."" The Sense of Wonder is a refreshing antidote to indifference and a guide to capturing the simple power of discovery that Carson v

In [19]:
raw_df[raw_df["isbn13"] == int(docs[0].page_content.split()[0].strip('"'))]

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_and_subtitle,tagged_description
442,9780067575208,006757520X,The Sense of Wonder,Rachel Carson,Nature,http://books.google.com/books/content?id=Zee5S...,"First published more than three decades ago, t...",1998.0,4.39,112.0,1160.0,The Sense of Wonder,9780067575208 First published more than three ...


In [20]:
def retrieve_semantic_recommendations(
        query: str,
        top_k: int = 10,
) -> pd.DataFrame:
    recs = db_raw_df.similarity_search(query, k = 50)

    books_list = []

    for i in range(0, len(recs)):
        books_list += [int(recs[i].page_content.strip('"').split()[0])]

    return raw_df[raw_df["isbn13"].isin(books_list)]

In [21]:
retrieve_semantic_recommendations("A book to teach children about nature", top_k=10)

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_and_subtitle,tagged_description
260,9780060852559,0060852550,"Animal, Vegetable, Miracle",Barbara Kingsolver;Camille Kingsolver;Steven L...,Biography & Autobiography,http://books.google.com/books/content?id=qLkEY...,Bestselling author Barbara Kingsolver returns ...,2007.0,4.04,370.0,86130.0,"Animal, Vegetable, Miracle: A Year of Food Life",9780060852559 Bestselling author Barbara Kings...
274,9780060887964,0060887966,The Alchemist - Gift Edition,Paulo Coelho,Fiction,http://books.google.com/books/content?id=h6QuJ...,"""My heart is afraid that it will have to suffe...",2006.0,3.85,192.0,814.0,The Alchemist - Gift Edition,"9780060887964 ""My heart is afraid that it will..."
306,9780060932664,006093266X,Collected Novellas,Gabriel Garcia Marquez,Fiction,http://books.google.com/books/content?id=JRcVu...,"Renowned as a master of magical realism, Gabri...",1999.0,4.01,288.0,822.0,Collected Novellas,9780060932664 Renowned as a master of magical ...
316,9780060938109,0060938102,The Schopenhauer Cure,Irvin Yalom,Fiction,http://books.google.com/books/content?id=aDG4l...,Suddenly confronted with his own mortality aft...,2006.0,4.21,358.0,8663.0,The Schopenhauer Cure: A Novel,9780060938109 Suddenly confronted with his own...
391,9780061205699,0061205699,To Kill a Mockingbird (slipcased edition),Harper Lee,Fiction,http://books.google.com/books/content?id=M9lKH...,"At the age of eight, Scout Finch is an entrenc...",2006.0,4.27,323.0,250.0,To Kill a Mockingbird (slipcased edition),"9780061205699 At the age of eight, Scout Finch..."
430,9780064435260,0064435261,A Little Prairie House,Laura Ingalls Wilder,Juvenile Fiction,http://books.google.com/books/content?id=pRSju...,"Long, long ago, a little girl named Laura Inga...",1999.0,4.19,32.0,1533.0,A Little Prairie House,"9780064435260 Long, long ago, a little girl na..."
442,9780067575208,006757520X,The Sense of Wonder,Rachel Carson,Nature,http://books.google.com/books/content?id=Zee5S...,"First published more than three decades ago, t...",1998.0,4.39,112.0,1160.0,The Sense of Wonder,9780067575208 First published more than three ...
709,9780140620474,0140620478,Far from the Madding Crowd,Thomas Hardy,England,http://books.google.com/books/content?id=6K8FZ...,Independent and spirited Bathsheba Everdene ha...,1994.0,3.94,374.0,312.0,Far from the Madding Crowd,9780140620474 Independent and spirited Bathshe...
1383,9780330334617,0330334611,The Border Trilogy,Cormac McCarthy,Fiction,http://books.google.com/books/content?id=QeGjw...,"Cormac McCarthy’s award-winning, bestselling t...",2002.0,4.43,1038.0,319.0,The Border Trilogy: Picador Classic,"9780330334617 Cormac McCarthy’s award-winning,..."
1733,9780375760136,037576013X,Daniel Deronda,George Eliot,Fiction,http://books.google.com/books/content?id=uPiMx...,"Deronda, a high-minded young man searching for...",1876.0,3.83,796.0,19852.0,Daniel Deronda,"9780375760136 Deronda, a high-minded young man..."
