In [30]:
from langchain_community.document_loaders import TextLoader
# Text splitters
from langchain_text_splitters import CharacterTextSplitter

# OpenAI embeddings
from langchain_huggingface import HuggingFaceEmbeddings


# Vectorstore (Chroma)
from langchain_community.vectorstores import Chroma

In [31]:
import pandas as pd

In [32]:
books=pd.read_csv("books_cleaned.csv")

In [33]:
books["tagged_description"]

0       9780002005883; A NOVEL THAT READERS and critic...
1       9780002261982; A new 'Christie for Christmas' ...
2       9780006178736; A memorable, mesmerizing heroin...
3       9780006280897; Lewis' work on the nature of lo...
4       9780006280934; "In The Problem of Pain, C.S. L...
                              ...                        
5192    9788172235222; On A Train Journey Home To Nort...
5193    9788173031014; This book tells the tale of a m...
5194    9788179921623; Wisdom to Create a Life of Pass...
5195    9788185300535; This collection of the timeless...
5196    9789027712059; Since the three volume edition ...
Name: tagged_description, Length: 5197, dtype: object

In [34]:
books["tagged_description"].to_csv("tagged_description.txt",sep="\n",index=False,header=False)

In [35]:
!pip install chardet




In [36]:
import chardet

with open("tagged_description.txt", "rb") as f:
    raw = f.read(50000)  # sample
    print(chardet.detect(raw))


{'encoding': 'utf-8', 'confidence': 0.99, 'language': ''}


In [37]:
raw_documents = TextLoader("tagged_description.txt", encoding="utf-8").load()
text_splitter = CharacterTextSplitter(chunk_size=1000 , chunk_overlap=0 , separator = '\n')
documents = text_splitter.split_documents(raw_documents)

Created a chunk of size 1169, which is longer than the specified 1000
Created a chunk of size 1215, which is longer than the specified 1000
Created a chunk of size 1089, which is longer than the specified 1000
Created a chunk of size 1190, which is longer than the specified 1000
Created a chunk of size 1268, which is longer than the specified 1000
Created a chunk of size 2011, which is longer than the specified 1000
Created a chunk of size 1226, which is longer than the specified 1000
Created a chunk of size 1185, which is longer than the specified 1000
Created a chunk of size 1215, which is longer than the specified 1000
Created a chunk of size 1192, which is longer than the specified 1000
Created a chunk of size 1058, which is longer than the specified 1000
Created a chunk of size 1271, which is longer than the specified 1000
Created a chunk of size 1636, which is longer than the specified 1000
Created a chunk of size 1133, which is longer than the specified 1000
Created a chunk of s

In [38]:
documents[0]

Document(metadata={'source': 'tagged_description.txt'}, page_content='9780002005883; A NOVEL THAT READERS and critics have been eagerly anticipating for over a decade, Gilead is an astonishingly imagined story of remarkable lives. John Ames is a preacher, the son of a preacher and the grandson (both maternal and paternal) of preachers. It’s 1956 in Gilead, Iowa, towards the end of the Reverend Ames’s life, and he is absorbed in recording his family’s story, a legacy for the young son he will never see grow up. Haunted by his grandfather’s presence, John tells of the rift between his grandfather and his father: the elder, an angry visionary who fought for the abolitionist cause, and his son, an ardent pacifist. He is troubled, too, by his prodigal namesake, Jack (John Ames) Boughton, his best friend’s lost son who returns to Gilead searching for forgiveness and redemption. Told in John Ames’s joyous, rambling voice that finds beauty, humour and truth in the smallest of life’s details, G

In [39]:
# 5️⃣ Initialize free Hugging Face embeddings
embeddings = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")

# 6️⃣ Create a Chroma vector store
db_books = Chroma.from_documents(documents, embedding=embeddings)

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [41]:
query="A book to teach children about nature"
docs = db_books.similarity_search(query,k=3)
docs

[Document(metadata={'source': 'tagged_description.txt'}, page_content="9780786808069; Children will discover the exciting world of their own backyard in this introduction to familiar animals from cats and dogs to bugs and frogs. The combination of photographs, illustrations, and fun facts make this an accessible and delightful learning experience.\n9780786808373; Introducing your baby to birds, cats, dogs, and babies through fine art, illsutration and photographs. These books are a rare opportunity to expose little ones to a range of images on a single subject, from simple child's drawings and abstract art to playful photos. A brief text accompanies each image, introducing baby to some basic -- and sometimes playful -- information on the subjects."),
 Document(metadata={'source': 'tagged_description.txt'}, page_content="9780064402453; ‘Racso, a brash and boastful little rodent, is making his way to Thorn Valley, determined to learn how to read and write and become a hero. His bragging 

In [42]:
## This is just returning the descriptions , so to get recomendations of book titles,authors,etc

In [44]:
books[books["isbn13"]==int(docs[0].page_content.split()[0].strip().rstrip(";"))]

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_&_subtitle,tagged_description
3747,9780786808069,786808063,Baby Einstein: Neighborhood Animals,Marilyn Singer;Julie Aigner-Clark,Juvenile Fiction,http://books.google.com/books/content?id=X9a4P...,Children will discover the exciting world of t...,2001.0,3.89,16.0,180.0,Baby Einstein: Neighborhood Animals,9780786808069; Children will discover the exci...


In [53]:
def retrieve_seamantic_recommendations(
    query: str,
    top_k: int=10,
) -> pd.DataFrame:
    recs=db_books.similarity_search(query,k=50)

    books_list=[]

    for i in range(0, len(recs)):
        books_list+=[int(recs[i].page_content.strip('"').split()[0].rstrip(";"))]

    return books[books["isbn13"].isin(books_list)].head(top_k)

In [54]:
retrieve_seamantic_recommendations("A book to teach children about nature")

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_&_subtitle,tagged_description
30,9780006646006,000664600X,Ocean Star Express,Mark Haddon;Peter Sutton,Juvenile Fiction,http://books.google.com/books/content?id=I2QZA...,Joe and his parents are enjoying a summer holi...,2002.0,3.5,32.0,1.0,Ocean Star Express,9780006646006; Joe and his parents are enjoyin...
59,9780007151240,0007151241,The Family Way,Tony Parsons,Parenthood,http://books.google.com/books/content?id=dJEIx...,It should be the most natural thing in the wor...,2005.0,3.51,400.0,2095.0,The Family Way,9780007151240; It should be the most natural t...
223,9780060775858,0060775858,Goodnight Moon 60th Anniversary Edition,Margaret Wise Brown,Juvenile Fiction,http://books.google.com/books/content?id=lLYOr...,"In a great green room, tucked away in bed, is ...",2005.0,4.27,32.0,264013.0,Goodnight Moon 60th Anniversary Edition,"9780060775858; In a great green room, tucked a..."
383,9780061144899,0061144894,When the Heart Waits,Sue Monk Kidd,Religion,http://books.google.com/books/content?id=JlP91...,From the Bestselling Author of The Secret Life...,2006.0,4.17,240.0,2141.0,When the Heart Waits; Spiritual Direction for ...,9780061144899; From the Bestselling Author of ...
392,9780061208492,0061208493,The Complete C. S. Lewis Signature Classics,C. S. Lewis,Religion,http://books.google.com/books/content?id=JaC0_...,Seven Spiritual Masterworks by C. S. Lewis Thi...,2007.0,4.61,746.0,873.0,The Complete C. S. Lewis Signature Classics,9780061208492; Seven Spiritual Masterworks by ...
404,9780064402453,0064402452,Racso and the Rats of NIMH,Jane Leslie Conly,Juvenile Fiction,http://books.google.com/books/content?id=MgoNv...,"‘Racso, a brash and boastful little rodent, is...",1988.0,3.76,288.0,3231.0,Racso and the Rats of NIMH,"9780064402453; ‘Racso, a brash and boastful li..."
406,9780064403870,0064403874,"R-T, Margaret, and the Rats of NIMH",Jane Leslie Conly,Juvenile Fiction,http://books.google.com/books/content?id=WTHHH...,"When Margaret and her younger brother, Artie, ...",1991.0,3.52,272.0,631.0,"R-T, Margaret, and the Rats of NIMH",9780064403870; When Margaret and her younger b...
407,9780064404419,0064404412,The Rainbow People,Laurence Yep,Juvenile Fiction,http://books.google.com/books/content?id=5AHwq...,"""Culled from 69 stories collected in a [1930s]...",1992.0,3.75,208.0,202.0,The Rainbow People,"9780064404419; ""Culled from 69 stories collect..."
416,9780064406925,006440692X,Winter on the Farm,Laura Ingalls Wilder,Juvenile Fiction,http://books.google.com/books/content?id=IvlKH...,The Little House books tell the story of a lit...,1997.0,4.13,32.0,400.0,Winter on the Farm,9780064406925; The Little House books tell the...
427,9780064434874,0064434877,Christmas in the Big Woods,Laura Ingalls Wilder,Juvenile Fiction,http://books.google.com/books/content?id=FT1Yp...,"Long ago, a little girl named Laura Ingalls li...",1997.0,4.19,32.0,2062.0,Christmas in the Big Woods,"9780064434874; Long ago, a little girl named L..."
