In [None]:
from langchain_community.document_loaders import TextLoader
from langchain_text_splitters import CharacterTextSplitter
from langchain.embeddings import HuggingFaceEmbeddings
from langchain_chroma import Chroma
from langchain.docstore.document import Document

In [None]:
# Load environment settings before anything reads them: a later cell fetches
# EMBEDDING_MODEL and CHROMA_DB_DIR with os.getenv(), presumably supplied by a
# local .env file picked up here.
from dotenv import load_dotenv
load_dotenv()

In [None]:
import pandas as pd
# Cleaned book catalogue. Later cells read the columns isbn13, title, description,
# tagged_description, average_rating and ratings_count from this frame.
books=pd.read_csv("books_cleaned.csv")

In [None]:
# Preview the column that becomes the page_content of each embedded Document.
books["tagged_description"]

In [None]:
# Export the tagged descriptions, without header or index, to the plain-text file
# that the TextLoader call in the next cell reads back.
books["tagged_description"].to_csv(
    "tagged_description.txt",
    sep="\n",
    header=False,
    index=False,
)

In [None]:
# Legacy text-file path. `documents` below is built straight from the DataFrame, so
# neither of these two objects feeds the vector store any more; they are kept only
# so the names stay defined.
# NOTE(review): newer text-splitter releases may reject chunk_size=0 - confirm, and
# consider deleting both lines together with the tagged_description.txt export.
raw_documents=TextLoader("tagged_description.txt",encoding="utf-8").load()
text_splitter=CharacterTextSplitter(chunk_size=0,chunk_overlap=0,separator="\n")

# Build one Document per catalogue row directly from the books DataFrame.
# `Document` is already imported in the first cell, so it is not re-imported here.
# Iterating the three needed columns in lockstep avoids materialising a full Series
# per row (as iterrows() does) while yielding the same values.
documents = [
    Document(
        page_content=str(tagged_description),
        # isbn13 is what the retrieval functions use to join results back to `books`.
        metadata={"isbn13": isbn13, "title": title},
    )
    for tagged_description, isbn13, title in zip(
        books["tagged_description"].tolist(),
        books["isbn13"].tolist(),
        books["title"].tolist(),
    )
]



In [None]:
# Spot-check the first Document: page_content should be the tagged description and
# metadata should carry the row's isbn13 and title.
documents[0]

In [None]:
import os
import sentence_transformers
import tf_keras as keras

# Loading env variables
model_name = os.getenv("EMBEDDING_MODEL")
chroma_dir = os.getenv("CHROMA_DB_DIR")

# Fail fast with an actionable message instead of handing None (or an empty string)
# to the embedding constructor, where the resulting error is much harder to read.
if not model_name:
    raise RuntimeError(
        "EMBEDDING_MODEL is not set; define it in the environment or .env file "
        "before building the vector store."
    )

# Load the Hugging Face embedding model
embedding_model = HuggingFaceEmbeddings(model_name=model_name)

# Create Chroma vector DB
# NOTE(review): when CHROMA_DB_DIR is unset, persist_directory is None - presumably
# the collection then lives only in memory; confirm that is acceptable.
# NOTE(review): re-running this cell against an existing persist directory likely
# adds the same documents again - TODO confirm, and consider loading the existing
# store instead of rebuilding it.
db_books = Chroma.from_documents(
    documents=documents,
    embedding=embedding_model,
    persist_directory=chroma_dir
)

In [None]:
# Sanity check: echo the two environment settings the vector-store cell reads and
# show the type of the object bound to db_books.
print("Model name:", os.getenv("EMBEDDING_MODEL"))
print("Chroma dir:", os.getenv("CHROMA_DB_DIR"))
print(type(db_books))

In [None]:
def retrieve_semantic_recomendations(
        query: str,
        top_k: int = 10,
) -> pd.DataFrame:
    """Return the `top_k` catalogue rows most similar to `query`.

    The (misspelled) function name is kept as-is so existing callers keep working.

    Args:
        query: Free-text description of the kind of book wanted.
        top_k: Maximum number of rows to return.

    Returns:
        Rows of the global `books` frame whose ``isbn13`` was returned by the
        similarity search on the global `db_books`, ordered from most to least
        similar and truncated to `top_k` rows.
    """
    # Keep the original pool of 50 candidates, but never ask for fewer hits than the
    # caller wants back.
    candidate_pool = max(50, top_k)
    recs = db_books.similarity_search(query, k=candidate_pool)

    # Map each ISBN to the position of its first (best) hit.
    rank_by_isbn = {}
    for rec in recs:
        metadata = getattr(rec, "metadata", None) or {}
        if "isbn13" in metadata:
            isbn = int(metadata["isbn13"])
        else:
            # Fallback for documents without metadata: the tagged description is
            # expected to start with the ISBN, optionally wrapped in quotes.
            isbn = int(rec.page_content.strip('"').split()[0])
        rank_by_isbn.setdefault(isbn, len(rank_by_isbn))

    matches = books[books["isbn13"].isin(list(rank_by_isbn))]

    # isin() yields rows in catalogue order; restore the similarity order before
    # truncating, otherwise head(top_k) keeps arbitrary candidates.
    similarity_rank = matches["isbn13"].map(rank_by_isbn).to_numpy()
    ordered = matches.iloc[similarity_rank.argsort(kind="stable")]

    return ordered.head(top_k)

In [None]:
# NOTE(review): an identical definition of this function appears again in the next
# cell; the later one is the one in effect at runtime. Consider keeping only one.
def retrieve_semantic_recommendations(query: str, top_k: int = 10) -> pd.DataFrame:
    """Return up to `top_k` books for `query`, re-ranked by keyword match and rating.

    Candidates come from a similarity search on the global `db_books`; they are
    joined back to the global `books` frame through the ``isbn13`` metadata field.

    Args:
        query: Free-text description of the kind of book wanted.
        top_k: Maximum number of rows to return. Non-positive values yield an
            empty frame with the same columns as `books`.

    Returns:
        Candidate rows sorted by, in order: keyword score (2 if the lowercased
        query is a substring of the title, plus 1 if it is a substring of the
        description), ``average_rating`` and ``ratings_count``, all descending.
    """
    if top_k <= 0:
        # Nothing requested: skip the search (k would be <= 0) and return no rows.
        return books.head(0).copy()

    # Over-fetch so the re-ranking below has a wider pool to choose from.
    recs = db_books.similarity_search(query, k=top_k * 20)

    # Collect the ISBNs of all hits that carry one in their metadata.
    isbn_list = [
        int(rec.metadata["isbn13"]) for rec in recs if "isbn13" in rec.metadata
    ]

    candidates = books[books["isbn13"].isin(isbn_list)].copy()

    # Boost rows whose title/description literally contain the query. Vectorised
    # string matching replaces a row-wise apply() and behaves uniformly when
    # `candidates` is empty.
    query_lower = query.lower()
    in_title = (
        candidates["title"].astype(str).str.lower()
        .str.contains(query_lower, regex=False, na=False)
    )
    in_description = (
        candidates["description"].astype(str).str.lower()
        .str.contains(query_lower, regex=False, na=False)
    )
    candidates["score"] = 2 * in_title.astype(int) + in_description.astype(int)

    candidates = candidates.sort_values(
        by=["score", "average_rating", "ratings_count"],
        ascending=[False, False, False],
    )

    return candidates.drop(columns=["score"]).head(top_k)

In [None]:
# NOTE(review): this cell redefines the function already defined in the previous
# cell; being later, this is the definition in effect. Consider keeping only one.
def retrieve_semantic_recommendations(query: str, top_k: int = 10) -> pd.DataFrame:
    """Return up to `top_k` books for `query`, re-ranked by keyword match and rating.

    Candidates come from a similarity search on the global `db_books`; they are
    joined back to the global `books` frame through the ``isbn13`` metadata field.

    Args:
        query: Free-text description of the kind of book wanted.
        top_k: Maximum number of rows to return. Non-positive values yield an
            empty frame with the same columns as `books`.

    Returns:
        Candidate rows sorted by, in order: keyword score (2 if the lowercased
        query is a substring of the title, plus 1 if it is a substring of the
        description), ``average_rating`` and ``ratings_count``, all descending.
    """
    if top_k <= 0:
        # Nothing requested: skip the search (k would be <= 0) and return no rows.
        return books.head(0).copy()

    # Over-fetch so the re-ranking below has a wider pool to choose from.
    recs = db_books.similarity_search(query, k=top_k * 20)

    # Collect the ISBNs of all hits that carry one in their metadata.
    isbn_list = [
        int(rec.metadata["isbn13"]) for rec in recs if "isbn13" in rec.metadata
    ]

    candidates = books[books["isbn13"].isin(isbn_list)].copy()

    # Boost rows whose title/description literally contain the query. Vectorised
    # string matching replaces a row-wise apply() and behaves uniformly when
    # `candidates` is empty.
    query_lower = query.lower()
    in_title = (
        candidates["title"].astype(str).str.lower()
        .str.contains(query_lower, regex=False, na=False)
    )
    in_description = (
        candidates["description"].astype(str).str.lower()
        .str.contains(query_lower, regex=False, na=False)
    )
    candidates["score"] = 2 * in_title.astype(int) + in_description.astype(int)

    candidates = candidates.sort_values(
        by=["score", "average_rating", "ratings_count"],
        ascending=[False, False, False],
    )

    return candidates.drop(columns=["score"]).head(top_k)

In [19]:
# Example query with the default top_k=10; the table below is its rendered result.
retrieve_semantic_recommendations("a heartwarming journey of love and friendship")

Unnamed: 0,isbn13,isbn10,title,authors,categories,thumbnail,description,published_year,average_rating,num_pages,ratings_count,title_and_subtitle,tagged_description
363,9780061120077,0061120073,A Tree Grows in Brooklyn,Betty Smith,Fiction,http://books.google.com/books/content?id=Y-FZ9...,The beloved American classic about a young gir...,2006.0,4.26,496.0,326733.0,A Tree Grows in Brooklyn,9780061120077 The beloved American classic abo...
4841,9781590301876,1590301870,Lovingkindness,Sharon Salzberg;Jon Kabat-Zinn,Self-Help,http://books.google.com/books/content?id=C9kY3...,"In this inspiring book, Sharon Salzberg, one o...",2004.0,4.26,256.0,4072.0,The Revolutionary Art of Happiness: Lovingkind...,"9781590301876 In this inspiring book, Sharon S..."
4619,9781563893339,1563893339,Death,Neil Gaiman;Chris Bachalo;Mark Buckingham;Mike...,Comics & Graphic Novels,http://books.google.com/books/content?id=VIdbP...,"A tale of music, mortality, friendship and dea...",1997.0,4.22,95.0,12206.0,The Time of Your Life: Death,"9781563893339 A tale of music, mortality, frie..."
592,9780140195538,014019553X,The Beloved,Kahlil Gibran;John Walbridge,Fiction,http://books.google.com/books/content?id=KOEKA...,"For Kahlil Gibran (1883-1931), love was the su...",1997.0,4.19,102.0,320.0,Reflections on the Path of the Heart: The Beloved,"9780140195538 For Kahlil Gibran (1883-1931), l..."
5023,9781852864989,1852864982,Death,Neil Gaiman,Death (Fictitious character : Gaiman),http://books.google.com/books/content?id=5XnXO...,"A tale of music, mortality, friendship and dea...",1994.0,4.18,104.0,37678.0,The High Cost of Living: Death,"9781852864989 A tale of music, mortality, frie..."
383,9780061144899,0061144894,When the Heart Waits,Sue Monk Kidd,Religion,http://books.google.com/books/content?id=JlP91...,From the Bestselling Author of The Secret Life...,2006.0,4.17,240.0,2141.0,Spiritual Direction for Life's Sacred Question...,9780061144899 From the Bestselling Author of T...
3846,9780804006941,0804006946,A Woman Speaks,Anaïs Nin;Evelyn J. Hinz,Literary Collections,http://books.google.com/books/content?id=-d5sP...,In this book Anais Nin speaks with warmth and ...,1975.0,4.16,288.0,184.0,"The Lectures, Seminars, and Interviews of Anai...",9780804006941 In this book Anais Nin speaks wi...
3,9780006280897,0006280897,The Four Loves,Clive Staples Lewis,Christian life,http://books.google.com/books/content?id=XhQ5X...,Lewis' work on the nature of love divides love...,2002.0,4.15,170.0,33684.0,The Four Loves,9780006280897 Lewis' work on the nature of lov...
4184,9780877887263,0877887268,The Rock that is Higher,Madeleine L'Engle,Religion,http://books.google.com/books/content?id=Q-8EA...,"We are all strangers in a strange land, longin...",1993.0,4.13,320.0,457.0,Story as Truth: The Rock that is Higher,9780877887263 We are all strangers in a strang...
4434,9781401917173,1401917178,The Power of Infinite Love & Gratitude,Darren R. Weissman,Health & Fitness,http://books.google.com/books/content?id=P6y0m...,Would you like to discover your infinite poten...,2007.0,4.11,298.0,59.0,An Evolutionary Journey to Awakening Your Spir...,9781401917173 Would you like to discover your ...
