In [1]:
import os

from dotenv import load_dotenv
from langchain_community.document_loaders.firecrawl import FireCrawlLoader

# SECURITY: never hard-code API keys in a notebook. A key that has been saved or
# committed in a notebook should be treated as leaked and rotated.
# Load variables from a local .env file (if one exists) and read the key from the
# environment instead.
load_dotenv()
firecrawl_api_key = os.getenv("FIRECRAWL_API_KEY")
if not firecrawl_api_key:
    # Fail early with an actionable message rather than a vendor auth error later.
    raise RuntimeError(
        "FIRECRAWL_API_KEY is not set; export it or add it to your .env file."
    )

# Firecrawl loader for the listing page; mode="crawl" is kept from the original cell.
loader = FireCrawlLoader(
    api_key=firecrawl_api_key,
    url="https://www.myntra.com/women-jewellery",
    mode="crawl",
)

In [2]:
# Run the configured Firecrawl loader and keep the documents it returns.
data = loader.load()

In [3]:
# Display the loaded documents (the bare last expression renders the list's repr).
data

[Document(metadata={'url': 'https://www.myntra.com/women-jewellery', 'ogUrl': 'https://www.myntra.com/women-jewellery', 'title': 'Jewellery For Women - Buy Women Jewellery Online in India | Myntra', 'og:url': 'https://www.myntra.com/women-jewellery', 'favicon': {}, 'og:type': 'website', 'ogImage': 'https://constant.myntassets.com/www/data/portal/mlogo.png', 'ogTitle': "Women's Jewellery India | Buy Jewellery for Women Online in India", 'keywords': 'jewellery, women jewellery, shop online for women jewellery, jewellery for women online, online shopping for women jewellery in india,', 'language': 'en', 'og:image': 'https://constant.myntassets.com/www/data/portal/mlogo.png', 'og:title': "Women's Jewellery India | Buy Jewellery for Women Online in India", 'scrapeId': '041ad35c-de52-4ecd-8349-c377e8580e46', 'viewport': ['width=device-width, initial-scale=1.0, user-scalable=no', 'width=device-width, initial-scale=1'], 'fb:admins': '520074227', 'fb:app_id': '182424375109898', 'sourceURL': 'ht

In [7]:
# Create Chunks
from langchain_text_splitters import RecursiveCharacterTextSplitter
def create_chunks(documents, chunk_size=1000, chunk_overlap=200):
    """Split documents into overlapping text chunks.

    Args:
        documents: Documents to split; passed straight to
            ``RecursiveCharacterTextSplitter.split_documents``.
        chunk_size: Maximum size of each chunk as measured by the splitter
            (defaults to 1000, the value previously hard-coded here).
        chunk_overlap: Amount of overlap between consecutive chunks
            (defaults to 200, the value previously hard-coded here).

    Returns:
        The chunks produced by ``split_documents``. ``add_start_index=True``
        asks the splitter to record each chunk's start offset in its metadata.
    """
    text_splitter = RecursiveCharacterTextSplitter(
        chunk_size=chunk_size,
        chunk_overlap=chunk_overlap,
        add_start_index=True,
    )
    return text_splitter.split_documents(documents)

# Split the loaded documents into chunks; these are what get embedded later.
text_chunks = create_chunks(data)

In [8]:
# Display the chunks (the bare last expression renders the list's repr).
text_chunks

[Document(metadata={'url': 'https://www.myntra.com/women-jewellery', 'ogUrl': 'https://www.myntra.com/women-jewellery', 'title': 'Jewellery For Women - Buy Women Jewellery Online in India | Myntra', 'og:url': 'https://www.myntra.com/women-jewellery', 'favicon': {}, 'og:type': 'website', 'ogImage': 'https://constant.myntassets.com/www/data/portal/mlogo.png', 'ogTitle': "Women's Jewellery India | Buy Jewellery for Women Online in India", 'keywords': 'jewellery, women jewellery, shop online for women jewellery, jewellery for women online, online shopping for women jewellery in india,', 'language': 'en', 'og:image': 'https://constant.myntassets.com/www/data/portal/mlogo.png', 'og:title': "Women's Jewellery India | Buy Jewellery for Women Online in India", 'scrapeId': '653d16a4-e293-4a5a-869b-4d9183dba3e9', 'viewport': ['width=device-width, initial-scale=1.0, user-scalable=no', 'width=device-width, initial-scale=1'], 'fb:admins': '520074227', 'fb:app_id': '182424375109898', 'sourceURL': 'ht

In [10]:
# Number of chunks produced by the splitter.
len(text_chunks)

47

In [11]:
# Generate Embeddings & Store in Vector Database
from langchain_google_genai import GoogleGenerativeAIEmbeddings
from langchain_community.vectorstores import FAISS

def get_embedding_model():
    """Build the embedding model used to vectorize text.

    Returns:
        A ``GoogleGenerativeAIEmbeddings`` instance configured with the
        "models/text-embedding-004" model.
    """
    return GoogleGenerativeAIEmbeddings(model="models/text-embedding-004")



# Index documents: embed every chunk with the embedding model, build a FAISS
# vector store from the result, and save the index under FAISS_DB_PATH.
# NOTE(review): the embedding model presumably needs GOOGLE_API_KEY in the
# environment, but load_dotenv() is only called in a later cell - confirm this
# cell works on a fresh kernel.
FAISS_DB_PATH="Vectorstore/db_faiss"
faiss_db=FAISS.from_documents(text_chunks, get_embedding_model())
faiss_db.save_local(FAISS_DB_PATH)

  from .autonotebook import tqdm as notebook_tqdm


In [15]:

from langchain_google_genai import ChatGoogleGenerativeAI
from langchain_core.prompts import ChatPromptTemplate
import os

import os
from dotenv import load_dotenv
# Load variables from a local .env file (if one exists) into the environment.
load_dotenv()

# API key for the Google model; os.getenv returns None when the variable is unset.
GOOGLE_API_KEY = os.getenv("GOOGLE_API_KEY")

# Chat model that generates the final answers.
llm_model = ChatGoogleGenerativeAI(model="gemini-2.0-flash", api_key=GOOGLE_API_KEY)


# Retrieve Docs
def retrieve_docs(query, k=4):
    """Return the indexed chunks most similar to ``query``.

    Searches the module-level ``faiss_db`` vector store.

    Args:
        query: Text to search for.
        k: Number of chunks to return. Defaults to 4, the default of
            ``FAISS.similarity_search``, so existing one-argument calls
            behave as before.

    Returns:
        The documents returned by ``faiss_db.similarity_search``.
    """
    return faiss_db.similarity_search(query, k=k)  # Similarity search

# get only the content part
def get_context(documents):
    """Concatenate the text of the given documents into one context string.

    Args:
        documents: Iterable of objects exposing a ``page_content`` attribute.

    Returns:
        The ``page_content`` values joined by a blank line ("\\n\\n"); an empty
        string when ``documents`` is empty.
    """
    page_texts = (document.page_content for document in documents)
    return "\n\n".join(page_texts)


# Answer Question
# Prompt text for the answering step. It has two placeholders, {question} and
# {context}, which answer_query fills in when it invokes the chain.
custom_prompt_template = """
Use the pieces of information provided in the context to answer user's question.
If you dont know the answer, just say that you dont know, dont try to make up an answer. 
Dont provide anything out of the given context
Question: {question} 
Context: {context} 
Answer:
"""



def answer_query(documents, model, query):
    """Answer ``query`` with ``model`` using only the given documents as context.

    Args:
        documents: Retrieved documents; their text is merged by ``get_context``.
        model: Runnable piped after the prompt (the notebook passes a chat model).
        query: The user's question.

    Returns:
        ``response.content`` when the model's response has a ``content``
        attribute, otherwise ``str(response)``.
    """
    merged_context = get_context(documents)
    prompt_template = ChatPromptTemplate.from_template(custom_prompt_template)
    # The pipe operator chains the two steps: the rendered prompt is fed to the model.
    qa_chain = prompt_template | model
    result = qa_chain.invoke({"question": query, "context": merged_context})
    # Return only the text when it is available; otherwise fall back to the
    # string form of whatever the model returned.
    if hasattr(result, 'content'):
        return result.content
    return str(result)

In [17]:
# Smoke test: retrieve the chunks most similar to a sample product question,
# then print the model's answer built from those chunks.
question="What is the Price of   Rubans Rose Gold-Plated Beaded & Stone-Studded Mangalsutra ?"
retrieved_docs=retrieve_docs(question)
print(answer_query(documents=retrieved_docs, model=llm_model, query=question))

The price of the Rubans Rose Gold-Plated Beaded & Stone-Studded Mangalsutra is Rs. 504 Rs. 1400 (64% OFF).
