## Get the product data from the web server


In [218]:
import time
import warnings

import faiss
import numpy as np
import pandas as pd
import requests
from autoscraper import AutoScraper
from bs4 import BeautifulSoup
warnings.filterwarnings("ignore")


In [219]:
# Listing pages to scrape; the loop below looks for "cardWrapper" tiles on each.
urls = ["https://www.hdfcbank.com/personal/pay/cards/credit-cards",
    "https://www.hdfcbank.com/personal/pay/cards/debit-cards",
    "https://www.hdfcbank.com/personal/pay/cards/forex-cards",
    "https://www.hdfcbank.com/personal/pay/cards/millennia-cards",
]

# Seconds to wait on each request. requests applies no timeout by default, so
# without this a stalled connection would hang the cell indefinitely.
REQUEST_TIMEOUT_S = 30

# Maps card title -> short description. Keyed by title, so a card that shows up
# on more than one listing page is stored once (the last page seen wins).
cards_info = {}

for url in urls:
    response = requests.get(url, timeout=REQUEST_TIMEOUT_S)
    # Fail loudly on 4xx/5xx instead of parsing an error page into zero cards.
    response.raise_for_status()
    soup = BeautifulSoup(response.content, "html.parser")

    for card in soup.find_all("div", class_="cardWrapper"):
        title_tag = card.find("span", class_="card-name")
        desc_tag = card.find("div", class_="card-des")

        # Skip tiles that lack either element rather than storing partial rows.
        if title_tag and desc_tag:
            title = title_tag.get_text(strip=True)
            desc = desc_tag.get_text(strip=True)
            cards_info[title] = desc


In [222]:
# Convert the scraped {title: description} mapping into a two-column DataFrame.
# Requires pandas to be imported as `pd`. list(...) materialises the dict view
# into (title, description) row tuples, one row per card.
df = pd.DataFrame(list(cards_info.items()), columns=["Products", "Description"])



In [238]:
# Preview at most the first 50 scraped products (bare expression -> rich display).
df.head(n=50)

Unnamed: 0,Products,Description
0,PIXEL Play Credit Card,The Born Digital Range of Credit Cards
1,Freedom Credit Card,The Best Card for Your Everyday Spends & Big P...
2,IndianOil HDFC Bank Credit Card,Fuel Benefits. Ready For You.
3,Diners Club Privilege Credit Card,Global Belonging. Ready for You
4,Millennia Credit Card,The Best CashBack Credit Card
5,MoneyBack+ Credit Card,The Most Rewarding Card For Everyday Spends
6,Tata Neu Plus HDFC Bank Credit Card,Extraordinary Rewards. Ready for You.
7,Swiggy HDFC Bank Credit Card,Delightful Rewards. Ready For You
8,Regalia Gold Credit Card,Experience the Golden Side Of Life
9,INFINIA Metal Edition,By Invite only


In [223]:
## Load the data

from langchain.schema import Document

# Wrap every product row in a LangChain Document. The text is
# "<product>:<description>" and each document carries the same source tag.
documents = []
for _, product_row in df.iterrows():
    page_text = f"{product_row['Products']}:{product_row['Description']}"
    documents.append(
        Document(page_content=page_text, metadata={'source': "scarped_hdfc"})
    )


In [224]:
## Environment Setup
import os

from dotenv import load_dotenv

load_dotenv()

# Read the API credentials from the process environment; each is None when unset.
GROQ_API_KEY = os.environ.get("GROQ_API_KEY")
HF_TOKEN = os.environ.get("HF_TOKEN")

In [225]:
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain.prompts import PromptTemplate,FewShotPromptTemplate
from langchain_core.prompts import ChatPromptTemplate

from langchain.embeddings import HuggingFaceEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import RetrievalQA
from langchain_groq import ChatGroq


In [226]:
# Split documents into chunks of at most 100 characters, with a 5-character
# overlap between neighbouring chunks.
splitter = RecursiveCharacterTextSplitter(
    chunk_overlap=5,
    chunk_size=100,
)
docs = splitter.split_documents(documents)

In [249]:
# FAISS vector store
# Sentence-transformer embeddings, run on CPU.
embeddings = HuggingFaceEmbeddings(
    model_kwargs=dict(device="cpu"),
    model_name="sentence-transformers/all-MiniLM-L6-v2",
)

# Embed every chunk and index it in an in-memory FAISS store.
vectorstore = FAISS.from_documents(documents=docs, embedding=embeddings)

# Persist the index to disk.
vectorstore.save_local(folder_path="vector_store/hdfc_faiss")

In [245]:
## LLM configuration
# Groq-hosted chat model. temperature=0 and top_p=1 are presumably chosen for
# repeatable answers -- confirm if sampling behaviour matters.
llm = ChatGroq(temperature=0, top_p=1, model_name="llama3-70b-8192")

In [234]:
# Prompt Design

# System instructions that open the prompt. Written as explicit fragments so
# the leading " \n" and the trailing space after "context." stay visible.
system_message = (
    " \n"
    "You are a helpful and knowledgeable assistant.\n"
    "Answer questions using the provided context. \n"
    "If the answer is not in the context, say you don't know.\n"
)

# Worked question/answer pairs shown to the model as few-shot examples.
examples = [
    dict(
        question="What is the best credit card for cashback?",
        answer="According to the information, the Millennia Credit Card is the best cashback credit card.",
    ),
    dict(
        question="Which card gives fuel benefits?",
        answer="The IndianOil HDFC Bank Credit Card offers fuel benefits.",
    ),
]
# Formats a single few-shot example as a "Q"/"A" pair.
example_template = PromptTemplate(
    template="Q :{question}\nA:{answer}",
    input_variables=["question", "answer"],
)

# Few-shot prompt: system message, then the rendered examples, then the
# retrieved context and the user's question.
few_shot_prompt = FewShotPromptTemplate(
    prefix=system_message + "\n\nHere are some examples:",
    examples=examples,
    example_prompt=example_template,
    suffix="Context:\n{context}\n\nQ: {question}\nA:",
    input_variables=["question", "context"],
)



In [248]:
## Retrieval chain
# Fetch up to 20 chunks per query. The model only sees what is retrieved, so
# "list everything" style questions are capped by this k.
retriever = vectorstore.as_retriever(search_kwargs={"k": 20})


# Build QA chain. The few-shot prompt has to be passed in explicitly: without
# chain_type_kwargs, RetrievalQA uses its built-in default prompt and
# few_shot_prompt (system message + examples) is never applied. The "stuff"
# chain fills the prompt's {context} and {question} variables.
qa = RetrievalQA.from_chain_type(
    llm=llm,
    chain_type="stuff",
    retriever=retriever,
    chain_type_kwargs={"prompt": few_shot_prompt},
)

# Ask a question. invoke() replaces the deprecated run(); RetrievalQA takes the
# question under "query" and returns the answer under "result".
qa_output = qa.invoke({"query": "all debit card please"})
response = qa_output["result"]
print(response)

Here is the list of debit cards mentioned:

1. Regular Debit Card
2. Business Debit Card
3. Platinum Debit Card
4. Millennia Debit Card
5. GIGA Business Debit Card
6. Infiniti Debit Card
7. Classic Platinum Debit Card
8. Titanium Royale Debit Card
9. Titanium Debit Card
10. EasyShop Platinum Debit Card
11. RuPay PMJDY Debit Card
12. Vishesh Platinum Debit Card
13. EasyShop Vishesh Debit Card
14. Preferred Platinum Debit Card
15. EasyShop Classic Platinum Debit Card
16. Visa NRO Debit Card
17. RuPay Platinum Debit Card
18. Rupay NRO Debit Card
19. Kids Advantage Debit Card
20. EasyShop Preferred Platinum Debit Card
