In [1]:
import os
from dotenv import load_dotenv
# Pull key/value pairs from a local .env file (if one exists) into os.environ.
load_dotenv()

# Settings the libraries used below are expected to read from the environment.
# Fail here with a readable message instead of the opaque
# "TypeError: str expected, not NoneType" that assigning a missing value
# (os.getenv(...) returns None) to os.environ would raise.
_missing = [
    name
    for name in ('GOOGLE_API_KEY', 'LANGCHAIN_API_KEY', 'LANGCHAIN_PROJECT')
    if os.getenv(name) is None
]
if _missing:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing)
        + ". Define them in your shell or in a .env file."
    )

# Enable LangChain tracing for the runs in this notebook.
os.environ['LANGCHAIN_TRACING_V2'] = "true"

# Page whose content is scraped and indexed below.
url = "https://www.hsbc.com.hk/credit-cards/apply/"

In [3]:
# Data Ingestion - scrape the page content from the website
from langchain_community.document_loaders import WebBaseLoader

In [4]:
# Fetch the page at `url` and load it as a list of LangChain documents.
loader = WebBaseLoader(web_path=url)
docs = loader.load()

In [14]:
# Data Chunking 
from langchain_text_splitters import RecursiveCharacterTextSplitter

In [16]:
# Cut the loaded page into chunks of up to 1000 characters; neighbouring chunks
# share up to 500 characters so text near a boundary appears in both.
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=500,
)
final_docs = splitter.split_documents(docs)

In [18]:
# Vector Embedding
# Embedding model used to turn each text chunk into a vector for the vector store.
from langchain_google_genai import GoogleGenerativeAIEmbeddings

# NOTE(review): presumably authenticates via GOOGLE_API_KEY from the environment - confirm.
embeddings = GoogleGenerativeAIEmbeddings(model="models/embedding-001")

  from .autonotebook import tqdm as notebook_tqdm


In [19]:
# Vector Store
from langchain_community.vectorstores import FAISS

In [None]:
# Embed every chunk and build an in-memory FAISS index over the vectors.
# An empty chunk list (e.g. the page failed to load or contained no text) would
# otherwise fail inside the FAISS builder with an unhelpful
# "list index out of range", so stop here with a clear message instead.
if not final_docs:
    raise ValueError(
        "final_docs is empty: nothing to index - check the loader/splitter output"
    )
vector_db = FAISS.from_documents(documents=final_docs, embedding=embeddings)

In [22]:
# Sanity-check the index: fetch the chunks most similar to a sample query.
result = vector_db.similarity_search(query="Money Back App")

In [24]:
# Display the text of the first search hit (the last expression is rendered as the cell output).
result[0].page_content

'Up to $700 RewardCash as welcome offer[@cards-welcomeoffer]\n\n1-year Moneyback VIP status when you successfully link your HSBC easy Credit Card in MoneyBack App account[@cards-1-year-moneyback-vip]. Earn 6X MoneyBack Points when you spend with your card every day.\nExtra $200 RewardCash for new credit card customers who successfully apply for a Spending Instalment Plan[@cards-spi-visa-platinum]\nFirst 2-year annual fee waiver\n\nRewards\n\nUp to 2.4% RewardCash for spending in the Rewards of Your Choice category\nEarn 6X MoneyBack Points (=2.4% rebate) by linking your card in MoneyBack App\n8% shopping discount on Member Day at PARKnSHOP, Watsons and Fortress\n\n\n\n\n\n\n\n                \n                Log on and apply\n            \xa0\nLog on and apply  This link will open in a new window\n\n\n\n\n\n\n\n                \n                Apply as an existing customer\n            \xa0\nApply as an existing customer for HSBC Visa Platinum Card This link will open in a new window

In [None]:
# Retrieval Chain
# A retrieval chain lets us ask questions about the document: it fetches the relevant chunks and passes them to the LLM as context.
# Everything this cell needs, imported up front.
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain_google_genai import ChatGoogleGenerativeAI

# Chat model that writes the final answer.
llm = ChatGoogleGenerativeAI(
    model="gemini-2.0-flash",
    temperature=0,
    max_tokens=None,
    timeout=None,
    max_retries=2,
)

# Retriever view of the vector store: given a query it returns matching documents.
retriever = vector_db.as_retriever()

# System message carries the retrieved text in {context}; the human message is
# the user's actual question in {input}.
prompt = ChatPromptTemplate.from_messages([
    (
        "system",
        "\n         Answer the questions based on provided context:"
        "\n         <context>"
        "\n         {context}"
        "\n         </context>",
    ),
    ("human", "{input}"),
])

# The "stuff" chain places the retrieved documents into {context} and calls the LLM.
doc_chain = create_stuff_documents_chain(llm, prompt)

# Full pipeline: retrieve documents for the input, then answer with doc_chain.
retrieval_chain = create_retrieval_chain(retriever, doc_chain)

In [41]:
# Run retrieval + generation for a sample question, then show the model's answer.
response = retrieval_chain.invoke({"input": "Money Back App"})
response["answer"]

'1-year Moneyback VIP status when you successfully link your HSBC easy Credit Card in MoneyBack App account[@cards-1-year-moneyback-vip]. Earn 6X MoneyBack Points when you spend with your card every day.\nEarn 6X MoneyBack Points (=2.4% rebate) by linking your card in MoneyBack App'

In [42]:
# Show the documents the retriever supplied as context for this response.
response['context']

[Document(id='8dfd35fa-225a-41d3-af32-cb04b3c561f1', metadata={'source': 'https://www.hsbc.com.hk/credit-cards/apply/', 'title': 'Apply for Credit Cards Online | Offers & Rewards - HSBC HK', 'description': 'Apply for an HSBC credit card and enjoy exclusive benefits on card rewards, air miles and other rewards. Apply online now to enjoy extra welcome offers.', 'language': 'en-hk'}, page_content='Up to $700 RewardCash as welcome offer[@cards-welcomeoffer]\n\n1-year Moneyback VIP status when you successfully link your HSBC easy Credit Card in MoneyBack App account[@cards-1-year-moneyback-vip]. Earn 6X MoneyBack Points when you spend with your card every day.\nExtra $200 RewardCash for new credit card customers who successfully apply for a Spending Instalment Plan[@cards-spi-visa-platinum]\nFirst 2-year annual fee waiver\n\nRewards\n\nUp to 2.4% RewardCash for spending in the Rewards of Your Choice category\nEarn 6X MoneyBack Points (=2.4% rebate) by linking your card in MoneyBack App\n8% 

In [40]:
# Display the composed chain's repr to inspect how the retrieval and answer steps are wired together.
retrieval_chain

RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'GoogleGenerativeAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x72b6080bb950>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context', 'input'], input_types={}, partial_variables={}, messages=[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='\n         Answer the questions based on provided context:\n         <context>\n         {context}\n         </context>'), addition

In [None]:
# from langchain_core.documents import Document
# doc_chain.invoke({'input':"HSBC Easy Credit",
#                   'context':[Document(page_content="HSBC Easy Credit Card")]})

"Based on the context, you're asking about the HSBC Easy Credit Card."