# Chat with Website

### This is a gemini based app that uses RAG to enable users chat with any website content

In [2]:
from dotenv import load_dotenv
load_dotenv(override=True)

True

In [5]:
import os
os.environ["GOOGLE_API_KEY"] = os.getenv("GOOGLE_API_KEY")
os.environ["LANGHCAIN_API_KEY"] = os.getenv("LANGCHAIN_API_KEY")
os.environ["LANGCHAIN_TRACING_V2"] = "true"
os.environ["LANGCHAIN_PROJECT"] = "default"

In [7]:
from langchain_google_genai import ChatGoogleGenerativeAI

llm = ChatGoogleGenerativeAI(
    model="gemini-1.5-flash",
    temperature=0.1,
    max_output_tokens=1024
)

llm.invoke("What is the capital of France?")

AIMessage(content='Paris', additional_kwargs={}, response_metadata={'prompt_feedback': {'block_reason': 0, 'safety_ratings': []}, 'finish_reason': 'STOP', 'model_name': 'gemini-1.5-flash', 'safety_ratings': []}, id='run--0da72978-d89e-4ada-ba92-810103bb7372-0', usage_metadata={'input_tokens': 7, 'output_tokens': 2, 'total_tokens': 9, 'input_token_details': {'cache_read': 0}})

### Step1: Data Loading from website


In [1]:
from langchain_community.document_loaders import WebBaseLoader
url = "https://towardsdatascience.com/software-engineering-in-the-llm-era/"
loader = WebBaseLoader(url)
documents = loader.load()
documents


USER_AGENT environment variable not set, consider setting it to identify your requests.


[Document(metadata={'source': 'https://towardsdatascience.com/software-engineering-in-the-llm-era/', 'title': 'Software Engineering in the LLM Era | Towards Data Science', 'language': 'en-US'}, page_content='\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nSoftware Engineering in the LLM Era | Towards Data Science\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\nPublish AI, ML & data-science insights to a global community of data professionals.\n\n\nSign in\nSign out\nSubmit an Article\n\n\n\n\n\n\nLatestEditor’s PicksDeep DivesNewsletter\n\nWrite For TDS\n\n\n \n\n\n\n \n\n \nToggle Mobile Navigation\n\n\n\n\nLinkedIn\nX\n\n\n\n \n\n \nToggle Search\n\n\nSearch\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n \n\t\tLarge Language Models\t\n\nSoftware Engineering in the LLM Era\n\n\n\n\t\t\tOn growing new software engineers, even when it’s 

### Step 2: Chunking the Data 

In [23]:
from langchain_text_splitters import RecursiveCharacterTextSplitter
splitter = RecursiveCharacterTextSplitter(
    chunk_size=1000,
    chunk_overlap=200,
    length_function=len
)
docs = splitter.split_documents(documents)
len(docs)

32

In [None]:
docs

### Step 3: Embedding the chunked docs

In [20]:
from langchain_google_genai import GoogleGenerativeAIEmbeddings
embeddings = GoogleGenerativeAIEmbeddings(model="models/gemini-embedding-001")
vector = embeddings.embed_query("France is a country in Europe")


In [21]:
len(vector)

3072

In [None]:
## Embed the chunked documents
from langchain_community.vectorstores import FAISS
vectorstore = FAISS.from_documents(docs, embeddings)


[Document(id='b99b1cb9-d89d-498d-b37b-68817afb9585', metadata={'source': 'https://ai.google.dev/gemini-api/docs/embeddings', 'title': 'Embeddings \xa0|\xa0 Gemini API \xa0|\xa0 Google AI for Developers', 'language': 'en'}, page_content='Send feedback\n  \n  \n\n\n\nExcept as otherwise noted, the content of this page is licensed under the Creative Commons Attribution 4.0 License, and code samples are licensed under the Apache 2.0 License. For details, see the Google Developers Site Policies. Java is a registered trademark of Oracle and/or its affiliates.\nLast updated 2025-07-18 UTC.\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n          Terms\n        \n\n\n\n          Privacy\n        \n\n\n\n          Manage cookies\n        \n\n\n\n\n\nEnglish\n\n\nDeutsch\n\n\nEspañol – América Latina\n\n\nFrançais\n\n\nIndonesia\n\n\nItaliano\n\n\nPolski\n\n\nPortuguês – Brasil\n\n\nShqip\n\n\nTiếng Việt\n\n\nTürkçe\n\n\nРусский\n\n\nעברית\n\n\nالعربيّة\n\n\nفارسی\n\n\nहिंदी\n\n\nবাংলা\n\n\nภาษาไท

In [26]:
answer = vectorstore.similarity_search("Unlike Generative AI model that")
print(answer)

[Document(id='61b11111-8084-4739-a7e2-2db5bb1b07e4', metadata={'source': 'https://ai.google.dev/gemini-api/docs/embeddings', 'title': 'Embeddings \xa0|\xa0 Gemini API \xa0|\xa0 Google AI for Developers', 'language': 'en'}, page_content='RETRIEVAL_DOCUMENT\nEmbeddings optimized for document search.\nIndexing articles, books, or web pages for search.\n\n\nRETRIEVAL_QUERY\n\n        Embeddings optimized for general search queries.\n        Use RETRIEVAL_QUERY for queries; RETRIEVAL_DOCUMENT for documents to be retrieved.\n    \nCustom search\n\n\nCODE_RETRIEVAL_QUERY\n\n    Embeddings optimized for retrieval of code blocks based on natural language queries.\n    Use CODE_RETRIEVAL_QUERY for queries; RETRIEVAL_DOCUMENT for code blocks to be retrieved.\n   \nCode suggestions and search\n\n\nQUESTION_ANSWERING\n\n    Embeddings for questions in a question-answering system, optimized for finding documents that answer the question.\n    Use QUESTION_ANSWERING for questions; RETRIEVAL_DOCUMENT 

In [34]:
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain.prompts import ChatPromptTemplate

prompt = ChatPromptTemplate.from_template(
    """Answer the question based on the context below.\n\n
    <context>
    {context}\n\n
    </context>

    """

)


document_chain = create_stuff_documents_chain(
    llm=llm,
    prompt=prompt,
)
document_chain

RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableLambda(format_docs)
}), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
| ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='Answer the question based on the context below.\n\n\n    <context>\n    {context}\n\n\n    </context>\n\n    '), additional_kwargs={})])
| ChatGoogleGenerativeAI(model='models/gemini-1.5-flash', google_api_key=SecretStr('**********'), temperature=0.1, max_output_tokens=1024, client=<google.ai.generativelanguage_v1beta.services.generative_service.client.GenerativeServiceClient object at 0x000001F523998460>, default_metadata=(), model_kwargs={})
| StrOutputParser(), kwargs={}, config={'run_name': 'stuff_documents_chain'}, config_factories=[])

In [35]:
from langchain.chains import create_retrieval_chain
retriever = vectorstore.as_retriever()
retrieval_chain = create_retrieval_chain(
    retriever,document_chain
)
retrieval_chain


RunnableBinding(bound=RunnableAssign(mapper={
  context: RunnableBinding(bound=RunnableLambda(lambda x: x['input'])
           | VectorStoreRetriever(tags=['FAISS', 'GoogleGenerativeAIEmbeddings'], vectorstore=<langchain_community.vectorstores.faiss.FAISS object at 0x000001F5723A19C0>, search_kwargs={}), kwargs={}, config={'run_name': 'retrieve_documents'}, config_factories=[])
})
| RunnableAssign(mapper={
    answer: RunnableBinding(bound=RunnableBinding(bound=RunnableAssign(mapper={
              context: RunnableLambda(format_docs)
            }), kwargs={}, config={'run_name': 'format_inputs'}, config_factories=[])
            | ChatPromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, messages=[HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['context'], input_types={}, partial_variables={}, template='Answer the question based on the context below.\n\n\n    <context>\n    {context}\n\n\n    </context>\n\n    '), additional_kwargs={})])


In [36]:
response = retrieval_chain.invoke({
    "input": "Using embeddings in Gemini",
    
})
print(response['answer'])

Based on the provided text, Gemini Embedding models are used to transform input data (words, phrases, sentences, and code) into numerical representations called embeddings.  These embeddings are then used in various Natural Language Processing (NLP) tasks such as semantic search, classification, and clustering, improving accuracy and context awareness compared to keyword-based methods.  A key application is building Retrieval Augmented Generation (RAG) systems, where embeddings help retrieve relevant information from knowledge bases to enhance the accuracy and context of language model outputs.  For large-scale applications, using Vertex AI is recommended.  The text also mentions storing embeddings in vector databases like BigQuery, AlloyDB, Cloud SQL, ChromaDB, Qdrant, Weaviate, and Pinecone.


In [None]:
print(response)

{'input': 'Using embeddings in Gemini', 'context': [Document(id='9894a252-fee5-4805-a9af-86da161fbf7e', metadata={'source': 'https://ai.google.dev/gemini-api/docs/embeddings', 'title': 'Embeddings \xa0|\xa0 Gemini API \xa0|\xa0 Google AI for Developers', 'language': 'en'}, page_content='Gemini for Research\n   \n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n\n          Veo 3 is now available in the Gemini API! Learn more\n\n\n\n\n\n\n\n    \n        Home\n      \n  \n\n\n\n\n    \n        Gemini API\n      \n  \n\n\n\n\n    \n        Models\n      \n  \n\n\n\n\n\n\n\n  \n    \n    Send feedback\n  \n  \n\n\n      Embeddings'), Document(id='133bb7e0-84b4-4d75-9301-8a727725f81d', metadata={'source': 'https://ai.google.dev/gemini-api/docs/embeddings', 'title': 'Embeddings \xa0|\xa0 Gemini API \xa0|\xa0 Google AI for Developers', 'language': 'en'}, page_content="embedding-001 (Deprecating on August 14, 2025)\ntext-embedding-004 (Deprecating on January 14, 2026)\n\nUsing embeddings\nUnlike genera