In [1]:
import os
from dotenv import load_dotenv
from langchain_groq import ChatGroq
from langchain_community.document_loaders import WebBaseLoader
from langchain.text_splitter import RecursiveCharacterTextSplitter
from langchain_community.embeddings import HuggingFaceEmbeddings
from langchain_community.vectorstores import Chroma
from langchain.chains import create_retrieval_chain
from langchain.chains.combine_documents import create_stuff_documents_chain
from langchain_core.prompts import ChatPromptTemplate
from langchain.agents import AgentExecutor, create_tool_calling_agent
from langchain.tools.retriever import create_retriever_tool

USER_AGENT environment variable not set, consider setting it to identify your requests.


In [2]:
def setup_environment():
    """
    Loads environment variables from a .env file and checks for the Groq API key.
    """
    load_dotenv()
    api_key = os.getenv("GROQ_API_KEY")
    if not api_key:
        print("ERROR: Groq API key not found.")
        print("Please create a .env file and add your GROQ_API_KEY to it.")
        return False
    print("Groq API Key loaded successfully from .env file.")
    return True

In [3]:
def initialize_llm():
    """
    Initializes and returns the Groq Language Model.
    """
    try:
        llm = ChatGroq(model_name="meta-llama/llama-4-scout-17b-16e-instruct", temperature=0)
        print("Groq LLM (Llama3-8b) initialized.")
        return llm
    except Exception as e:
        print(f"Error initializing Groq LLM: {e}")
        return None

In [10]:
def load_and_split_documents(url):
    """
    Loads documents from a URL and splits them into manageable chunks.
    """
    try:
        loader = WebBaseLoader(url)
        docs = loader.load()
        print(f"Loaded {len(docs)} document(s) from the web.")
        
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        splits = text_splitter.split_documents(docs)
        print(f"Document split into {len(splits)} chunks.")
        return splits
    except Exception as e:
        print(f"Error loading or splitting documents: {e}")
        return None

In [11]:
def create_vector_store(splits):
    """
    Creates a ChromaDB vector store from document splits.
    """
    try:
        print("Initializing embedding model and vector store (ChromaDB)...")
        embedding_model = HuggingFaceEmbeddings(model_name="all-MiniLM-L6-v2")
        vectorstore = Chroma.from_documents(documents=splits, embedding=embedding_model)
        print("Vector store created successfully.")
        return vectorstore
    except Exception as e:
        print(f"Error creating vector store: {e}")
        return None

In [12]:
def create_agent_executor(llm, vectorstore):
    """
    Creates and returns a LangChain agent executor.
    This agent is equipped with a retriever tool to perform RAG.
    """
    print("Creating RAG tool and agent...")
    retriever = vectorstore.as_retriever(search_kwargs={"k": 3})

    # Create a tool that uses our retriever
    retriever_tool = create_retriever_tool(
        retriever,
        "document_search",
        "Search for information from the provided document. For any questions about the document, you must use this tool!",
    )

    # Define the set of tools the agent can use
    tools = [retriever_tool]

    # Create the agent prompt
    system_prompt = (
        "You are a helpful assistant. You have access to a tool for searching "
        "information within a document provided by the user."
    )
    agent_prompt = ChatPromptTemplate.from_messages(
        [
            ("system", system_prompt),
            ("human", "{input}"),
            ("placeholder", "{agent_scratchpad}"),
        ]
    )

    # Create the agent itself
    agent = create_tool_calling_agent(llm, tools, agent_prompt)

    # Create the Agent Executor, which runs the agent and its tools
    agent_executor = AgentExecutor(agent=agent, tools=tools, verbose=True)
    print("Agent with RAG tool is ready.")
    return agent_executor

In [13]:
def start_interactive_session(agent_executor):
    """
    Starts the interactive Q&A session with the user.
    """
    print("\n--- RAG Q&A System is Running ---")
    print("You can now ask questions about the content of the provided web page.")
    print("Type 'exit' to quit.")

    while True:
        user_input = input("\nYour Question: ")
        if user_input.lower() == 'exit':
            print("Exiting the system. Goodbye!")
            break

        try:
            # The agent decides whether to use the retriever tool for relevant questions.
            response = agent_executor.invoke({"input": user_input})
            print("\nAnswer:", response['output'])
        except Exception as e:
            print(f"\nAn error occurred: {e}")

In [14]:
def main():
    """
    Main function to orchestrate the setup and execution of the RAG Q&A system.
    """
    if not setup_environment():
        return

    llm = initialize_llm()
    if not llm:
        return

    # Get the document URL from the user
    document_url = input("Please enter the URL of the document you want to query: ")
    if not document_url:
        print("No URL provided. Exiting.")
        return

    doc_splits = load_and_split_documents(document_url)
    if not doc_splits:
        return

    vector_store = create_vector_store(doc_splits)
    if not vector_store:
        return

    agent_executor = create_agent_executor(llm, vector_store)
    if not agent_executor:
        return

    start_interactive_session(agent_executor)

In [None]:
if __name__ == '__main__':
    main()

Groq API Key loaded successfully from .env file.
Groq LLM (Llama3-8b) initialized.


Please enter the URL of the document you want to query:  https://developers.google.com/machine-learning/resources/intro-llms


Loaded 1 document(s) from the web.
Document split into 15 chunks.
Initializing embedding model and vector store (ChromaDB)...
Vector store created successfully.
Creating RAG tool and agent...
Agent with RAG tool is ready.

--- RAG Q&A System is Running ---
You can now ask questions about the content of the provided web page.
Type 'exit' to quit.



Your Question:  What is LLM?




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mLLM stands for Large Language Model. It refers to a type of artificial intelligence (AI) model designed to process and understand human language at a large scale. These models are trained on vast amounts of text data, which enables them to learn patterns, relationships, and context within language. This training allows LLMs to generate text, answer questions, translate languages, and perform various other natural language processing tasks. They are a significant advancement in the field of natural language processing (NLP) and have numerous applications in areas such as customer service, language translation, content generation, and more.[0m

[1m> Finished chain.[0m

Answer: LLM stands for Large Language Model. It refers to a type of artificial intelligence (AI) model designed to process and understand human language at a large scale. These models are trained on vast amounts of text data, which enables them to learn patter


Your Question:  How large is LLM?




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `document_search` with `{'query': 'LLM size'}`


[0m[36;1m[1;3m(4) Response generation: LLM receives the execution results and provides summarized results to users.
To put HuggingGPT into real world usage, a couple challenges need to solve: (1) Efficiency improvement is needed as both LLM inference rounds and interactions with other models slow down the process; (2) It relies on a long context window to communicate over complicated task content; (3) Stability improvement of LLM outputs and external model services.

Overview of a LLM-powered autonomous agent system.

API-Bank (Li et al. 2023) is a benchmark for evaluating the performance of tool-augmented LLMs. It contains 53 commonly used API tools, a complete tool-augmented LLM workflow, and 264 annotated dialogues that involve 568 API calls. The selection of APIs is quite diverse, including search engines, calculator, calendar queries, smart home control, sche


Your Question:  What is Transformers?




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mTransformers is a type of neural network architecture introduced in the paper "Attention Is All You Need" by Vaswani et al. in 2017. It's primarily used for natural language processing tasks, such as machine translation, text classification, and language generation.

The Transformer model relies on self-attention mechanisms to weigh the importance of different input elements relative to each other, allowing it to capture long-range dependencies and contextual relationships in sequential data.

Transformers have become a foundational component in many state-of-the-art language models, including BERT, RoBERTa, and XLNet, and have achieved remarkable results in various NLP tasks.[0m

[1m> Finished chain.[0m

Answer: Transformers is a type of neural network architecture introduced in the paper "Attention Is All You Need" by Vaswani et al. in 2017. It's primarily used for natural language processing tasks, such as machine trans