In [None]:
# |default_exp langchain_rag_groq

Please refer to [this blog post](https://nbdev.fast.ai/blog/posts/2022-11-07-spaces) for instructions on how to use this notebook.

## Install dependencies

## Make an app with Gradio

In [None]:
# |export
import gradio as gr
from dotenv import load_dotenv
import os
import time
import getpass

In [None]:
# |export
# Load variables from a local .env file into the process environment.
load_dotenv()
# Route outbound HTTP(S) traffic through the local proxy.
os.environ['HTTP_PROXY'] = 'http://127.0.0.1:20171'
os.environ['HTTPS_PROXY'] = 'http://127.0.0.1:20171'
# os.environ['NO_PROXY'] = 'localhost, 127.0.0.1'
print(os.environ.get('HTTP_PROXY'))
print(os.environ.get('HTTPS_PROXY'))
# Never echo secret values: cell output is saved with the notebook and ends up
# in application logs. Report only whether each key is present.
for _key_name in ('GROQ_API_KEY', 'OPIK_API_KEY'):
    print(f"{_key_name}: {'set' if os.environ.get(_key_name) else 'MISSING'}")


In [None]:
#| export
from llama_index.core import Settings, SimpleDirectoryReader
from llama_index.llms.groq import Groq

In [None]:

# Groq-hosted model used as the notebook-wide LLM.
model_name = "meta-llama/llama-4-scout-17b-16e-instruct"

# Build the client with the key from the environment, then register it on
# llama_index's global Settings object.
llm = Groq(
    model=model_name,
    api_key=os.getenv("GROQ_API_KEY"),
)
Settings.llm = llm


In [None]:
# from llama_index.embeddings.huggingface import HuggingFaceEmbedding
# from llama_index.core import Settings
# Settings.embed_model = HuggingFaceEmbedding(model_name="BAAI/bge-small-en-v1.5")

In [None]:
from llama_index.core import Settings
from llama_index.embeddings.fastembed import FastEmbedEmbedding

# FastEmbed model name; the v2 MoE variant is kept as a commented-out alternative.
# embedding_model = "nomic-ai/nomic-embed-text-v2-moe"
embedding_model = "nomic-ai/nomic-embed-text-v1"

# Build the embedder once and register it on llama_index's global Settings object.
Settings.embed_model = embed_model = FastEmbedEmbedding(model_name=embedding_model)


In [None]:
import opik
# Configure the Opik client before its llama_index integration is imported below.
# NOTE(review): use_local=False presumably targets a hosted (non-local) Opik
# deployment and relies on OPIK_API_KEY from the environment — confirm.
opik.configure(use_local=False)

from llama_index.core import Settings
from llama_index.core.callbacks import CallbackManager
from opik.integrations.llama_index import LlamaIndexCallbackHandler

# Wrap the Opik handler in a callback manager and register it on llama_index's
# global Settings object.
callback_manager = CallbackManager([LlamaIndexCallbackHandler()])
Settings.callback_manager = callback_manager

In [None]:
# Building blocks used by the RetrieverEvent / RAGWorkflow definitions in the next cell.
# NOTE(review): this cell carries no nbdev export directive, although the exported
# cell that defines RAGWorkflow uses these names (and FastEmbedEmbedding, which is
# imported in another non-exported cell) — confirm whether these imports should
# be exported too.
from llama_index.core.workflow import Event, Context, Workflow, StartEvent, StopEvent, step
from llama_index.core.schema import NodeWithScore
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex
from llama_index.core.response_synthesizers import CompactAndRefine
import nest_asyncio
# Presumably patches asyncio so that nested event loops work inside the notebook
# kernel (the cells below use top-level `await` and `asyncio.run`) — confirm.
nest_asyncio.apply()

In [None]:
#| export

# Maps the model label accepted as `llm_choice` by RAGWorkflow to the model
# identifier passed to the Groq client.
GROQ_MODELS = {
    "Llama 4":"meta-llama/llama-4-scout-17b-16e-instruct",
    "DeepSeek-R1":"deepseek-r1-distill-llama-70b",
}

class RetrieverEvent(Event):
    """Workflow event emitted by the retrieval step and consumed by the synthesis step."""
    # Nodes returned by the retriever for the current query.
    nodes: list[NodeWithScore]

class RAGWorkflow(Workflow):
    """Retrieval-augmented generation workflow backed by a Groq-hosted LLM.

    ``run`` has two entry points, selected by the keyword arguments it receives:

    * ``run(dirname=...)`` loads the documents in a directory and builds a
      vector index (``ingest``).
    * ``run(query=...)`` retrieves the most similar nodes from the index
      (``retrieve``) and synthesizes an answer from them (``synthesize``).

    ``ingest_documents`` and ``query`` are thin convenience wrappers around
    those two entry points.
    """

    def __init__(self, llm_choice="Llama 4", embedding_model="BAAI/bge-large-en-v1.5",
                 **workflow_kwargs):
        """Create the workflow with its own LLM and embedding model.

        Args:
            llm_choice: Key of ``GROQ_MODELS`` selecting the Groq model.
            embedding_model: FastEmbed model name used to embed documents and queries.
            **workflow_kwargs: Extra options forwarded unchanged to ``Workflow``
                (for example ``timeout``).

        Raises:
            ValueError: If ``llm_choice`` is not a key of ``GROQ_MODELS``.
        """
        super().__init__(**workflow_kwargs)
        model_name = GROQ_MODELS.get(llm_choice)
        if not model_name:
            # f-string so the list of valid choices is actually interpolated.
            raise ValueError(
                f"Invalid LLM choice. Please select from: {', '.join(GROQ_MODELS.keys())}"
            )

        # Initialize LLM and embedding model
        self.llm = Groq(model=model_name, api_key=os.environ.get('GROQ_API_KEY'))
        self.embed_model = FastEmbedEmbedding(model_name=embedding_model)

        # Vector index; populated by `ingest`.
        self.index = None

    @step
    async def ingest(self, ctx: Context, ev: StartEvent) -> StopEvent | None:
        """Entry point to ingest documents from a directory.

        Returns ``None`` (step does nothing) when the start event carries no
        ``dirname``; otherwise builds the index and stops the run with it.
        """
        dirname = ev.get("dirname")
        if not dirname:
            return None

        documents = SimpleDirectoryReader(dirname).load_data()
        # Embed with this workflow's own model instead of the global Settings
        # default, so the `embedding_model` constructor argument takes effect.
        self.index = VectorStoreIndex.from_documents(
            documents=documents, embed_model=self.embed_model
        )
        return StopEvent(result=self.index)

    @step
    async def retrieve(self, ctx: Context, ev: StartEvent) -> RetrieverEvent | None:
        """Entry point for RAG retrieval.

        Returns ``None`` when the start event carries no ``query`` or when no
        index is available; otherwise stores the query in the context and
        emits the retrieved nodes.
        """
        query = ev.get("query")
        index = ev.get("index") or self.index

        if not query:
            return None

        if index is None:
            print("Index is empty, load some documents before querying!")
            return None

        retriever = index.as_retriever(similarity_top_k=2)
        nodes = await retriever.aretrieve(query)
        # The synthesis step reads the query back from the shared context.
        await ctx.set("query", query)
        return RetrieverEvent(nodes=nodes)

    @step
    async def synthesize(self, ctx: Context, ev: RetrieverEvent) -> StopEvent:
        """Generate a streaming response from the retrieved nodes."""
        # Use this workflow's LLM so the `llm_choice` constructor argument
        # takes effect instead of the global Settings default.
        summarizer = CompactAndRefine(llm=self.llm, streaming=True, verbose=True)
        query = await ctx.get("query", default=None)
        response = await summarizer.asynthesize(query, nodes=ev.nodes)
        # The payload must be passed as `result`: that is what `run()` returns.
        return StopEvent(result=response)

    async def query(self, query_text: str):
        """Helper method to perform a complete RAG query.

        Args:
            query_text: The question to answer from the ingested documents.

        Returns:
            The response produced by the synthesis step.

        Raises:
            ValueError: If no index has been built yet, or the run produced no result.
        """
        if self.index is None:
            raise ValueError("Index is empty. Call ingest_documents first!")

        result = await self.run(query=query_text, index=self.index)
        if result is None:
            raise ValueError("No result returned from query!")
        return result

    async def ingest_documents(self, directory: str):
        """Helper method to ingest documents.

        Args:
            directory: Directory whose files are loaded and indexed.

        Returns:
            The index built by the ``ingest`` step (also stored on ``self.index``).
        """
        result = await self.run(dirname=directory)
        self.index = result
        return result

In [None]:
from llama_index.utils.workflow import draw_all_possible_flows
# Presumably renders the possible step transitions of RAGWorkflow into
# crag_workflow.html in the current working directory — confirm.
draw_all_possible_flows(RAGWorkflow, filename="crag_workflow.html")

In [None]:
from IPython.display import display, Markdown
# Workflow instance shared by the ingest and query cells that follow; the
# trailing comment keeps an alternative embedding model at hand.
workflow = RAGWorkflow(llm_choice="Llama 4") #, embedding_model="nomic-ai/nomic-embed-text-v1")

In [None]:
# Load the documents under ../data and build the index (stored on workflow.index).
await workflow.ingest_documents("../data")

In [None]:
# Run the query entry point of the workflow directly through Workflow.run.
# NOTE(review): the following cell issues this same query again; one of the two
# is likely redundant.
result = await workflow.run(query="How was DeepSeek R1 trained?")

In [None]:
# Ask the question through the workflow and render the stringified result as Markdown.
result = await workflow.run(query="How was DeepSeek R1 trained?")
display(Markdown(str(result)))

In [None]:
#|export
async def main(data_dir="../data", question="How was DeepSeek R1 trained?"):
    """Run the RAG pipeline end to end and stream the answer to stdout.

    Args:
        data_dir: Directory whose documents are ingested into the index.
        question: Query answered from the ingested documents.
    """
    # Initialize the workflow
    print("Set up workflow ...")
    workflow = RAGWorkflow(llm_choice="Llama 4")
    print("start ingest ...")
    await workflow.ingest_documents(data_dir)

    # Perform a query
    print("start query ...")
    result = await workflow.query(question)

    # Print the response chunk by chunk as it is generated
    async for chunk in result.async_response_gen():
        print(chunk, end="", flush=True)
    # Terminate the streamed line so later output starts on a fresh line.
    print()

In [None]:
# Script-style entry point: run main() when this code executes as __main__.
# NOTE(review): inside a notebook kernel an event loop is presumably already
# running, so asyncio.run here likely depends on the earlier
# nest_asyncio.apply() call — confirm.
if __name__ == "__main__":
    import asyncio
    asyncio.run(main())

In [None]:
#| export
def answer(message, history, system_prompt, tokens):
    """Chat handler: answer a question about a web page, or report an uploaded file.

    Args:
        message: Current multimodal message; a dict read through its "text"
            and "files" entries.
        history: Earlier chat messages as dicts with "role" and "content";
            user messages whose content is a tuple are treated as file uploads.
        system_prompt: The question to ask about the page.
        tokens: Maximum number of characters of the reply to stream.

    Yields:
        Growing prefixes of the reply, one character longer every 0.05 s.
    """
    # NOTE(review): validators, WebBaseLoader, bs4, RecursiveCharacterTextSplitter,
    # vector_store and graph are not imported or defined in the visible notebook —
    # confirm they are provided before this function is called.

    # Files uploaded earlier in the conversation, followed by the current ones.
    files = [
        msg["content"][0]
        for msg in history
        if msg["role"] == "user" and isinstance(msg["content"], tuple)
    ]
    files.extend(message.get("files") or [])
    file_names = [os.path.basename(path) for path in files]

    text = message.get("text") or ""

    # Join outside the f-string: a backslash inside an f-string expression is a
    # syntax error before Python 3.12.
    file_listing = "\n".join(file_names)
    user_input = f"Question: {system_prompt}\n Website: {text}\n File:\n{file_listing}"

    # Fallback reply (an echo of the request) so that `response_i` is always
    # bound, even when the message has neither a valid URL nor a file.
    response_i = user_input

    if validators.url(text):
        loader = WebBaseLoader(
            web_paths=(text,),
            bs_kwargs=dict(
                parse_only=bs4.SoupStrainer(
                    class_=("post-content", "post-title", "post-header")
                )
            ),
        )
        docs = loader.load()
        text_splitter = RecursiveCharacterTextSplitter(chunk_size=1000, chunk_overlap=200)
        all_splits = text_splitter.split_documents(docs)
        # Index chunks
        # NOTE(review): the page is re-indexed on every call, so asking again
        # about the same URL adds its chunks again — confirm this is acceptable.
        _ = vector_store.add_documents(documents=all_splits)

        reply = graph.invoke({"question": system_prompt})
        response_i = reply["answer"]
    elif files:
        # No URL given: report the most recently uploaded file.
        response_i = f"File: {file_names[-1]}\n"

    # Stream the reply one character at a time, capped at `tokens` characters.
    for i in range(min(len(response_i), int(tokens))):
        time.sleep(0.05)
        yield response_i[: i + 1]

In [None]:
# |export
# Multimodal chat UI around `answer`: the main textbox takes a URL (and optional
# uploads); the two additional inputs supply the question and the reply-length cap.
demo = gr.ChatInterface(
    fn=answer,
    type="messages",
    multimodal=True,
    title="智能问答RAG",
    description="输入一个网址，查询或询问其中的内容。",
    textbox=gr.MultimodalTextbox(
        value="https://lilianweng.github.io/posts/2023-06-23-agent/",
        file_count="multiple",
        file_types=["image", ".pdf", ".txt"],
        sources=["upload", "microphone"],
    ),
    additional_inputs=[
        # Passed to `answer` as `system_prompt`.
        gr.Textbox(value="What is Task Decomposition?", label="你的问题在此输入！"),
        # Passed to `answer` as `tokens`.
        gr.Slider(minimum=10, maximum=400, value=300, label="回答长度"),
    ],
)
demo.launch(share=False)

In [None]:
# Close the demo launched in the previous cell; this is only necessary in a notebook.
demo.close()

## Create a `requirements.txt` file

In [None]:
%%writefile ../requirements.txt
fastcore

## Convert this notebook into a Gradio app

In [None]:
# from nbdev.export import nb_export
# nb_export('01_gradio.ipynb', lib_path='.', name='gradio')

In [None]:
# | hide
import nbdev

# Run nbdev's export step for this project.
nbdev.nbdev_export()

<div>
<link rel="stylesheet" href="https://gradio.s3-us-west-2.amazonaws.com/2.6.5/static/bundle.css">
<div id="target"></div>
<script src="https://gradio.s3-us-west-2.amazonaws.com/2.6.5/static/bundle.js"></script>
<script>
launchGradioFromSpaces("abidlabs/question-answering", "#target")
</script>
</div>