In [1]:
%load_ext autoreload
%autoreload 2

In [None]:
import gradio as gr
from src import Config
from llama_deploy import deploy_workflow, deploy_core, WorkflowServiceConfig, ControlPlaneConfig, SimpleMessageQueueConfig
from src import ReActAgent
from src.utils import download_10k_reports
from src.utils import load_hf_model
from src.utils import initialize_groq_model
from src.ingest import setup_indices
from llama_index.core import PromptTemplate
from src.query_engine import HybridRetriver, StuffedContextQueryEngine
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.postprocessor.colbert_rerank import ColbertRerank
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from llama_index.core.indices.vector_store.retrievers import VectorIndexRetriever

async def _wait_until_listening(host, port, timeout=30.0, poll_interval=0.25):
    """Wait until a TCP connection to ``host:port`` can be opened.

    Args:
        host: Host name or IP address to probe.
        port: TCP port to probe.
        timeout: Maximum number of seconds to keep retrying.
        poll_interval: Seconds to sleep between two connection attempts.

    Raises:
        TimeoutError: If no connection could be opened within ``timeout``.
    """
    import asyncio

    loop = asyncio.get_running_loop()
    deadline = loop.time() + timeout
    while True:
        try:
            _, writer = await asyncio.open_connection(host, port)
        except OSError as exc:
            if loop.time() >= deadline:
                raise TimeoutError(
                    f"Nothing is listening on {host}:{port} after {timeout} seconds"
                ) from exc
            await asyncio.sleep(poll_interval)
            continue

        writer.close()
        try:
            await writer.wait_closed()
        except OSError:
            # The probe already succeeded; an error while tearing down the
            # throw-away connection does not change that.
            pass
        return


async def main(local_model=False):
    """Build the Lyft/Uber 10-K ReAct agent and serve it through llama_deploy.

    The function downloads the 10-K reports, loads the embedding model and the
    Groq-hosted LLM, builds one retriever + query engine + tool per company,
    wraps the tools in a ``ReActAgent`` and finally starts the llama_deploy
    core services and the workflow service for that agent.

    Args:
        local_model: Request a locally loaded Hugging Face LLM instead of the
            Groq-hosted one. This path was never wired up (the original code
            hit a ``NameError`` on the undefined LLM variable), so it now
            fails fast with a clear error.

    Raises:
        NotImplementedError: If ``local_model`` is true.
        TimeoutError: If the control plane does not start accepting
            connections in time.
        RuntimeError: If the core services stop before the workflow could be
            deployed.
    """
    import asyncio

    if local_model:
        # TODO: load_hf_model(config.hf_tiny_model, config.hf_embed_model)
        # presumably returns the local LLM as well, but the key it is stored
        # under is not visible from here. Wire it in as `llm` below once
        # confirmed.
        raise NotImplementedError(
            "local_model=True is not supported yet: no local LLM is passed to "
            "the query engines or the agent."
        )

    download_10k_reports()
    config = Config()
    hf_models = load_hf_model(config.hf_tiny_model, config.hf_embed_model, embed_model_only=True)
    llm = initialize_groq_model("llama3-8b-8192")
    # NOTE(review): the 70B model is initialised but not used anywhere below.
    # The call is kept because initialize_groq_model's side effects are not
    # visible here; drop it if it is a pure constructor.
    groq_llama_70b = initialize_groq_model("llama3-70b-8192")

    indices = setup_indices(hf_models['embed_model'])

    colbert_reranker = ColbertRerank(
        top_n=10,
        model="colbert-ir/colbertv2.0",
        tokenizer="colbert-ir/colbertv2.0",
        keep_retrieval_score=True,
    )
    # Postprocessors run in list order: similarity cutoff first, then rerank.
    postprocessors = [
        SimilarityPostprocessor(similarity_cutoff=0.7),
        colbert_reranker,
    ]

    # The literal below is reproduced exactly (including its indentation)
    # because it is sent to the LLM verbatim.
    qa_prompt = PromptTemplate(
        """\
    Context information is below.
    ---------------------
    {context_str}
    ---------------------
    Given the context information and not prior knowledge, answer the query.
    Query: {query_str}
    Answer: \
    """
    )

    def build_tool(index, tool_name, company):
        """Return the QueryEngineTool answering questions from one 10-K index."""
        # Alternative retriever that was tried previously:
        # HybridRetriver(index=index, vector_similarity_top_k=20, bm25_similarity_top_k=20,
        #                fusion_similarity_top_k=20, llm=llm)
        retriever = VectorIndexRetriever(index=index, similarity_top_k=20)
        query_engine = StuffedContextQueryEngine(
            retriever=retriever,
            qa_prompt=qa_prompt,
            llm=llm,
            node_postprocessors=postprocessors,
        )
        return QueryEngineTool(
            query_engine=query_engine,
            metadata=ToolMetadata(
                name=tool_name,
                description=f"Provides information about {company} financials for year 2021",
            ),
        )

    query_engine_tools = [
        build_tool(indices['lyft_index'], "lyft_10k", "Lyft"),
        build_tool(indices['uber_index'], "uber_10k", "Uber"),
    ]

    # Single agent instance, reused for deployment (the original built an
    # identical second one inline and never used the first).
    agent = ReActAgent(
        llm=llm,
        tools=query_engine_tools,
        timeout=120,
        verbose=False,
    )

    # NOTE: a commented-out Gradio chat UI prototype used to live here; it was
    # dead code and has been removed. Recover it from version control if needed.

    # One control-plane config shared by both deployments, so the workflow
    # registers with the control plane that is actually started here. The
    # defaults are kept because that is what deploy_core was started with.
    # NOTE(review): if the control plane must be reachable from other machines,
    # set host/port here explicitly.
    control_plane_config = ControlPlaneConfig()

    # deploy_core keeps serving once started (the recorded run never got past
    # it), so it runs as a background task instead of being awaited first.
    core_task = asyncio.create_task(
        deploy_core(
            control_plane_config=control_plane_config,
            message_queue_config=SimpleMessageQueueConfig(),
        )
    )
    ready_task = asyncio.create_task(
        _wait_until_listening(control_plane_config.host, control_plane_config.port)
    )
    try:
        await asyncio.wait({core_task, ready_task}, return_when=asyncio.FIRST_COMPLETED)
        if core_task.done():
            # Surface a startup failure of the core services immediately
            # instead of waiting for the readiness probe to time out.
            core_task.result()
            raise RuntimeError(
                "llama_deploy core services exited before the workflow could be deployed"
            )
        await ready_task

        await deploy_workflow(
            agent,
            # 10.192.12.249 is the IP address of my cloud provider
            WorkflowServiceConfig(host="10.192.12.249", port=8000, service_name="my_workflow"),
            control_plane_config,
        )
    finally:
        # Stop the background tasks when the workflow service ends or fails.
        for task in (ready_task, core_task):
            task.cancel()
        await asyncio.gather(ready_task, core_task, return_exceptions=True)

# Script entry point: drive the async `main` coroutine to completion using
# its default (Groq-hosted, non-local) configuration.
if __name__ == "__main__":
    import asyncio

    asyncio.run(main(local_model=False))

Loaded environment from /teamspace/studios/this_studio/react_agent/config.env
Successfully downloaded: /teamspace/studios/this_studio/react_agent/src/utils/src/data/10k/uber_2021.pdf
Successfully downloaded: /teamspace/studios/this_studio/react_agent/src/utils/src/data/10k/lyft_2021.pdf
Loaded environment from /teamspace/studios/this_studio/react_agent/config.env
Loading embeding model
Found existing indices, loading from disk...


INFO:llama_deploy.message_queues.simple - Launching message queue server at 127.0.0.1:8001
INFO:     Started server process [12910]
INFO:     Waiting for application startup.
INFO:llama_deploy.control_plane.server - Launching control plane server at 127.0.0.1:8000
INFO:     Started server process [12910]
INFO:     Waiting for application startup.
INFO:     Application startup complete.
INFO:     Application startup complete.
INFO:     Uvicorn running on http://127.0.0.1:8001 (Press CTRL+C to quit)
INFO:     Uvicorn running on http://127.0.0.1:8000 (Press CTRL+C to quit)
INFO:llama_deploy.message_queues.simple - Consumer ControlPlaneServer-34d5da49-4c3d-489c-b99e-c4c37ac1e112: control_plane has been registered.


INFO:     127.0.0.1:50906 - "POST /register_consumer HTTP/1.1" 200 OK
