In [7]:
from dotenv import load_dotenv

load_dotenv()

True

In [8]:
from llama_index.core.workflow import (
    Event,
    StartEvent,
    StopEvent,
    Workflow,
    Context,
    step
)

from llama_index.llms.openai import OpenAI

In [9]:
# from llama_index.core.agent import ReActAgent
# from llama_index.llms.openai import OpenAI
# llm = OpenAI(model = "gpt-4o-mini", temperature = 0)

# agent = ReActAgent.from_tools([], llm = llm, verbose = True)
# # prompt_dict = agent.get_prompts()
# # for k, v in prompt_dict.items():
# #     print(f"Prompt : {k} \n Value: {v.template} \n ------------\n ")

#  "Show prices for JBL T100TWS Headphones from Amazon "

In [10]:
class QueryEvent(Event):
    sub_query : str

class SearchInDBEvent(Event):
    pass

class SearchInWebEvent(Event):
    pass

class ValidateResultEvent(Event):
    pass

class NotFoundEvent(Event):
    pass

class UpdateDBEvent(Event):
    pass

class SearchEventResult(Event):
    pass


In [11]:
from llama_index.core.agent import ReActAgent
from llama_index.core import (
    VectorStoreIndex,
    StorageContext,
    load_index_from_storage
) 

from llama_index.core.tools import QueryEngineTool, ToolMetadata

import json, os

class InfoFusionWorkflow(Workflow):
    def __init__(self, *args, **kwargs):
        super().__init__(*args, **kwargs)

        index_persist_path = f"./storage/vector_store/"

        if os.path.exists(index_persist_path):
            storage_context = StorageContext.from_defaults(persist_dir = index_persist_path)
            self.index = load_index_from_storage(storage_context)
        else:
            #initialze an empty index
            self.index = VectorStoreIndex.from_documents([])

    @step(pass_context = True)
    async def break_down_query(self, ctx : Context, ev : StartEvent) -> QueryEvent:
        if hasattr(ev, 'query'):
             await ctx.set('query', ev.query)

        if hasattr(ev, 'llm'):
            await ctx.set('llm', ev.llm)

        if hasattr(ev, 'tools'):
            await ctx.set('tools', ev.tools)

        if hasattr(ev, 'no_of_portals'):
            await ctx.set('no_of_portals', ev.no_of_portals)

        no_of_default_portals = await ctx.get('no_of_portals')

        prompt = f"""
Given a user query output a list of relevant sub-questions such that the answers to all sub-questions put together
will answer the query. If the user query does not mention any specific website or a portal, 
use popular only {no_of_default_portals} relevant websites from that region for the search. 

Respond in pure JSON without any markdown, like this :
{{
    "sub_questions" : [
        List 10 frocks for 6 year old girl from 'Bitiya by Bhama' from Myntra.
        List 10 frocks for 6 year old girl from 'Bitiya by Bhama' from FirstCry.
        List 10 frocks for 6 year old girl from 'Bitiya by Bhama' from Amazon.
    ]
}}

Here is the user question : {await ctx.get('query')}
"""
        # agent = ReActAgent.from_tools(tools = [], llm = await ctx.get('llm'), verbose = True)
        # response = agent.chat(prompt)
        llm = await ctx.get('llm')
        response = llm.complete(prompt)
        print(f"\nResponse sub-questions: {response}")

        sub_questions = json.loads(str(response))['sub_questions']

        await ctx.set("No_of_sub_questions", len(sub_questions))
        for question in sub_questions:
            ctx.send_event(QueryEvent(sub_query = question))

    @step(pass_context = True)
    async def query_router(self, ctx : Context, ev : QueryEvent) -> StopEvent: #SearchInWebEvent: #| SearchInDBEvent :
        print("debug 1")
        query_engine = self.index.as_query_engine()

        print("debug 2")
        query_engine_tool = QueryEngineTool(
            query_engine = query_engine,
            metadata = ToolMetadata(name = 'product_info',
                                    description= 'Information about various products'))
        # query_engine_tools = [query_engine_tool]

        print("!!!!debug 3: Adding query_engine_tool tool")
        tools = await ctx.get('tools')
        # tools.extend(query_engine_tools)
        tools.append(query_engine_tool)
        print(f"tools: {tools}")

        print("debug 4")
        agent = ReActAgent.from_tools(tools = tools,
                                 llm = await ctx.get('llm'),
                                 Verbose = True)
        
        print("debug 5")
        prompt = f"""
Given a user query perform search in the vector database for relevant information. 
If the information is not found, proceed to perform a web search using the web-search tool. Do not make up any information.
You have these tools for your disposal : {tools}.
Here is the user query : {ev.sub_query}

Below output text is just for reference.
However you need to continue to use vector_db_response or web_search_response keys to indicate from where you are fetching the data.

If you find information in the vector DB, return the response in pure JSON format, without any markdown, like this:
{{
    "vector_db_response" : {{
        "query" : {ev.sub_query},
        "result" : {{
            "description" : "Found 2 reviews of the product.",
            "Review1" : "review 1", 
            "Review2" : "review 2"
        }}
    }}
}}

If you dont find information in the vector DB, return the response in pure JSON format, without any markdown, like this:
{{
    "vector_db_response" : {{
        "query" : {ev.sub_query}, 
        "result" : "NOT_FOUND"
    }}
}}

If you find information via a web search, return the response in pure JSON format, without any markdown, like this:
{{
    "web_search_response" : {{
        "query" : {ev.sub_query}, 
        "url" : "url of the website",
        "description" : "Found 2 reviews of the product.",
        "Review1" : "review 1", 
        "Review2" : "review 2"
    }}
}}

If you dont find information via a web search, return the response in pure JSON format, without any markdown, like this:
{{
    "web_search_response" : {{
        "query" : {ev.sub_query}, 
        "result" : "NOT_FOUND"
    }}
}}

"""
#         prompt = f"""
# Given a user query perform search in the vector database for relevant information. 
# If the information is not found, proceed to perform a web search using the web-search tool. Do not make up any information.
# Give the output in the form as shown below:
# Thought:
# Action:
# You have these tools for your disposal : {tools}.
# Here is the user query : {ev.sub_query}

# """

        print("debug 6")
        # print(f"\nquery: {ev.sub_query},\n prompt:{prompt}")
        response = agent.chat(await ctx.get('query'))
        print(f"query : {ev.sub_query} \n Chat response : \n {str(response)}\n")

        print("debug 7")
        response_obj = json.loads(str(response))
        print(f"json response: {response_obj}")
        
        print("debug 8: Sending event and done.")
        ctx.send_event(StopEvent(ev.sub_query))

    # @step(pass_context = True)
    # async def search_in_db(self, ev : SearchInDBEvent) -> ValidateResultEvent | NotFoundEvent:
    #     pass

    # @step(pass_context = True)
    # async def search_in_web(self, ev : SearchInWebEvent) -> ValidateResultEvent:
    #     pass

    # @step(pass_context = True)
    # async def update_db(self, ev : UpdateDBEvent) -> StopEvent:
    #     pass

    # @step(pass_context = True)
    # async def generate_final_result(self, ev : SearchEventResult) -> StopEvent | None:
    #     pass

debug 2
!!!!debug 3: Adding query_engine_tool tool
> Running step 53725219-d24d-4db9-93f0-f9867a743358. Step input: List various models of 'JBL T100TWS Headphones' from Amazon India and Flipcart and Compare their prices. Show data in tabular format.
[1;3;38;5;200mThought: The user is asking for a comparison of various models of 'JBL T100TWS Headphones' from Amazon India and Flipkart, including their prices. I will first search for the models and prices on both platforms.
Action: search
Action Input: {'query': 'JBL T100TWS Headphones site:amazon.in'}
[0m> Running step 39789dc5-c47e-4824-9dc6-22e9a25683ff. Step input: None
[1;3;38;5;200mThought: I found some models of JBL T100TWS Headphones on Amazon India, but I need to gather more specific pricing information. Now, I will search for the same on Flipkart.
Action: search
Action Input: {'query': 'JBL T100TWS Headphones site:flipkart.com'}
[0m[1;3;34mObservation: [Document(id_='a1ee07a8-92bc-4e30-a39d-8f2a68c701ae', embedding=None, me

In [9]:
import os, traceback

from llama_index.tools.tavily_research import TavilyToolSpec

tavily_tool = TavilyToolSpec(
    api_key=os.environ['TAVILY_API_KEY'],
)

# from langchain_community.tools.tavily_search import TavilySearchResults

# tavily_tool = TavilySearchResults(max_results=2)


llm = OpenAI(model = "gpt-4o-mini", temperature = 0)
workflow = InfoFusionWorkflow(timeout = 60, verbose = True)

tools = tavily_tool.to_tool_list()
# tools = []
try:
    result = await workflow.run(llm = llm,
                                tools = tools,
                                no_of_portals = 1,
                                query = "Show prices for JBL T100TWS Headphones from Amazon and suggest me a best deal.")

    print(result)
except Exception as e:
    print(f"Exception: {e} \n {traceback.format_exc()}")


Running step break_down_query

Response sub-questions: {
    "sub_questions": [
        "What is the price of JBL T100TWS Headphones on Amazon?",
        "Are there any discounts or deals available for JBL T100TWS Headphones on Amazon?",
        "What are the customer reviews for JBL T100TWS Headphones on Amazon?",
        "What are the specifications of JBL T100TWS Headphones listed on Amazon?",
        "Are there any alternative products to JBL T100TWS Headphones on Amazon?",
        "What is the shipping cost for JBL T100TWS Headphones on Amazon?",
        "What is the return policy for JBL T100TWS Headphones purchased on Amazon?",
        "Are there any bundle offers available for JBL T100TWS Headphones on Amazon?",
        "What is the warranty period for JBL T100TWS Headphones on Amazon?",
        "What are the payment options available for purchasing JBL T100TWS Headphones on Amazon?"
    ]
}
Step break_down_query produced no event
Running step query_router
debug 1
debug 2
!!!!d

In [11]:
# 