In [39]:
import os
from dotenv import load_dotenv
import openai
# Load environment variables from .env file
from autogen import  AssistantAgent
from autogen.agentchat.contrib.llamaindex_conversable_agent import LLamaIndexConversableAgent
from llama_index.core import SimpleDirectoryReader, VectorStoreIndex, GPTVectorStoreIndex
from llama_index.embeddings.openai import OpenAIEmbedding
from dotenv import load_dotenv
from llama_index.agent.openai import OpenAIAgent
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.agent import AgentRunner, ReActAgent
from llama_index.llms import openai
from llama_index.core import VectorStoreIndex, get_response_synthesizer
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.retrievers.bm25 import BM25Retriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.retrievers import QueryFusionRetriever
from llama_index.core.tools import QueryEngineTool, ToolMetadata
from autogen import ConversableAgent, UserProxyAgent
from autogen.agentchat.contrib.capabilities import teachability
from autogen import Cache
from autogen.agentchat.contrib.capabilities.text_compressors import LLMLingua
from autogen.agentchat.contrib.capabilities.transforms import TextMessageCompressor
from autogen.agentchat.contrib.capabilities import transform_messages
import autogen
import nest_asyncio
from llama_parse import LlamaParse
import Stemmer
from autogen import ConversableAgent, UserProxyAgent
from llama_index.core.agent import (
    StructuredPlannerAgent,
    FunctionCallingAgentWorker,
    ReActAgentWorker,
)
from llama_index.core.node_parser import SentenceSplitter
from llama_index.core import Settings
from llama_index.embeddings.openai import OpenAIEmbedding
from llama_index.core.tools import RetrieverTool, FunctionTool
from llama_index.core.schema import IndexNode
from llama_index.core.selectors import PydanticMultiSelector

from llama_index.core.retrievers import RouterRetriever,SummaryIndexRetriever,RecursiveRetriever
from llama_index.core import SummaryIndex
from huggingface_hub import login
from llama_index.core.memory import ChatMemoryBuffer
from llama_index.core.agent.workflow import AgentWorkflow, FunctionAgent
from llama_index.core.workflow import Context
from llama_index.core.workflow import JsonPickleSerializer, JsonSerializer

nest_asyncio.apply()
load_dotenv()


hugging_face_api = os.getenv('HUGGINGFACE_API_KEY')
login(hugging_face_api)

openai_api_key = os.getenv("OPENAI_API_KEY")
LLAMA_CLOUD_KEY = os.getenv("LLAMA_CLOUD_KEY")

#global configs
Settings.text_splitter = SentenceSplitter(chunk_size=1024, chunk_overlap=20)
Settings.llm = openai.OpenAI()
Settings.embed_model = OpenAIEmbedding()

Ingesting Data

In [2]:

# Load and process documents (ensure you have a `documents/` folder)
parser = LlamaParse(
    api_key=LLAMA_CLOUD_KEY,  # can also be set in your env as LLAMA_CLOUD_API_KEY
    result_type="markdown",  # "markdown" and "text" are available
    verbose=True,
)

file_extractor = {".pdf": parser}
documents = SimpleDirectoryReader(input_dir=r"Data_repo", file_extractor=file_extractor).load_data()

# Create a vector index
print(documents)

Started parsing the file under job_id de37cff1-09fa-498d-b95e-0078ae7ba5e5
Started parsing the file under job_id a67ab1cd-1048-4019-b198-909ad6c82249
Started parsing the file under job_id 622ffd98-f321-415b-99ee-b885aea5aadb
Started parsing the file under job_id 65d6e6a5-c89c-440b-8a27-ddc80e9c69fa
Started parsing the file under job_id d0674ee8-833e-43b7-970b-9eb68a71cc2b
[Document(id_='674d4d40-d166-492c-9daf-1a27aecd318d', embedding=None, metadata={'file_path': 'd:\\docs\\LLM_RAG_structureddata\\autogen\\Data_repo\\Kamya_Resume.pdf', 'file_name': 'Kamya_Resume.pdf', 'file_type': 'application/pdf', 'file_size': 437812, 'creation_date': '2025-02-25', 'last_modified_date': '2024-12-15'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, metadata_template='{key}: 

In [86]:
for doc in documents:
    print(str(doc.metadata['file_path'].split("\\")[-1]).split('.')[0])

Kamya_Resume
MrittikaDas_resume
Oindrila_Chakravarty_Resume
Rajarshi_Chatterjee_Resume
Richa_Gupta_AWS_Eng_Resume


Creating nodes, agent, indexes, retrievers, query engines, agents for each docs

In [31]:
agent_list = []
query_engine_tools_list = []
initial_name = None
for doc in documents:
    splitter = SentenceSplitter(chunk_size=200, chunk_overlap=70)
    nodes = splitter.get_nodes_from_documents(documents)
    vector_indexes = GPTVectorStoreIndex(nodes)
    vector_retriever = VectorIndexRetriever(index=vector_indexes, similarity_top_k=3)
    vector_query_engine = RetrieverQueryEngine(
                retriever=vector_retriever,
            )

    summary_index = SummaryIndex(nodes)
    summary_index_query_engine = summary_index.as_query_engine()

    memory = ChatMemoryBuffer(token_limit=1024)


    query_engine_tools = [QueryEngineTool(query_engine=vector_query_engine,
                                        metadata=ToolMetadata(
                                            name='vector_tool', 
                                            description=f"This tool is used to retrieve detailed data for documents related to {str(doc.metadata['file_path'].split("\\")[-1])}. For example years of experience, tech stacks, companies worked for, project details etc.")
                                        ),
                        QueryEngineTool(query_engine=summary_index_query_engine,
                                        metadata=ToolMetadata(
                                            name='summary_tool', 
                                            description=f"This tool is used to retrieve summarized info from documents related to {str(doc.metadata['file_path'].split("\\")[-1])}. For example name, overall experience, introduction etc.")

                                        )]
    
    review_agent = FunctionAgent(
                name=f"{str(doc.metadata['file_path'].split("\\")[-1]).split('.')[0]}_Agent",
                description=f"Reviews resume to provide information regarding {str(doc.metadata['file_path'].split("\\")[-1]).split('.')[0]}",
                system_prompt="You are a meticulous recruiter who scans through resumes to provide details...",
                tools=query_engine_tools_list
            ) 
    initial_name = f"{str(doc.metadata['file_path'].split("\\")[-1]).split('.')[0]}_Agent"
    
    query_engine_tools_list.extend(query_engine_tools)
    agent_list.append(review_agent)

Using AgentWorkFlow with tools

In [4]:
workflow = AgentWorkflow.from_tools_or_functions(
    query_engine_tools_list,
    system_prompt=(
        "You are a helpful assistant that can scan through documents."
    ),
)


Creating Context for workflow memory and state awareness

In [32]:
ctx = Context(workflow)

In [38]:
response = await workflow.run(
    user_msg="how long can the ctx be stored?", ctx=ctx
)
print(response)

The context (ctx) can be stored for the duration of the conversation session. Once the conversation session ends or times out, the context is typically cleared. If you have specific requirements for storing context beyond the session, you may need to implement custom storage and retrieval mechanisms.


saving context to a local directory

In [44]:
ctx_dict = ctx.to_dict(serializer=JsonSerializer())
import json
with open("local_context.json", "w+") as file:
    json.dump(ctx_dict, file)

Retrieving the stored context and reusing it

In [46]:
restored_ctx = Context.from_dict(
    workflow, ctx_dict, serializer=JsonSerializer()
)

response3 = await workflow.run(user_msg="What was my previous question?", ctx=restored_ctx)
print(response3)

Your previous question was "do you remember the names of the candidates?"


Using Agentworkflow with agents

In [10]:
def initiator_agent():
    # """Multiple two integers and returns the result integer"""
    return "Initiating"


initiator_tool = FunctionTool.from_defaults(fn=initiator_agent)

# initialize ReAct agent
agent = FunctionAgent(
    name="InitiatorAgent",
    description="This just bypasses the tasks to the required agents as per the queries from the user.",
    system_prompt="You are an expert orchestrator...",
    tools=[initiator_tool]
)

agent_list.append(agent)

agent_workflow = AgentWorkflow(
    agents=agent_list,
    root_agent='InitiatorAgent',
)

In [11]:
response = await agent_workflow.run(
    user_msg="Tell me the names of all the candidates?",
)
print(response)

I have initiated the process to gather the names of all the candidates. Let me retrieve the information for you.
I have retrieved the names of all the candidates. Here are the names:

1. Kamya
2. Mrittika Das
3. Oindrila Chakravarty
4. Rajarshi Chatterjee
5. Richa Gupta

If you need more information about any specific candidate, feel free to ask!


Not so good AgentRunner usage. Preferred for simple version

In [100]:

agent_runner = AgentRunner.from_llm(query_engine_tools_list,)
agent_openAI = OpenAIAgent.from_tools(query_engine_tools,
                                    verbose=True,
                                    memory=memory,
                                    system_prompt=f"You are a specialized agent designed to provide information for document. You must always use any one of the tools if question is regarding this user or a generic question. DO NOT RELY ON PAST KNOWLEDGE.")


query = 'Tell me the names of all the candidates?'
response = agent_runner.query(query)
response.response

'The names of all the candidates are:\n1. Kamya\n2. Mrittika Das\n3. Oindrila Chakravarty\n4. Rajarshi Chatterjee'

Integrating llamaindex with langchain.
Llamaindex for the tools.
Langchain for the agents and chat memory history

In [12]:
langchain_tools = [tools.to_langchain_tool() for tools in query_engine_tools_list]

In [23]:
from langchain.agents import create_tool_calling_agent
from langchain_openai import ChatOpenAI
from langchain import hub
from langchain.agents import AgentExecutor
from langchain_community.chat_message_histories import ChatMessageHistory
from langchain_core.runnables.history import RunnableWithMessageHistory
# Get the prompt to use - you can modify this!



In [22]:
llm = ChatOpenAI(model="gpt-3.5-turbo-0125", temperature=0)
prompt = hub.pull("hwchase17/openai-functions-agent")
print(prompt.messages)
agent = create_tool_calling_agent(llm, langchain_tools, prompt)
agent_executor = AgentExecutor(agent=agent, tools=langchain_tools, verbose=True)



[SystemMessagePromptTemplate(prompt=PromptTemplate(input_variables=[], input_types={}, partial_variables={}, template='You are a helpful assistant'), additional_kwargs={}), MessagesPlaceholder(variable_name='chat_history', optional=True), HumanMessagePromptTemplate(prompt=PromptTemplate(input_variables=['input'], input_types={}, partial_variables={}, template='{input}'), additional_kwargs={}), MessagesPlaceholder(variable_name='agent_scratchpad')]


In [None]:
agent_executor.invoke({"input": "Can you tell me the name of all the candidates?"})['output']



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `summary_tool` with `{'input': 'Kamya_Resume.pdf'}`


[0m[36;1m[1;3mKamya is a motivated professional with strong analytical and investigative skills, specializing as a CDD Exits Analyst at NatWest. She excels in process improvement, data analysis, and effective communication. Kamya is known for consistently meeting KPIs, delivering results within SLAs, and contributing to team success. She has demonstrated expertise in conducting detailed investigations of customer data, ensuring compliance with guidelines, and maintaining accuracy. Additionally, Kamya has actively contributed to process optimization, team engagement, and building strong professional relationships within the organization.[0m[32;1m[1;3m
Invoking: `summary_tool` with `{'input': 'Mrittika_Das_resume.pdf'}`


[0m[36;1m[1;3mMrittika Das is a Software Engineer with over 2.6 years of experience, specializing in working with Ping Products. She ha

'The names of the candidates are:\n1. Kamya\n2. Mrittika Das\n3. Oindrila Chakravarty\n4. Rajarshi Chatterjee\n5. Richa Gupta'

Langchain agent with chat history

In [25]:
message_history = ChatMessageHistory()
agent_with_chat_history = RunnableWithMessageHistory(
    agent_executor,
    # This is needed because in most real world scenarios, a session id is needed
    # It isn't really used here because we are using a simple in memory ChatMessageHistory
    lambda session_id: message_history,
    input_messages_key="input",
    history_messages_key="chat_history",
)

In [29]:
agent_with_chat_history.invoke(
    {"input": "Tell me the name of teh candidate best suited?"},
    # This is needed because in most real world scenarios, a session id is needed
    # It isn't really used here because we are using a simple in memory ChatMessageHistory
    config={"configurable": {"session_id": "<foo>"}},
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3m
Invoking: `summary_tool` with `{'input': 'Kamya_Resume.pdf'}`


[0m[36;1m[1;3mKamya is a motivated professional with strong analytical and investigative skills, specializing as a CDD Exits Analyst at NatWest. She excels in process improvement, data analysis, and effective communication. Kamya is known for consistently meeting KPIs and delivering results within SLAs while actively contributing to team success. Additionally, she is recognized for her collaborative approach, presenting actionable ideas for process optimization and implementing time-saving strategies to enhance team efficiency. Kamya also plays a key role in team engagement by organizing activities, mentoring new members, and fostering collaboration, showcasing her leadership skills and dedication to a supportive work environment.[0m[32;1m[1;3m
Invoking: `summary_tool` with `{'input': 'Mrittika_Das_resume.pdf'}`


[0m[36;1m[1;3mMrittika Das is a Softwar

{'input': 'Tell me the name of teh candidate best suited?',
 'chat_history': [HumanMessage(content="hi! I'm bob", additional_kwargs={}, response_metadata={}),
  AIMessage(content='Hello Bob! How can I assist you today?', additional_kwargs={}, response_metadata={}),
  HumanMessage(content='can you tell me the names of all the candidates?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='The names of the candidates are:\n1. Kamya\n2. Mrittika Das\n3. Oindrila Chakravarty\n4. Rajarshi Chatterjee\n5. Richa Gupta\n\nIs there anything specific you would like to know about any of these candidates?', additional_kwargs={}, response_metadata={}),
  HumanMessage(content='who is most suited for a role of AI Engineer?', additional_kwargs={}, response_metadata={}),
  AIMessage(content='Based on the summaries of the candidates, here is a brief overview of their backgrounds:\n\n1. **Kamya** - Strong analytical and investigative skills, process improvement expertise, and effective com