In [34]:
from langchain_community.document_loaders import PyPDFLoader
from langchain_text_splitters import RecursiveCharacterTextSplitter
from langchain_community.vectorstores import FAISS
from langchain_openai import OpenAIEmbeddings
from langchain_openai import ChatOpenAI
import logging
import os

In [35]:
# 1. Load PDF
# Raw string literal: the Windows path contains backslashes (\P, \V, \d, \R) that
# are invalid escape sequences in a normal string and trigger a warning on recent
# Python versions. The resulting path value is unchanged.
docs_path = r"D:\Pritesh\VS Code Workspace\Preparations\data\Report.pdf"
loader = PyPDFLoader(docs_path)
# Parse the PDF at `docs_path`; the loader's result is kept in `documents`.
documents = loader.load()

In [None]:
# Set up logging
logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

# Define termination message and docs_path
# NOTE: a later cell rebinds `termination_msg` to a predicate function, so this
# string value is only visible until that cell runs.
termination_msg = "TERMINATE"
# Raw string literal so the backslashes in the Windows path are taken literally
# instead of being parsed as (invalid) escape sequences.
docs_path = r"D:\Pritesh\VS Code Workspace\Preparations\data\Report.pdf"  # Replace with your actual path

# Verify docs_path: fail fast with a clear error before any agent is built.
if not os.path.exists(docs_path):
    logger.error(f"The docs_path '{docs_path}' does not exist.")
    raise FileNotFoundError(f"The docs_path '{docs_path}' does not exist.")

logger.info(f"docs_path '{docs_path}' is valid.")


In [37]:
import autogen
import os
from autogen import AssistantAgent
from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent

In [38]:
# Single OpenAI endpoint; the API key is read from the environment, not hardcoded.
config_list = [{"model": "gpt-4o", "api_key": os.environ.get("OPENAI_API_KEY")}]

# Shared LLM settings for the agents.
# "timeout" is used instead of the older "request_timeout" key so this dict is
# consistent with the `llm_config` defined in a later cell of this notebook.
# NOTE(review): newer AutoGen releases use "cache_seed" for response caching;
# "seed" is kept here to preserve the existing behaviour - confirm which is intended.
llm_config = {
    "timeout": 180,
    "seed": 42,
    "config_list": config_list,
    "temperature": 0,
}

In [None]:
import chromadb
from typing_extensions import Annotated

import autogen
from autogen import AssistantAgent
from autogen.agentchat.contrib.retrieve_user_proxy_agent import RetrieveUserProxyAgent

# config_list = autogen.config_list_from_json("OAI_CONFIG_LIST")

# Show the model name of every configured endpoint.
print("LLM models: ", [entry["model"] for entry in config_list])

In [54]:
def termination_msg(x):
    """Return True when a chat message signals the end of the conversation.

    A message terminates the chat when it is a dict whose "content" value,
    converted to a string and stripped of trailing whitespace, ends with
    "TERMINATE" (case-insensitive).

    Args:
        x: The message to inspect. Anything that is not a dict is treated
            as non-terminating.

    Returns:
        bool: True if the message ends with the termination keyword.
    """
    if not isinstance(x, dict):
        return False
    # Strip trailing whitespace first: replies such as "TERMINATE\n" would
    # otherwise push the keyword out of the 9-character tail and be missed.
    tail = str(x.get("content", "")).rstrip()[-9:]
    return tail.upper() == "TERMINATE"

In [55]:

# LLM settings handed to the assistant agents and the group chat manager.
# This rebinds `llm_config`, replacing any value assigned by an earlier cell.
llm_config = {
    "config_list": config_list,
    "timeout": 120,
    "temperature": 0,
    "seed": 1234,
}

In [56]:
# Supervisor proxy: never asks a human for input, has code execution disabled,
# and answers with a fixed auto-reply when nothing else applies.
boss = autogen.UserProxyAgent(
    name="Supervisor_Agent",
    description="The supervisor_agent is who ask questions and give tasks to members.",
    human_input_mode="NEVER",
    default_auto_reply="Reply `TERMINATE` if the task is done.",
    code_execution_config=False,
    is_termination_msg=termination_msg,
)

In [57]:
# Chroma ships ready-made embedding functions; imported here so this cell is
# self-contained.
from chromadb.utils import embedding_functions

# Retrieval-augmented proxy agent that indexes the document at `docs_path` into
# a Chroma collection.
boss_aid = RetrieveUserProxyAgent(
    name="Retrieval_Supervisor",
    is_termination_msg=termination_msg,
    human_input_mode="NEVER",
    default_auto_reply="Reply `TERMINATE` if the task is done.",
    max_consecutive_auto_reply=3,
    retrieve_config={
        "task": "code",
        # Must be the file path (or list of paths), not the list of already
        # loaded documents that the PDF loader returned.
        "docs_path": docs_path,
        "chunk_token_size": 1500,
        "vector_db": "chroma",
        # Custom embeddings are supplied through "embedding_function";
        # "embedding_model" expects a model-name string, so passing an
        # embeddings object there does not select OpenAI embeddings.
        "embedding_function": embedding_functions.OpenAIEmbeddingFunction(
            api_key=os.environ.get("OPENAI_API_KEY"),
            model_name="text-embedding-ada-002",
        ),
        "model": config_list[0]["model"],
        "collection_name": "groupchat",
        # NOTE(review): an existing "groupchat" collection built with a different
        # embedding function may need to be recreated - confirm before reuse.
        "get_or_create": True,
    },
    # code_execution_config=False,  # we don't want to execute code in this case.
    description="Assistant who has to manage the retrieval process for solving difficult problems by sharing the config details to coder and other agents. Retrieval_Supervisor reports to supervisor agent like any other member.",
)

In [58]:
# LLM-backed agent whose prompt makes it responsible for splitting, chunking,
# embedding and vector-store creation.
data_engineer = AssistantAgent(
    name="Data_Pipeline_Engineer",
    description="Senior Data Engineer who takes responsibility of chunking, embeddings and vectorstore.",
    system_message=(
        "You are a senior data engineer, you recieve the docs_path and responsible for splitting, chunking and embeddings. "
        "You get the chunking instructions from Retrieval_Supervisor boss aid. "
        "You will create OpenAIEmbeddings() and always store the embeddings in faiss vectorstore. "
        "Take the help of coder, reviewer and executor to help you build code and run. "
        "If Retrieval_Supervisor can assist you for embeddings, then take help.  "
        "Reply `TERMINATE` in the end when everything is done."
    ),
    llm_config=llm_config,
    is_termination_msg=termination_msg,
)

In [59]:
# LLM-backed agent whose prompt asks it to rephrase the user query (HyDE) before
# retrieval.
prompt_engineer = AssistantAgent(
    name="Prompt_Engineer",
    description="Prompt Engineer rephrasing the user query using HyDE ",
    system_message=(
        "You are a expert prompt engineer, you recieve the user_query or PROBLEM "
        "and you have to rephrase the query to achieve best result "
        "and contact Data_Pipeline_Engineer to create embeddings using HyDE. "
        "Take the help of coder, reviewer and executor to help you build code and run. "
        "Reply `TERMINATE` in the end when everything is done."
    ),
    llm_config=llm_config,
    is_termination_msg=termination_msg,
)

In [60]:
# LLM-backed agent whose prompt asks it to match the rephrased query against the
# stored chunks by cosine similarity.
retriever = AssistantAgent(
    name="Retrieval_member",
    description="Retrieval Member performing cosine similarity between chunk embeddings and rephrased query embeddings.",
    system_message=(
        "You are a retrieval member, you find the most similar chunks "
        "(by data pipeline engineer and stored in FAISS) "
        "with the rephrased query (done by prompt engineer) using cosine similarity. "
        "Take the help of coder, reviewer and executor to help you build code and run.  "
        "Reply `TERMINATE` in the end when everything is done."
    ),
    llm_config=llm_config,
    is_termination_msg=termination_msg,
)

In [61]:
# LLM-backed agent that writes Python code on request of the other members.
coder = AssistantAgent(
    name="Senior_Python_Engineer",
    description="Senior Python Engineer who can write code to solve problems based on requirements of all other members.",
    system_message=(
        "You are a senior python engineer, you provide python code to to assist all the other members. "
        "Reply `TERMINATE` in the end when everything is done."
    ),
    llm_config=llm_config,
    is_termination_msg=termination_msg,
)

In [62]:
# LLM-backed agent that reviews the code produced in the chat.
code_reviewer = autogen.AssistantAgent(
    name="Code_Reviewer",
    description="Code Reviewer who can review the code.",
    system_message="You are a code reviewer. Reply `TERMINATE` in the end when everything is done.",
    llm_config=llm_config,
    is_termination_msg=termination_msg,
)

In [63]:
# Proxy agent that runs the generated code locally in the "paper" working directory.
# Docker is switched off here; set use_docker=True when docker is available, since
# running generated code in a container is safer than running it directly.
executor = autogen.UserProxyAgent(
    name="Executor",
    human_input_mode="NEVER",
    system_message="Executor. Execute the code written by the coder and reviewed by code_reviewer and report the result.",
    code_execution_config={
        "work_dir": "paper",
        "use_docker": False,
        "last_n_messages": 3,
    },
)

In [64]:
def _reset_agents():
    """Call `reset()` on every agent defined in this notebook, in a fixed order."""
    team = (
        boss,
        boss_aid,
        coder,
        executor,
        retriever,
        data_engineer,
        prompt_engineer,
        code_reviewer,
    )
    for member in team:
        member.reset()
    

In [65]:
# Group chat over the seven worker agents, starting with an empty history and
# capped at 50 rounds; the manager uses the shared LLM settings.
groupchat = autogen.GroupChat(
    agents=[
        boss_aid,
        retriever,
        data_engineer,
        prompt_engineer,
        coder,
        code_reviewer,
        executor,
    ],
    messages=[],
    max_round=50,
)
manager = autogen.GroupChatManager(llm_config=llm_config, groupchat=groupchat)

In [None]:
user_query = "give me a summarization of the document given"

# Start from a clean slate so re-running this cell does not continue from the
# agents' state left over by a previous run.
_reset_agents()

# NOTE(review): the chat is started by the supervisor with a plain message, so
# Retrieval_Supervisor may never be asked to retrieve document context for the
# query - confirm against the AutoGen RAG group-chat examples.
boss.initiate_chat(
    manager,
    message=user_query,
)