# GraphReasoning with Autogen

Michael Yu-Chuan Hsu, MIT, 2024 mkychsu@MIT.EDU

Markus J. Buehler, MIT, 2024 mbuehler@MIT.EDU



In [1]:
!curl http://localhost:8080/v1/models

curl: (7) Failed to connect to localhost port 8080 after 0 ms: Couldn't connect to server


In [2]:
import os

# Ask CUDA to number the GPUs by PCI bus id (see issue #152).
os.environ.update(CUDA_DEVICE_ORDER="PCI_BUS_ID")

In [3]:
import autogen, openai
# Endpoint list for the model served on localhost through an OpenAI-style API.
config_list = [
    dict(
        model="Llama3.1",
        base_url="http://localhost:8080/v1",
        api_key="NULL",
        max_tokens=40000,
    ),
]

# The "semikong" configuration currently shares the very same list object.
config_list_semikong = config_list

# Dedicated Semikong endpoint, kept for reference:
# config_list_semikong = [
#     {
#         "model":"Semikong",
#         "base_url": "http://localhost:8081/v1",
#         "api_key":"NULL",
#         "max_tokens": 8192
#     },
# ]

# Agent-level LLM settings.
llm_config = dict(
    cache_seed=9527,          # seed for caching and reproducibility
    config_list=config_list,  # a list of OpenAI API configurations
    temperature=0,            # temperature for sampling
    max_tokens=40000,
)

# Same settings, bound to the Semikong endpoint list.
semikong_config = dict(
    cache_seed=9527,                   # seed for caching and reproducibility
    config_list=config_list_semikong,  # a list of OpenAI API configurations
    temperature=0,                     # temperature for sampling
    max_tokens=40000,
)


  from .autonotebook import tqdm as notebook_tqdm
2025-02-13 21:31:50,133	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
2025-02-13 21:31:54,925	INFO util.py:154 -- Missing packages: ['ipywidgets']. Run `pip install -U ipywidgets`, then restart the notebook server for rich notebook output.
flaml.automl is not available. Please install flaml[automl] to enable AutoML functionalities.


In [4]:
import os
from transformers import AutoModelForCausalLM, AutoTokenizer

from tqdm.notebook import tqdm
from IPython.display import display, Markdown

verbatim = False

# Directory read for chunk CSVs / cached embeddings, and directory holding the
# graph file and the Chroma store.
data_dir = './GRAPHDATA_TSMC'
data_dir_output = './GRAPHDATA_TSMC_OUTPUT'

# Local checkpoint used both for the tokenizer and for the embedding model.
tokenizer_model = '/home/mkychsu/pool/llm/SEMIKONG-8b-GPTQ'

# Alternative loader kept for reference:
# embedding_model = AutoModelForCausalLM.from_pretrained(tokenizer_model, device_map='cuda', torch_dtype='auto', output_hidden_states=True)

embedding_tokenizer = AutoTokenizer.from_pretrained(tokenizer_model, use_fast=False)

# Hidden states are requested because the embeddings are pooled from them later.
embedding_model = AutoModelForCausalLM.from_pretrained(
    tokenizer_model,
    output_hidden_states=True,
)
embedding_model = embedding_model.to('cuda')

# Location of the persistent Chroma database.
embedding_doc_file = f'{data_dir_output}/chroma'



  def forward(ctx, input, qweight, scales, qzeros, g_idx, bits, maxq):
  def backward(ctx, grad_output):
  @custom_fwd(cast_inputs=torch.float16)
CUDA extension not installed.
CUDA extension not installed.
`low_cpu_mem_usage` was None, now set to True since model is quantized.
Loading checkpoint shards: 100%|██████████| 3/3 [00:00<00:00,  3.29it/s]


In [5]:
from GraphReasoning import load_embeddings, save_embeddings, generate_node_embeddings
import networkx as nx

embedding_file = 'TSMC_KG_70b.pkl'

G = nx.read_graphml(f'{data_dir_output}/5books_70b.graphml')

import torch

# Embeddings are only computed when no cached file exists yet.
generate_new_embeddings = not os.path.exists(f'{data_dir}/{embedding_file}')

with torch.no_grad():
    if not generate_new_embeddings:
        # Cached embeddings are available: load them.
        filename = f"{data_dir}/{embedding_file}"
        node_embeddings = load_embeddings(f'{data_dir}/{embedding_file}')
    else:
        # First run: embed every node of the graph and cache the result.
        node_embeddings = generate_node_embeddings(G, embedding_tokenizer, embedding_model, )
        save_embeddings(node_embeddings, f'{data_dir}/{embedding_file}')

In [5]:
import glob
import pandas as pd
from autogen.agentchat.contrib.vectordb.base import Document

chunks_list = sorted(glob.glob(f'{data_dir}/*chunks_clean.csv'))
chunk_ids, chunks, titles = [], [], []

for chunks_file in chunks_list:
    print(chunks_file)

    f = pd.read_csv(chunks_file)
    # Title = file name with the directory part and the chunk suffix removed.
    title = chunks_file.replace('_chunks_clean.csv', '').split('/')[-1]
    f['title'] = title

    chunk_ids.extend(list(f['chunk_id']))
    chunks.extend(list(f['text']))
    titles.extend(list(f['title']))

# One vector-db document per chunk, tagged with the title of its source file.
docs = []
for doc_id, doc_text, doc_title in zip(chunk_ids, chunks, titles):
    docs.append(Document(id=doc_id, content=doc_text, metadata={"title": doc_title}))

./GRAPHDATA_TSMC/A_literature_review_on_variability_in_semiconductor_manufacturing_The_next_forward_leap_to_Industry_4.0_chunks_clean.csv
./GRAPHDATA_TSMC/Atomic Layer Processing_semiconductor_chunks_clean.csv
./GRAPHDATA_TSMC/Dependences of bottom and sidewall etch rates on biasvoltage and source power during the etching of poly-Si andfluorocarbon polymer usingSF6, C4F8,andO2plasmas_chunks_clean.csv
./GRAPHDATA_TSMC/Etch Mechanism Study in Gate Patterning for 14 nm Node and_chunks_clean.csv
./GRAPHDATA_TSMC/Future_of_plasma_etching_for_microelectronics_Challenges_chunks_clean.csv
./GRAPHDATA_TSMC/High-density vertical sidewall MoS2 transistors through T-shape vertical lamination_chunks_clean.csv
./GRAPHDATA_TSMC/Influence_of_sidewall_thickness_variation_on_transfer_characteristics_of_L-shaped_Impact-ionization_MOS_transistor_chunks_clean.csv
./GRAPHDATA_TSMC/Interconnect Characterization Accuracy, Methodology, and Practical_chunks_clean.csv
./GRAPHDATA_TSMC/Model analysis of the featu

In [6]:
def custom_token_count_function(text, placeholder=''):
    """Return how many token ids the notebook's embedding tokenizer produces for ``text``.

    Args:
        text: the string to encode.
        placeholder: accepted but ignored.

    Returns:
        int: length of ``embedding_tokenizer.encode(text)``.
    """
    token_ids = embedding_tokenizer.encode(text)
    return len(token_ids)

In [7]:
import importlib
from typing import Optional, Union, cast, TypeVar

import numpy as np
import numpy.typing as npt

from chromadb.api.types import EmbeddingFunction, Embeddings
from chromadb import PersistentClient
import torch

import networkx as nx

Embeddable = Union[str, nx.DiGraph]
D = TypeVar("D", bound=Embeddable, contravariant=True)

class TransformerEmbeddingFunction(EmbeddingFunction[D]):
    """Chroma embedding function backed by an already-loaded transformers model.

    The inputs are tokenized as one padded batch, run through the model without
    gradients, mean-pooled over the token axis of the final hidden states and
    L2-normalized per input, so an embedding does not depend on which other
    inputs happened to share its batch.

    NOTE(review): collections persisted with the previous implementation were
    scaled by a batch-wide norm and pooled over padding tokens; consider
    re-indexing them so stored vectors match what this class now produces.
    """

    def __init__(
            self,
            embedding_tokenizer,
            embedding_model,
            cache_dir: Optional[str] = None,
    ):
        """
        Args:
            embedding_tokenizer: tokenizer that is called on the raw inputs.
            embedding_model: model that is called on the tokenized batch.
            cache_dir: accepted for interface compatibility; not used.

        Raises:
            ValueError: if `transformers` or `torch` cannot be imported.
        """
        try:
            # Imported only to fail early when the package is missing.
            from transformers import AutoModel, AutoTokenizer

            self._torch = importlib.import_module("torch")
            self._tokenizer = embedding_tokenizer #AutoTokenizer.from_pretrained(model_name)
            self._model = embedding_model #AutoModelForCausalLM.from_pretrained(model_name, device_map="cuda:0", cache_dir=cache_dir)
        except ImportError:
            raise ValueError(
                "The transformers and/or pytorch python package is not installed. Please install it with "
                "`pip install transformers` or `pip install torch`"
            )

    @staticmethod
    def _normalize(vector: npt.NDArray) -> npt.NDArray:
        """Normalizes to unit L2 length along the last axis.

        A 1-D input is normalized as a single vector; a 2-D input is normalized
        row by row. All-zero vectors/rows are returned unchanged.
        """
        norm = np.linalg.norm(vector, axis=-1, keepdims=True)
        # Dividing by 1 leaves zero vectors untouched instead of producing NaNs.
        safe_norm = np.where(norm == 0, 1.0, norm)
        return vector / safe_norm

    def __call__(self, input: D) -> Embeddings:
        """Embed ``input`` and return one list of floats per input item."""
        # Follow the model's device when it exposes one; otherwise keep the
        # original behaviour of sending the batch to 'cuda'.
        device = getattr(self._model, "device", "cuda")
        inputs = self._tokenizer(
            input, padding=True, truncation=True, return_tensors="pt"
        ).to(device)
        with self._torch.no_grad():
            outputs = self._model(**inputs)

        # Base models expose `last_hidden_state`; causal-LM heads only expose
        # the `hidden_states` tuple (requested via output_hidden_states=True).
        hidden = getattr(outputs, "last_hidden_state", None)
        if hidden is None:
            hidden = outputs.hidden_states[-1]
        hidden = hidden.detach().to(self._torch.float)

        attention_mask = inputs.get("attention_mask")
        if attention_mask is None:
            pooled = hidden.mean(dim=1)
        else:
            # Mean over real tokens only, so padding does not dilute short inputs.
            mask = attention_mask.unsqueeze(-1).to(hidden.dtype)
            pooled = (hidden * mask).sum(dim=1) / mask.sum(dim=1).clamp(min=1)

        embeddings = pooled.cpu().numpy()
        return [e.tolist() for e in self._normalize(embeddings)]

embedding_function = TransformerEmbeddingFunction(embedding_tokenizer=embedding_tokenizer, embedding_model=embedding_model)
client = PersistentClient(path=embedding_doc_file)


⚠️ It looks like you upgraded from a version below 0.6 and could benefit from vacuuming your database. Run chromadb utils vacuum --help for more information.


In [8]:
# Set before the import below, as in the original cell -- presumably the value is
# read when autogen's chromadb module is imported (TODO confirm).
os.environ["CHROMADB_MAX_BATCH_SIZE"]="15" # only support single GPU, "20" is for V100 32G. For A100 80G, try "50"
from autogen.agentchat.contrib.vectordb.chromadb import ChromaVectorDB

COLLECTION_NAME = '5books_70b'

ChromaDB=ChromaVectorDB(client=client, embedding_function=embedding_function)

# Open the collection, creating it when missing. Any real failure (bad path,
# embedding-function mismatch, ...) now surfaces instead of being swallowed by
# a bare `except`.
collection = client.get_or_create_collection(COLLECTION_NAME, embedding_function=embedding_function)
ChromaDB.active_collection = collection

# Index the chunks only when the collection holds nothing yet. This covers both
# a freshly created collection and an empty one left behind by an interrupted run.
if collection.count() == 0:
    ChromaDB.insert_docs(docs=docs, collection_name=COLLECTION_NAME, upsert=True)

In [9]:
import networkx as nx
G = nx.read_graphml(f'{data_dir_output}/5books_70b.graphml')

# Expose every edge's "title" attribute under the additional key "relation".
relation = nx.get_edge_attributes(G, "title")
nx.set_edge_attributes(G, values=relation, name="relation")

# Attach each node's PageRank score as node attribute "pr".
pagerank_scores = nx.pagerank(G)
nx.set_node_attributes(G, values=pagerank_scores, name="pr")

print('KG loaded: {}'.format(G))

KG loaded: DiGraph with 14365 nodes and 26743 edges


In [10]:
# Everything this cell needs is imported here so it does not depend on an
# earlier cell having imported the generation/saving helpers.
import torch
from GraphReasoning import generate_node_embeddings, load_embeddings, save_embeddings

embedding_file='TSMC_KG_70b.pkl'
filename = f"{data_dir}/{embedding_file}"

# Embeddings are only generated when no cached file exists.
generate_new_embeddings = not os.path.exists(filename)

if generate_new_embeddings:
    # Deliberately no fallback: the previous bare `except` embedded an EMPTY
    # graph on failure and saved it, so every later run silently loaded a
    # useless cache. A failure here should be visible.
    with torch.no_grad():
        node_embeddings = generate_node_embeddings(G, embedding_tokenizer, embedding_model, )

    save_embeddings(node_embeddings, filename)

else:
    # file_path = hf_hub_download(repo_id=repository_id, filename=filename, local_dir='./')
    # print(f"File downloaded at: {file_path}")
    node_embeddings = load_embeddings(filename)

In [11]:
import hashlib
import os
import re
import uuid
from typing import Any, Callable, Dict, List, Literal, Optional, Tuple, Union
from IPython import get_ipython
from GraphReasoning import collect_entities

'''
try:
    import chromadb
except ImportError as e:
    raise ImportError(
        f"{e}. You can try `pip install autogen-agentchat[retrievechat]~=0.2`, or install `chromadb` manually."
    )
''' 
from autogen.agentchat import UserProxyAgent
from autogen.agentchat.agent import Agent
from autogen.agentchat.contrib.vectordb.base import Document, QueryResults, VectorDB, VectorDBFactory
from autogen.agentchat.contrib.vectordb.utils import (
    chroma_results_to_query_results,
    filter_results_by_distance,
    get_logger,
)
from autogen.code_utils import extract_code
from autogen.retrieve_utils import (
    TEXT_FORMATS,
    query_vector_db,
)
from autogen.token_count_utils import count_token

from autogen.formatting_utils import colored

logger = get_logger(__name__)


class HybridGraphRAGAgent(UserProxyAgent):
    """(In preview) The Graph Retrieval-Augmented User Proxy retrieves information from knowledge graphs based on the embedding
    similarity, and sends them along with the question to this or next assistant

    For every incoming message the agent lets its own LLM produce a reply, uses
    the last line of that reply as the query against the vector database, and
    answers with the retrieved chunks plus the knowledge-graph relationships
    whose edges carry the same chunk id.
    """

    def __init__(
        self,
        name="HybridGraphRAGAgent",  # default set to RetrieveChatAgent
        human_input_mode: Literal["ALWAYS", "NEVER", "TERMINATE"] = "NEVER",
        is_termination_msg: Optional[Callable[[Dict], bool]] = None,
        retrieve_config: Optional[Dict] = None,  # config for the retrieve agent
        **kwargs,
    ):
        r"""
        Args:
            name (str): name of the agent.

            human_input_mode (str): whether to ask for human inputs every time a message is received.
                Possible values are "ALWAYS", "TERMINATE", "NEVER".
                1. When "ALWAYS", the agent prompts for human input every time a message is received.
                    Under this mode, the conversation stops when the human input is "exit",
                    or when is_termination_msg is True and there is no human input.
                2. When "TERMINATE", the agent only prompts for human input only when a termination
                    message is received or the number of auto reply reaches
                    the max_consecutive_auto_reply.
                3. When "NEVER", the agent will never prompt for human input. Under this mode, the
                    conversation stops when the number of auto reply reaches the
                    max_consecutive_auto_reply or when is_termination_msg is True.

            is_termination_msg (function): a function that takes a message in the form of a dictionary
                and returns a boolean value indicating if this received message is a termination message.
                The dict can contain the following keys: "content", "role", "name", "function_call".
                It is forwarded to the parent constructor.

            retrieve_config (dict or None): config for the retrieve agent.

                To use default config, set to None. Otherwise, set to a dictionary with the
                following keys:
                - `vector_db` (VectorDB) - the existing vector db for the retrieve chat.
                    It should be an instance of the VectorDB protocol.
                - `knowledge_graph` (Optional, nx.DiGraph) - a knowledge graph for RAG. Its edges
                    are matched to retrieved chunks through their `chunk_id` attribute.
                - `n_results` (Optional, int) - number of documents requested per query. Default is 5.
                - `distance_threshold` (Optional, float) - forwarded to the vector db's
                    `retrieve_docs`. Default is -1.
                - `max_tokens` (Optional, int) - Default is 8000.
                - `context_max_tokens` (Optional, int) - the context max token size for the
                    retrieve chat. If key not provided, `max_tokens * 0.8` is stored.
                    NOTE: the value is stored but the context builder does not enforce it.
                - `custom_token_count_function` (Optional, Callable) - a custom function to count the
                    number of tokens in a string. It is called with the text as its only argument.
                    Default is autogen.token_count_utils.count_token that uses tiktoken, which may
                    not be accurate for non-OpenAI models.
                - `task`, `model`, `customized_prompt` (Optional) - stored on the agent; not used
                    by the methods of this class.

            `**kwargs` (dict): other kwargs in [UserProxyAgent](../user_proxy_agent#__init__).
        """
        super().__init__(
            name=name,
            human_input_mode=human_input_mode,
            # Previously dropped: the named parameter never reached the parent
            # class, so a caller-supplied termination check was silently ignored.
            is_termination_msg=is_termination_msg,
            **kwargs,
        )
        self._retrieve_config = {} if retrieve_config is None else retrieve_config
        self._task = self._retrieve_config.get("task", "default")
        self._vector_db = self._retrieve_config.get("vector_db", None)
        self._model = self._retrieve_config.get("model", None)
        self._max_tokens = self._retrieve_config.get("max_tokens", 8000)
        self._knowledge_graph = self._retrieve_config.get("knowledge_graph", None)
        self.customized_prompt = self._retrieve_config.get("customized_prompt", None)
        self.custom_token_count_function = self._retrieve_config.get("custom_token_count_function", count_token)
        self._context_max_tokens = self._retrieve_config.get("context_max_tokens", self._max_tokens * 0.8)
        self._n_results = self._retrieve_config.get("n_results", 5)
        self._distance_threshold = self._retrieve_config.get("distance_threshold", -1)

        self._ipython = get_ipython()
        self._results = []  # the results of the current query
        self._intermediate_answers = set()  # the intermediate answers
        self._doc_contents = []  # the contents of the current used chunk
        self._doc_ids = []  # the ids of the current used chunk
        self._current_docs_in_context = []  # the ids of the current context sources
        self.register_reply(Agent, HybridGraphRAGAgent._generate_retrieve_user_reply, position=2)

        if not isinstance(self._vector_db, VectorDB):
            logger.error('You must provide an instance of vectordb')
            return

    def _reset(self, intermediate=False):
        """Clear the current query results; with ``intermediate=False`` also clear the per-conversation bookkeeping."""
        self._results = []  # the results of the current query
        if not intermediate:
            self._intermediate_answers = set()  # the intermediate answers
            self._doc_contents = []  # the contents of the current used doc
            self._doc_ids = []  # the ids of the current used doc

    def graphRAG(self, id):
        """Describe the knowledge-graph relationships extracted from one chunk.

        Args:
            id: chunk id; compared against the `chunk_id` attribute of every edge.

        Returns:
            The result of `collect_entities` on the subgraph induced by the end
            points of all matching edges, or "" when no knowledge graph is configured.
        """
        if self._knowledge_graph is None:
            return ""

        nodes = set()
        for source, target, data in self._knowledge_graph.out_edges(data=True):
            # Edges without a `chunk_id` attribute are skipped instead of raising KeyError.
            if 'chunk_id' in data and data['chunk_id'] == id:
                nodes.add(source)
                nodes.add(target)
        return collect_entities(self._knowledge_graph.subgraph(nodes))

    def _get_context(self):
        """Build the context text from the results of the current query.

        Returns:
            str: for each retrieved document, its title, source text and the
            related knowledge-graph relationships; "" when nothing was retrieved.
        """
        doc_contents = ""
        self._current_docs_in_context = []
        current_tokens = 0

        # `_results` stays empty when retrieval did not run or returned nothing.
        if not self._results:
            return doc_contents

        for item in self._results[0]:
            doc = item[0]  # each item is a (document, distance) pair
            # Walk the graph once per document and reuse the text below.
            graph_results = self.graphRAG(doc['id'])
            _doc_tokens = self.custom_token_count_function(doc["content"]+graph_results)

            # NOTE: the `_context_max_tokens` budget is intentionally not
            # enforced here; the token count is only reported.

            func_print = f"Adding content of doc {doc['id']} to context from: {doc['metadata']['title']}"
            print(colored(func_print, "green"), flush=True)

            doc_contents += f"The following information related to your question is from TITLE: {' '.join(doc['metadata']['title'].split('_'))} "
            doc_contents += f"Source text: {doc['content']}\n"
            doc_contents += f"Relationships of the knowledge: {graph_results}\n"

            current_tokens += _doc_tokens
            func_print = f"Current tokens in use: {current_tokens}"
            print(colored(func_print, "green"), flush=True)

        return doc_contents

    def _generate_retrieve_user_reply(
        self,
        messages: Optional[List[Dict]] = None,
        sender: Optional[Agent] = None,
        config: Optional[Any] = None,
    ) -> Tuple[bool, Union[str, Dict, None]]:
        """Reply with the retrieval query followed by the retrieved context.

        The agent's LLM answers the sender's latest message; the last line of
        that answer becomes the retrieval query. The chat history between this
        agent and the sender is cleared afterwards.

        Returns:
            `(True, text)` with the query line and the context, or `(False, None)`
            when the LLM produced no textual reply, so that the next registered
            reply function can take over.
        """
        self._reset(intermediate=True)

        response = self.generate_oai_reply(messages=[self._oai_messages[sender][-1]], sender=sender)

        reply = response[1]
        if isinstance(reply, dict):
            reply = reply.get("content")
        if not isinstance(reply, str):
            # No LLM client configured, or a reply without text content.
            logger.warning("HybridGraphRAGAgent: no textual LLM reply to derive a retrieval query from.")
            return False, None

        # Trailing blank lines would otherwise turn into an empty query.
        problem = reply.rstrip().split('\n')[-1]

        final_response = f"{problem}\n"
        self.retrieve_docs(
            problem=problem,
        )

        final_response += self._get_context()

        self.clear_history(sender)
        sender.clear_history(self)

        return True, final_response

    def retrieve_docs(self, problem: Union[str, List[str]] = None):
        """Retrieve docs based on the given problem and assign the results to the class property `_results`.
        The retrieved docs should be type of `QueryResults` which is a list of tuples containing the document and
        the distance.

        The number of results and the distance threshold come from the agent's
        `retrieve_config` (`n_results`, `distance_threshold`).

        Args:
            problem (str): the problem to be solved.

        Returns:
            None.
        """
        print(colored("Retrieving for:", "green"), end = " ")
        print(f"{problem}")

        if not isinstance(self._vector_db, VectorDB):
            logger.error('You must provide an instance of vectordb')
            return

        self._results = self._vector_db.retrieve_docs(
            queries=[problem],
            n_results=self._n_results,
            distance_threshold=self._distance_threshold,
        )

        

In [12]:
from openai import OpenAI
from GraphReasoning import extract_keywords_to_nodes, find_shortest_path_subgraph_between_nodes
# local_search(question, generate, graph, node_embeddings, embedding_tokenizer, embedding_model, N_samples=5, similarity_threshold=0.9)
class llm:
    """Minimal chat-completion client for an OpenAI-compatible endpoint."""

    def __init__(self, llm_config):
        """
        Args:
            llm_config (dict): must provide the keys "api_key", "base_url",
                "model" and "max_tokens".
        """
        self.client = OpenAI(api_key=llm_config["api_key"],
                             base_url=llm_config["base_url"],
                             )
        self.model = llm_config["model"]
        self.max_tokens = llm_config["max_tokens"]

    def generate_cli(self, system_prompt="You are an expert in this field. Try your best to give a clear and concise answer.",
                           prompt="Hello world! I am", temperature=0,
                           ):
        """Send one chat-completion request and return the text of the first choice.

        Args:
            system_prompt (str or None): system message; omitted from the request when None.
            prompt (str): user message.
            temperature: sampling temperature passed to the endpoint.

        Returns:
            The content of the first choice, or '' when the request fails.
            Failures are logged; KeyboardInterrupt/SystemExit are not
            intercepted, so a running cell can still be stopped.
        """
        messages = []
        if system_prompt is not None:
            messages.append({"role": "system", "content": system_prompt})
        messages.append({"role": "user", "content": prompt})

        try:
            result = self.client.chat.completions.create(
                model=self.model,
                messages=messages,
                temperature=temperature,
                max_tokens=self.max_tokens,
            )
            return result.choices[0].message.content
        except Exception as exc:
            # Best effort is kept (callers receive ''), but the reason is no longer hidden.
            import logging
            logging.getLogger(__name__).warning("generate_cli failed, returning '': %r", exc)
            return ''
        
# Rebinds the name `llm` from the class defined above to an instance built from
# the first entry of `config_list`; the class is no longer reachable by that name afterwards.
llm=llm(llm_config=config_list[0])        
# Bound chat-completion method of that instance; later passed to GraphRAGAgent as `generate`.
generate = llm.generate_cli

class GraphRAGAgent(UserProxyAgent):
    """(In preview) The Graph Retrieval-Augmented User Proxy retrieves information from knowledge graphs based on the embedding
    similarity, and sends them along with the question to this or next assistant

    For every incoming message the agent maps the message text to graph nodes,
    connects those nodes through shortest paths and replies with the
    relationships found in the resulting subgraph.
    """

    def __init__(
        self,
        name="GraphRAGChatAgent",  # default set to RetrieveChatAgent
        human_input_mode: Literal["ALWAYS", "NEVER", "TERMINATE"] = "NEVER",
        is_termination_msg: Optional[Callable[[Dict], bool]] = None,
        generate=None,
        node_embeddings=None,
        embedding_tokenizer=None,
        embedding_model=None,
        retrieve_config: Optional[Dict] = None,  # config for the retrieve agent
        **kwargs,
    ):
        r"""
        Args:
            name (str): name of the agent.

            human_input_mode (str): whether to ask for human inputs every time a message is received.
                Possible values are "ALWAYS", "TERMINATE", "NEVER".
                1. When "ALWAYS", the agent prompts for human input every time a message is received.
                    Under this mode, the conversation stops when the human input is "exit",
                    or when is_termination_msg is True and there is no human input.
                2. When "TERMINATE", the agent only prompts for human input only when a termination
                    message is received or the number of auto reply reaches
                    the max_consecutive_auto_reply.
                3. When "NEVER", the agent will never prompt for human input. Under this mode, the
                    conversation stops when the number of auto reply reaches the
                    max_consecutive_auto_reply or when is_termination_msg is True.

            is_termination_msg (function): a function that takes a message in the form of a dictionary
                and returns a boolean value indicating if this received message is a termination message.
                The dict can contain the following keys: "content", "role", "name", "function_call".
                It is forwarded to the parent constructor.

            generate (Callable): text-generation callable handed to `extract_keywords_to_nodes`.

            node_embeddings: node embeddings of the knowledge graph, handed to
                `extract_keywords_to_nodes`.

            embedding_tokenizer, embedding_model: tokenizer and model handed to
                `extract_keywords_to_nodes`.

            retrieve_config (dict or None): config for the retrieve agent.

                To use default config, set to None. Otherwise, set to a dictionary with the
                following keys:
                - `knowledge_graph` (nx.DiGraph) - the knowledge graph for RAG.
                - `n_results` (Optional, int) - passed to `extract_keywords_to_nodes` after the
                    embedding model; presumably the number of candidate nodes per keyword
                    (TODO confirm). Default is 5.
                - `distance_threshold` (Optional, float) - passed to `extract_keywords_to_nodes`
                    as `similarity_threshold`. Default is 0.9.

            `**kwargs` (dict): other kwargs in [UserProxyAgent](../user_proxy_agent#__init__).
        """
        super().__init__(
            name=name,
            human_input_mode=human_input_mode,
            # Previously dropped: the named parameter never reached the parent
            # class, so a caller-supplied termination check was silently ignored.
            is_termination_msg=is_termination_msg,
            **kwargs,
        )
        self._retrieve_config = {} if retrieve_config is None else retrieve_config
        self._knowledge_graph = self._retrieve_config.get("knowledge_graph", None)
        self._n_results = self._retrieve_config.get("n_results", 5)
        self._distance_threshold = self._retrieve_config.get("distance_threshold", 0.9)

        self.generate = generate
        self.node_embeddings = node_embeddings
        self.embedding_tokenizer = embedding_tokenizer
        self.embedding_model = embedding_model

        self._ipython = get_ipython()
        self._results = []  # the results of the current query
        self._intermediate_answers = set()  # the intermediate answers
        self.register_reply(Agent, GraphRAGAgent._generate_retrieve_user_reply, position=2)

    def _reset(self, intermediate=False):
        """Clear the current query results; with ``intermediate=False`` also clear the per-conversation bookkeeping."""
        self._results = []  # the results of the current query
        if not intermediate:
            self._intermediate_answers = set()  # the intermediate answers
            self._doc_contents = []  # the contents of the current used doc
            self._doc_ids = []  # the ids of the current used doc

    def graphRAG(self, message):
        """Collect the knowledge-graph relationships relevant to ``message``.

        Args:
            message (str or dict): the question text, or a chat message dict
                whose "content" holds the text.

        Returns:
            The result of `collect_entities` on the subgraph spanned by the
            shortest paths between the nodes matched to the message.
        """
        # Chat messages are dicts; only their text should reach keyword
        # extraction, not the stringified dict with role/name fields.
        if isinstance(message, dict):
            message = message.get("content") or ""

        nodes = extract_keywords_to_nodes(message, self.generate, self.node_embeddings, self.embedding_tokenizer, self.embedding_model, self._n_results, similarity_threshold=self._distance_threshold)
        # Paths are searched on the undirected view; the relationships are then
        # read from the original directed graph.
        subgraph = find_shortest_path_subgraph_between_nodes(self._knowledge_graph.to_undirected(), nodes)
        return collect_entities(self._knowledge_graph.subgraph(subgraph))

    def _generate_retrieve_user_reply(
        self,
        messages: Optional[List[Dict]] = None,
        sender: Optional[Agent] = None,
        config: Optional[Any] = None,
    ) -> Tuple[bool, Union[str, Dict, None]]:
        """Reply with the graph relationships related to the sender's latest message.

        The chat history between this agent and the sender is cleared afterwards.

        Returns:
            `(True, text)` where `text` asks the recipient to consider the
            collected relationships.
        """
        self._reset(intermediate=True)

        relationships = self.graphRAG(self._oai_messages[sender][-1])

        final_response = f"Please consider the following relationships of the knowledge related to the question and make your response: {relationships}"

        self.clear_history(sender)
        sender.clear_history(self)

        return True, final_response


In [13]:
def _mentions_terminate(message):
    """Return True when the message content, with "*" removed, contains "TERMINATE"."""
    return "TERMINATE" in message.get("content", "").replace("*", "").rstrip()
    # stricter alternative: message.get("content", "").rstrip().endswith("TERMINATE")


# Prompt of the planning agent (kept verbatim).
planner_system_message = """Planner, more of a supervising project manager who works with other engineers but only has basic understanding about semiconductors. Don't write code.
You will come up a concise but through plan, breaking down the problem into several sub-questions that can help solve it step by step. These will be the core of the discussion.
Don't make assumption. Only adopt the ideas from other agents. Don't write citation or references
For example, when you try to plan an etching task, your answer should consider some break-down subquestions in mind like the example below: 
1. What kind of gas we are using is one of the most important issue which can lead to different side effects
2. What selectivity ratio we are adopting is also critical as it can affect how aggresive the etching process is going.
3. Any suggeted side wall protection to improve the overall performance?
4. Any suggeted step to help debris reduction?
At the end of your response, always put the highest-priorty sub-question for the agents to answer in this format: 'QUESTION: ... ?'. 
Don't revise the sub-question when the critic gives a good score higher than 6/10 because that would waste our time. Just go ahead for the next sub-question.
When all the subquestions have been answered, add "WRITE REPORT" at the end of your response.
"""

# Human stand-in that starts and observes the discussion; never runs code.
user_proxy = autogen.UserProxyAgent(
    name="Admin",
    system_message="A human admin. Interact with the engineer to discuss the QA result.",
    code_execution_config=False,
    human_input_mode="NEVER",
    is_termination_msg=_mentions_terminate,
)

# Knowledge-graph retrieval agent built on the graph and embeddings loaded above.
graph_rag_agent = GraphRAGAgent(
    name="graph_rag_agent",
    system_message="""RAG agent. 
""",
    code_execution_config=False,
    human_input_mode="NEVER",
    llm_config=llm_config,
    retrieve_config={
        "n_results": 5,
        "knowledge_graph": G,
        "distance_threshold": 1.5
    },
    generate=generate,
    node_embeddings=node_embeddings,
    embedding_tokenizer=embedding_tokenizer,
    embedding_model=embedding_model,
    is_termination_msg=_mentions_terminate,
)

# Planner that splits the task into sub-questions.
planner = autogen.AssistantAgent(
    name="Planner",
    system_message=planner_system_message,
    llm_config=llm_config,
    is_termination_msg=_mentions_terminate,
)

# planner = autogen.AssistantAgent(
#     name="Planner",
#     llm_config=llm_config,
#     system_message="""Planner, more of a project manager who works with other engineers but only has basic understanding about semiconductors. Don't write code.
#     You will come up a concise plan, breaking down the problem into several sub-questions that can help solve it step by step. These will be the core of the discussion.
#     At the end of your response, always put the highest-priorty sub-question for the agents to answer in this format: 'QUESTION: ... ?'. 
#     If an answer scores higher than and equal to 7/10, proceed with the next sub-question.
#     Otherwise, modify that question and follow up to improve the answer only once, and then move on.
#     If you think all the sub-questions have been handled, which means the main problem can be solved with all the sub-answers combined, you should write a concise report that includes all the sub-questions and all the sub-answers while keeping the citations in academic style with the citations as is.
# """,
#     is_termination_msg=lambda x: "TERMINATE" in x.get("content", "").replace("*", "").rstrip(), 
# )


# Hybrid retrieval agent: its retrieve_config carries both a vector store
# (`ChromaDB`) and the knowledge graph `G`. The prompt asks it to forward only
# the highest-priority 'QUESTION: ...' it receives.
hgraph_rag_agent = HybridGraphRAGAgent(
    name="hybrid_graph_rag_agent",
    system_message="""RAG agent. 
First look at the message you recieve and extract the question of the highest priority, often starting with QUESTION: ... , and pass it on in question-only format, such as: QUESTION: ...?
There should be always a question to consider so do not make up questions. If all the questions seem to be solved already, just answer 'TERMINATE' only.
""",
    human_input_mode="NEVER",
    code_execution_config=False,
    llm_config=llm_config,
    retrieve_config={
        "custom_token_count_function": custom_token_count_function,
        "vector_db": ChromaDB,
        "n_results": 5,
        # Retrieval context budget mirrors the LLM's max_tokens setting.
        "max_tokens": llm_config["max_tokens"],
        "knowledge_graph": G,
        "distance_threshold": 1.5
    },
    # Coerce a None `content` to "" so the termination check cannot raise
    # AttributeError on `.replace`.
    is_termination_msg=lambda x: "TERMINATE" in (x.get("content") or "").replace("*", "").rstrip(),
)

# Engineer: answers the current sub-question in 'QUESTION / ANSWER' form with
# citations, and is prompted to append a "CLEAR HISTORY graph_rag_agent"
# directive to every reply.
engineer = autogen.AssistantAgent(
    name="Engineer",
    llm_config=llm_config,
    system_message="""Engineer with semiconductor backgrounds. Don't write code.
Start your response with: QUESTION: ...? \n ANSWER: ... .
You should always use the information you recieve from the other agents and don't make assumption. You should keep references when you use the provided information from another agent
Write your answer strictly in academic style with citations such as '<something true> [1]' and a references section with [1] <REFERENCE TITLE>: <reasons> and following the number in all your answers to make sure your citation is not overlapping.
Don't ever cite any sources that are not from the information you have. If you have an idea that is hypothetical, only mark it in your response.
Don't indirect cite the reference from the source texts.
Add "\nCLEAR HISTORY graph_rag_agent" at the end of your reply
""",
    # Coerce a None `content` to "" so the termination check cannot raise
    # AttributeError on `.replace`.
    is_termination_msg=lambda x: "TERMINATE" in (x.get("content") or "").replace("*", "").rstrip(),
)

# Critic: scores each sub-answer on the rubric in the prompt. It runs on
# `semikong_config`; pass `llm_config` instead to use the same model
# configuration as the other assistants.
critic = autogen.AssistantAgent(
    name="Critic",
    llm_config=semikong_config,
    system_message="""Senior engineer critic with semiconductor background. Don't write code.
Concisely criticize or approve whether the current sub-answer from an agent can solve the question.
Fairly evalute the completeness of the answer to the question into score on a scale of 1 to 10, 6 being acceptable. Below is the score policy you should consider.
1. Credibility: 0 to 5/5 for how much content of the answer is aquired by the information from the knowledge graphs or source texts from agents?
2. Correctness: 0 to 5/5 for how good is the answer to address the question. Does it reason the answer well?
3. Creativity: this does not give points but please report whether the answer provides any new insights based on the reasoning results.
""",
    # Design notes kept from the original cell:
    #   - no more scoring, just raise some potential issues for the summarizer to organize a report
    #   - planner does not revise or consider the suggestion from the critic, to ensure the chat flow
    # Coerce a None `content` to "" so the termination check cannot raise
    # AttributeError on `.replace`.
    is_termination_msg=lambda x: "TERMINATE" in (x.get("content") or "").replace("*", "").rstrip(),
)

# Summarizer: writes the final report from the chat history and is prompted to
# end its reply with "TERMINATE", which the termination checks look for.
summarizer = autogen.AssistantAgent(
    name="Summarizer",
    llm_config=llm_config,
    system_message="""Engineer with semiconductor backgrounds. Don't write code.
You write a academic-style report about the task and its subquestions and subanswers discussed by all the agents. 
The format of your report should have all the components listed below with reorganized citations that you see only in the previous responses from the engineer agents:
1. The definition of the task 
2. Subquestions that can help solve the task and the corresponding subanswers numbering starting from a, b, c ... 
For example,
Sub-question a: ... . Sub-answer a: ...
Sub-question b: ... . Sub-answer b: ...
Sub-question c: ... . Sub-answer c: ...
3. Potential issues reported
4. Summary
5. References
You only summarize the information in the chat history. Don't add new questions or assumptions. You should keep references when you use the provided information from another agent
While writing the report strictly in academic style with citations such as '<something true> [1]' and a references section with [1] <REFERENCE TITLE>: <reasons> and following the number in all your answers to make sure your citation is not overlapping.
Don't ever cite any sources that are not from the information you have. If you have an idea that is hypothetical, only mark it in your response.
If you think all the sub-questions have been handled, you should combine all the sub-questions and all the sub-answers into a concise report, keeping the original references and reasoning in the same format and numbering. No need to make too much edition. 

Add "TERMINATE" at the end of your reply
""",
    # Coerce a None `content` to "" so the termination check cannot raise
    # AttributeError on `.replace`.
    is_termination_msg=lambda x: "TERMINATE" in (x.get("content") or "").replace("*", "").rstrip(),
)

In [14]:
# Participants of the main group chat, one per line.
agents = [
    user_proxy,
    graph_rag_agent,
    planner,
    hgraph_rag_agent,
    engineer,
    critic,
    summarizer,
]
def graphRAG_speaker_selection_func(last_speaker: Agent, groupchat: autogen.GroupChat):
    """Pick the next speaker with a fixed retrieve -> plan -> answer -> review loop.

    Transitions (checked in this order):
        no messages yet              -> graph_rag_agent
        last message has WRITE REPORT -> summarizer   (case-insensitive)
        Admin (user_proxy)           -> graph_rag_agent
        graph_rag_agent              -> planner
        planner                      -> hgraph_rag_agent
        hgraph_rag_agent             -> engineer
        engineer                     -> critic
        critic                       -> planner

    Args:
        last_speaker: The agent that produced the most recent message.
        groupchat: The group chat whose `messages` history is inspected.

    Returns:
        An `Agent` instance for the next turn, or the string "auto" when the
        last speaker has no explicit transition (e.g. the summarizer).
    """
    messages = groupchat.messages
    if not messages:
        return graph_rag_agent

    # `content` may be missing or None; treat both as an empty string instead
    # of crashing on `.upper()`.
    last_content = messages[-1].get("content") or ""
    if "WRITE REPORT" in last_content.upper():
        return summarizer

    # The captured run shows the Admin's opening message already in the chat
    # when the first speaker is chosen, so route it explicitly rather than
    # relying on the "auto" fallback.
    if last_speaker is user_proxy:
        return graph_rag_agent

    if last_speaker is graph_rag_agent:
        return planner

    if last_speaker is planner:
        return hgraph_rag_agent

    # The original tested `graph_rag_agent` a second time here, which could
    # never match; the hybrid retriever is the agent that feeds the engineer.
    if last_speaker is hgraph_rag_agent:
        return engineer

    if last_speaker is engineer:
        # The original also rebound the local name `groupchat` to a one-item
        # list here; that never touched the chat history, so it was dropped.
        return critic

    if last_speaker is critic:
        return planner

    return "auto"

# Main group chat: turn order comes from graphRAG_speaker_selection_func
# (swap in 'round_robin' to bypass the custom routing), capped at 100 rounds.
groupchat = autogen.GroupChat(
    agents=agents,
    messages=[],
    speaker_selection_method=graphRAG_speaker_selection_func,
    max_round=100,
    enable_clear_history=True,
)

# The manager is the recipient that user_proxy.initiate_chat() talks to.
manager = autogen.GroupChatManager(groupchat=groupchat)


In [15]:
import shutil

# Best-effort removal of the local `.cache` directory before a fresh run
# (presumably the on-disk LLM response cache tied to `cache_seed` — confirm).
# `ignore_errors=True` keeps the "don't fail if it is missing" behaviour
# without a bare `except:` that would also swallow KeyboardInterrupt.
shutil.rmtree('.cache', ignore_errors=True)


In [16]:
# Kick off the group chat with the two-stage etching task (base problem only).
result = user_proxy.initiate_chat(
    recipient=manager,
    message='''
Consider a two-stage etching problem on a poly-silicon: 
In the first stage, we need to do an HAR etch with 30 nm of width and 100 nm of depth.
In the second stage, we need to perform a selectivity ratio of 22:1 (poly-silicon to SiO2) for an etch into another 45 nm of depth and the same width.  
The base is STI OX.

''',
)


[33mAdmin[0m (to chat_manager):


Consider a two-stage etching problem on a poly-silicon: 
In the first stage, we need to do an HAR etch with 30 nm of width and 100 nm of depth.
In the second stage, we need to perform a selectivity ratio of 22:1 (poly-silicon to SiO2) for an etch into another 45 nm of depth and the same width.  
The base is STI OX.



--------------------------------------------------------------------------------
[32m
Next speaker: graph_rag_agent
[0m
Extract keywords: ['two-stage', 'etching', 'poly-silicon', 'HAR etch', 'selectivity ratio', 'SiO2', 'STI OX']
Found ['two zones', 'etchant', 'poly-si film', 'secco etch', 'selectivity', 'si/sio2 interface', 'i-£-pcd method'] in node_embeddings
Path between two zones, etchant found as ['two zones', 'triodes', 'three-element discharge systems', 'triode reactors', 'etch rate', 'etchant']
Path between two zones, poly-si film found as ['two zones', 'triodes', 'three-element discharge systems', 'triode reactors', 'ion bomb

2025-02-13 01:07:46,246 - autogen.agentchat.contrib.vectordb.chromadb - INFO - No collection is specified. Using current active collection 5books_70b.[0m


[32mRetrieving for:[0m QUESTION: What type of plasma etchant should be used for the first stage (HAR etch) to achieve 30 nm width and 100 nm depth while minimizing damage to the STI OX base?
[32mAdding content of doc 295615b497490fde13ac029333dba53a to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 643[0m
[32mAdding content of doc ac53d04060b65f0efc0e13316f37369a to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 1223[0m
[32mAdding content of doc d27fd3aab9f69bc9e80f9ba9fcabfd48 to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 1898[0m
[32mAdding content of doc 84a6c1186a2a63c8dea0472c5977f686 to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 2459[0m
[32mAdding content of doc 56c7ad8f5a09d0cb8ab0f639bd7c0525 to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 3077[0m
[33mhybrid_graph_rag_agent

2025-02-13 01:11:35,478 - autogen.agentchat.contrib.vectordb.chromadb - INFO - No collection is specified. Using current active collection 5books_70b.[0m


[32mRetrieving for:[0m QUESTION: How can we optimize the ion bombardment in the first stage to enhance anisotropic etching and control the etch rate, considering the high aspect ratio of 3:1 (depth:width)?
[32mAdding content of doc 84a6c1186a2a63c8dea0472c5977f686 to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 561[0m
[32mAdding content of doc 295615b497490fde13ac029333dba53a to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 1204[0m
[32mAdding content of doc d27fd3aab9f69bc9e80f9ba9fcabfd48 to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 1879[0m
[32mAdding content of doc 9e097315a8354c0ba51d3726688dd2b2 to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 2403[0m
[32mAdding content of doc 56c7ad8f5a09d0cb8ab0f639bd7c0525 to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 3021[0m
[33mhybrid_g

2025-02-13 01:15:11,604 - autogen.agentchat.contrib.vectordb.chromadb - INFO - No collection is specified. Using current active collection 5books_70b.[0m


[32mRetrieving for:[0m QUESTION: For the second stage, what specific plasma etchant should be used to achieve a selectivity ratio of 22:1 (poly-silicon to SiO2) for an additional 45 nm depth etch?
[32mAdding content of doc 295615b497490fde13ac029333dba53a to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 643[0m
[32mAdding content of doc 959ab0ce4c96daab3f38db38426919d3 to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 1203[0m
[32mAdding content of doc 84a6c1186a2a63c8dea0472c5977f686 to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 1764[0m
[32mAdding content of doc d27fd3aab9f69bc9e80f9ba9fcabfd48 to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 2439[0m
[32mAdding content of doc 56c7ad8f5a09d0cb8ab0f639bd7c0525 to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 3057[0m
[33mhybrid_graph_rag_

2025-02-13 01:19:55,415 - autogen.agentchat.contrib.vectordb.chromadb - INFO - No collection is specified. Using current active collection 5books_70b.[0m


[32mRetrieving for:[0m QUESTION: How can we maintain precise control over the etch rate in the second stage to prevent excessive etching of the SiO2 layer, given its relatively low etch rate compared to poly-silicon?
[32mAdding content of doc 295615b497490fde13ac029333dba53a to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 643[0m
[32mAdding content of doc 84a6c1186a2a63c8dea0472c5977f686 to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 1204[0m
[32mAdding content of doc d27fd3aab9f69bc9e80f9ba9fcabfd48 to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 1879[0m
[32mAdding content of doc 56c7ad8f5a09d0cb8ab0f639bd7c0525 to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 2497[0m
[32mAdding content of doc 1f3d5f92c9020d120e1ee43cb4017b87 to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 3064[0m
[

2025-02-13 01:26:14,653 - autogen.agentchat.contrib.vectordb.chromadb - INFO - No collection is specified. Using current active collection 5books_70b.[0m


[32mRetrieving for:[0m QUESTION: What measures can be taken to minimize metallic contaminants and their impact on the etching process, considering the critical nature of the si/sio2 interface in semiconductor manufacturing?
[32mAdding content of doc 295615b497490fde13ac029333dba53a to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 643[0m
[32mAdding content of doc d27fd3aab9f69bc9e80f9ba9fcabfd48 to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 1318[0m
[32mAdding content of doc ac53d04060b65f0efc0e13316f37369a to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 1898[0m
[32mAdding content of doc 56c7ad8f5a09d0cb8ab0f639bd7c0525 to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 2516[0m
[32mAdding content of doc 84a6c1186a2a63c8dea0472c5977f686 to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 3077

In [17]:
# Same etching task, extended with the sidewall/debris requirements and the
# explicit question about gas chemistry.
result = user_proxy.initiate_chat(
    recipient=manager,
    message='''
Consider a two-stage etching problem on a poly-silicon: 
In the first stage, we need to do an HAR etch with 30 nm of width and 100 nm of depth.
In the second stage, we need to perform a selectivity ratio of 22:1 (poly-silicon to SiO2) for an etch into another 45 nm of depth and the same width.  
The base is STI OX.

We need to protect the sidewalls from etching and keep them vertical. We also need to keep sediment or debris a little as possible.
What kind of gas and combination in ratio should we apply? Any potential issue we need to consider? 

''',
)


[33mAdmin[0m (to chat_manager):


Consider a two-stage etching problem on a poly-silicon: 
In the first stage, we need to do an HAR etch with 30 nm of width and 100 nm of depth.
In the second stage, we need to perform a selectivity ratio of 22:1 (poly-silicon to SiO2) for an etch into another 45 nm of depth and the same width.  
The base is STI OX.

We need to protect the sidewalls from etching and keep them vertical. We also need to keep sediment or debris a little as possible.
What kind of gas and combination in ratio should we apply? Any potential issue we need to consider? 



--------------------------------------------------------------------------------
[32m
Next speaker: graph_rag_agent
[0m
Extract keywords: ['two-stage etching', 'poly-silicon', 'HAR etch', 'selectivity ratio', 'SiO2', 'STI OX', 'sidewall protection', 'vertical sidewalls', 'debris reduction', 'etching gas', 'gas combination', 'ratio optimization']
Found ['efficient etching', 'poly-si film', 'secco etch', 's

2025-02-13 08:12:42,385 - autogen.agentchat.contrib.vectordb.chromadb - INFO - No collection is specified. Using current active collection 5books_70b.[0m


[32mRetrieving for:[0m QUESTION: What is the most suitable etchant gas combination for achieving anisotropic etching in the first stage while maintaining a high selectivity ratio in the second stage?
[32mAdding content of doc 295615b497490fde13ac029333dba53a to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 643[0m
[32mAdding content of doc 84a6c1186a2a63c8dea0472c5977f686 to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 1204[0m
[32mAdding content of doc d27fd3aab9f69bc9e80f9ba9fcabfd48 to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 1879[0m
[32mAdding content of doc 56c7ad8f5a09d0cb8ab0f639bd7c0525 to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 2497[0m
[32mAdding content of doc 3d9ae709bf7a06b469d3954c857dd464 to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 3228[0m
[33mhybrid_graph_r

2025-02-13 08:15:29,175 - autogen.agentchat.contrib.vectordb.chromadb - INFO - No collection is specified. Using current active collection 5books_70b.[0m


[32mRetrieving for:[0m QUESTION: What type of passivation layer or technique can be used to protect the sidewalls during the etching process, while maintaining a high selectivity ratio in the second stage?
[32mAdding content of doc 295615b497490fde13ac029333dba53a to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 643[0m
[32mAdding content of doc d27fd3aab9f69bc9e80f9ba9fcabfd48 to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 1318[0m
[32mAdding content of doc 84a6c1186a2a63c8dea0472c5977f686 to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 1879[0m
[32mAdding content of doc ac53d04060b65f0efc0e13316f37369a to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 2459[0m
[32mAdding content of doc 959ab0ce4c96daab3f38db38426919d3 to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 3019[0m
[33mhybrid_g

2025-02-13 08:19:41,658 - autogen.agentchat.contrib.vectordb.chromadb - INFO - No collection is specified. Using current active collection 5books_70b.[0m


[32mRetrieving for:[0m QUESTION: What is the most suitable gas composition and pressure for achieving a selectivity ratio of 22:1 (poly-silicon to SiO2) in the second stage?
[32mAdding content of doc 295615b497490fde13ac029333dba53a to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 643[0m
[32mAdding content of doc 84a6c1186a2a63c8dea0472c5977f686 to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 1204[0m
[32mAdding content of doc fb21d7b22af197653d8ebd580be84bd1 to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 1783[0m
[32mAdding content of doc 9e097315a8354c0ba51d3726688dd2b2 to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 2307[0m
[32mAdding content of doc d6936e4a4053e8c331d477e0e621d1d9 to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 2978[0m
[33mhybrid_graph_rag_agent[0m (to chat_mana

2025-02-13 08:24:49,682 - autogen.agentchat.contrib.vectordb.chromadb - INFO - No collection is specified. Using current active collection 5books_70b.[0m


[32mRetrieving for:[0m QUESTION: What are some effective methods for reducing debris or sediment during the etching process, particularly in the context of high-aspect-ratio etching?
[32mAdding content of doc d27fd3aab9f69bc9e80f9ba9fcabfd48 to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 675[0m
[32mAdding content of doc 1f3d5f92c9020d120e1ee43cb4017b87 to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 1242[0m
[32mAdding content of doc 295615b497490fde13ac029333dba53a to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 1885[0m
[32mAdding content of doc 84a6c1186a2a63c8dea0472c5977f686 to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 2446[0m
[32mAdding content of doc 56c7ad8f5a09d0cb8ab0f639bd7c0525 to context from: Atomic Layer Processing_semiconductor[0m
[32mCurrent tokens in use: 3064[0m
[33mhybrid_graph_rag_agent[0m (to 

In [None]:

# Two-agent baseline chat.
# NOTE(review): `engineer2` is not created alongside the other agents in the
# agent-definition cell — confirm it is defined before this cell runs.
agents = [
    user_proxy,
    engineer2,
]
def graphRAG_speaker_selection_func(last_speaker: Agent, groupchat: autogen.GroupChat):
    """Pick the next speaker for a planner -> retriever -> engineer -> critic loop.

    NOTE(review): this redefines the function of the same name from the earlier
    cell, and the GroupChat built below it uses
    `speaker_selection_method='auto'`, so this version is not wired in.

    Transitions:
        planner         -> graph_rag_agent
        graph_rag_agent -> engineer
        engineer        -> critic  (after removing graph_rag_agent's messages
                                    from the shared history)
        critic          -> planner

    Args:
        last_speaker: The agent that produced the most recent message.
        groupchat: The group chat whose `messages` list is pruned in place.

    Returns:
        An `Agent` instance for the next turn, or "auto" when the last speaker
        has no explicit transition.
    """
    if last_speaker is planner:
        return graph_rag_agent

    if last_speaker is graph_rag_agent:
        return engineer

    if last_speaker is engineer:
        # Filter in place with slice assignment. The original popped items
        # while enumerating the same list, which skips the element following
        # each removal and so could leave retriever messages behind.
        groupchat.messages[:] = [
            message
            for message in groupchat.messages
            if message.get('name') != 'graph_rag_agent'
        ]
        return critic

    if last_speaker is critic:
        return planner

    return "auto"

# Baseline group chat: built-in 'auto' speaker selection ('round_robin' is the
# alternative that was tried), limited to two rounds.
groupchat = autogen.GroupChat(
    agents=agents,
    messages=[],
    speaker_selection_method='auto',
    max_round=2,
)

# Rebinds `manager` to the baseline chat for the cells that follow.
manager = autogen.GroupChatManager(groupchat=groupchat)


In [None]:
# Single-question probe sent to whichever `manager` is currently bound.
result = user_proxy.initiate_chat(
    recipient=manager,
    message='''
What type of gas or gas mixture is suitable for achieving a high aspect ratio (HAR) etch with 30 nm width and 100 nm depth in poly-silicon?

''',
)


In [None]:
# Full two-stage etching task sent to whichever `manager` is currently bound.
result = user_proxy.initiate_chat(
    recipient=manager,
    message='''
Consider a two-stage etching problem on a poly-silicon:
In the first stage, we need to do an HAR etch with 30 nm of width and 100 nm of depth.
In the second stage, we need to perform a selectivity ratio of 22:1 (poly-silicon to SiO2) for an etch into another 45 nm of depth and the same width.  
The base is STI OX.
We need to protect the sidewalls from etching and keep them vertical. We also need to keep sediment or debris a little as possible.
What kind of gas and combination in ratio should we apply? Any potential issue we need to consider? 


''',
)
