In [1]:
import chromadb
from llama_index.core import (
    Settings,
    StorageContext,
    VectorStoreIndex,
    get_response_synthesizer,
)
from llama_index.core.agent import ReActAgentWorker
from llama_index.core.postprocessor import SimilarityPostprocessor
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.vector_stores.chroma import ChromaVectorStore

from tools import *
from prompts import *
# Set the default chat model and embedding model on llama_index's `Settings`.
# `get_llm` / `get_embed` are not defined in this notebook (presumably they
# come from the star-imported `tools` module — confirm there).
Settings.llm = get_llm(model="gpt-4o-mini")
Settings.embed_model = get_embed()


In [2]:
# Values a reader is most likely to tune, gathered in one place.
VECTOR_DB_PATH = "./knowledge_base/vector_base"
DOC_COLLECTION_NAME = "graph_func_doc"
RETRIEVAL_TOP_K = 10

# Open the on-disk Chroma database and fetch the document collection
# (`get_or_create_collection` creates it when it does not exist yet).
db = chromadb.PersistentClient(path=VECTOR_DB_PATH)
chroma_collection = db.get_or_create_collection(DOC_COLLECTION_NAME)

# Expose the Chroma collection to llama_index as its vector store.
vector_store = ChromaVectorStore(chroma_collection=chroma_collection)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Build the index on top of the vectors already held by the store.
index = VectorStoreIndex.from_vector_store(
    vector_store,
    storage_context=storage_context,
)

# Retriever: fetch the RETRIEVAL_TOP_K most similar nodes per query.
retriever = VectorIndexRetriever(index=index, similarity_top_k=RETRIEVAL_TOP_K)

# Response synthesizer using the project's QA prompt template.
# NOTE(review): `DUNC_DOC_QA_PROMPT` is not defined in this notebook
# (presumably exported by `prompts`); "DUNC" may be a typo of "FUNC" — confirm.
response_synthesizer = get_response_synthesizer(text_qa_template=DUNC_DOC_QA_PROMPT)

# Post-retrieval filter with a cutoff of 0.
# NOTE(review): this looks like it keeps nearly every retrieved node — confirm intent.
similarity_filter = SimilarityPostprocessor(similarity_cutoff=0)

# Assemble retriever, synthesizer and filter into the query engine.
query_engine = RetrieverQueryEngine(
    retriever=retriever,
    response_synthesizer=response_synthesizer,
    node_postprocessors=[similarity_filter],
)


In [3]:
# Natural-language task description that is interpolated into the prompts below.
# This is a regular (non-raw) string, so the `\n` and `\"` escapes inside it are
# interpreted: they become real newlines and plain double quotes at runtime.
query_str="""
Picture this, you're a game tester and you're trying to figure out how players are interacting with each other within a massive multiplayer video game. In the game, each team's communication and collaboration can be represented as a graph, where each player is a node and the communication between them is an edge. You use this graph to investigate the real-time strategy (RTS) squad communication patterns. To understand this communication graph better, you decide to see if there are any distinct communities within the whole player network. You have with you a graph of the details saved in a file called 'copenhagen.gml', taken from the comprehensive Copenhagen Networks Study.\n\nTo do this, you choose to use two popular community detection algorithms - Louvain and Der - to figure out if there are any trends of players forming specific communities. However, simple detection isn't enough. You also want to understand the characteristics of these communities, such as their size and internal_edge_density, which mirrors the intensity or frequency of interactions within the communities. \n\nThe challenge here lies in comparing and visualizing the results from both algorithms: you want to visualize the relation between the community size and the internal_edge_density from the two different algorithms using \"plot_com_properties_relation\" function. This way you could easily understand how these communities vary and behave in terms of their size and density of communication.
"""
# Disabled experiment: send the question directly to `query_engine` and print the
# answer. The prompt text below is an English rendering of the original Chinese
# wording; restore the Chinese text if this call is re-enabled as-is.
# response = query_engine.query(f"""
# You are a document extractor, proficient in basic graph theory, graph statistical learning and graph embedding.
# Based on the question {query_str}, extract the documents it may need, paying particular attention to the functions or algorithms used in those documents.

# """)
# print(response)

In [None]:
# Build a ReAct agent over the project tools and ask it which documents the task
# in `query_str` is likely to need.
# `ReActAgentWorker` is imported with the other dependencies in the import cell
# at the top of the notebook, so this cell no longer hides a dependency.
# `get_llm` / `get_tools` are not defined in this notebook (presumably they come
# from the star-imported `tools` module — confirm there).
llm = get_llm(model='gpt-4o-mini')
tools = get_tools()
react_worker = ReActAgentWorker.from_tools(
    tools,
    llm=llm,
    verbose=True,  # echo the agent's intermediate steps in the cell output
)
agent = react_worker.as_agent()

# The prompt is sent verbatim in Chinese. In English it says: you are a document
# extractor proficient in basic graph theory, graph statistical learning and
# graph embedding; based on the question, extract the documents it may need;
# focus on the functions or algorithms used in the documents; function names
# usually contain "_" and algorithm names usually start with a capital letter;
# finally, answer in JSON format and include all documents.
res = agent.chat(f"""
你是一个文档提取器，精通精通基础图论、图统计学习和图嵌入三个方面的知识
你需要根据问题{query_str}来帮我抽取出他可能需要使用到的文档
重点观察文档中使用到的函数或者算法
函数一般含有_,算法开头一般是大写
最后你需要按json格式给出答案，包含所有文档
""")

In [None]:
# Print the `response` attribute of `res`, the value returned by `agent.chat(...)`.
print(res.response)