# 使用 pipeline

参考： [
An Introduction to LlamaIndex Query Pipelines](https://docs.llamaindex.ai/en/stable/examples/pipeline/query_pipeline)

## 准备

In [1]:
%%time

from llama_index.core import SummaryIndex, Document
from llama_index.core import Settings

from llama_index.llms.openai_like import OpenAILike
from llama_index.embeddings.ollama import OllamaEmbedding

from milvus import default_server
from pymilvus import connections, utility

from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.core import (
    SimpleDirectoryReader,
    load_index_from_storage,
    VectorStoreIndex,
    StorageContext,
)
from llama_index.vector_stores.milvus import MilvusVectorStore

from llama_index.core.llms import ChatMessage


from llama_index.core.vector_stores import (
    MetadataFilter,
    MetadataFilters,
    FilterOperator,
)

CPU times: user 3.53 s, sys: 540 ms, total: 4.07 s
Wall time: 3.69 s


In [2]:
%%time

import logging
import sys

# 设置系统日志，便于设置level排查

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

CPU times: user 325 µs, sys: 0 ns, total: 325 µs
Wall time: 329 µs


In [3]:
%%time

# 初始化全局 llm

llm = OpenAILike(model="xiaoyu", 
                 api_base="http://192.168.0.72:3000/v1", 
                 api_key="sk-bJP6QSnUfjAYeYeE505d3eBf63A643BeB0B8E350Df9b7750",
                 is_chat_model=True,
                 temperature=0.5
                )

Settings.llm =llm

CPU times: user 118 ms, sys: 7.82 ms, total: 125 ms
Wall time: 125 ms


In [4]:
%%time

# 初始化全局 embedding 模型

ollama_embedding = OllamaEmbedding(
    model_name="dztech/bge-large-zh:v1.5",
    # model_name="bge-m3:latest",
    base_url="http://192.168.0.72:11435",
    ollama_additional_kwargs={"mirostat": 0}, # -mirostat N 使用 Mirostat 采样。
)

Settings.embed_model = ollama_embedding

CPU times: user 545 ms, sys: 24.1 ms, total: 569 ms
Wall time: 568 ms


In [5]:
Settings.chunk_size=128
Settings.chunk_overlap=10

Settings

_Settings(_llm=OpenAILike(callback_manager=<llama_index.core.callbacks.base.CallbackManager object at 0x7f85553b64d0>, system_prompt=None, messages_to_prompt=<function messages_to_prompt at 0x7f861b2b8310>, completion_to_prompt=<function default_completion_to_prompt at 0x7f861b0e6b00>, output_parser=None, pydantic_program_mode=<PydanticProgramMode.DEFAULT: 'default'>, query_wrapper_prompt=None, model='xiaoyu', temperature=0.5, max_tokens=None, logprobs=None, top_logprobs=0, additional_kwargs={}, max_retries=3, timeout=60.0, default_headers=None, reuse_client=True, api_key='sk-bJP6QSnUfjAYeYeE505d3eBf63A643BeB0B8E350Df9b7750', api_base='http://192.168.0.72:3000/v1', api_version='', context_window=3900, is_chat_model=True, is_function_calling_model=False, tokenizer=None), _embed_model=OllamaEmbedding(model_name='dztech/bge-large-zh:v1.5', embed_batch_size=10, callback_manager=<llama_index.core.callbacks.base.CallbackManager object at 0x7f85553b64d0>, num_workers=None, base_url='http://19

In [6]:

from llama_index.core import SimpleDirectoryReader

reader = SimpleDirectoryReader(input_files=["books1/孔乙己.txt"])
docs = reader.load_data()
len(docs)

1

In [7]:
docs

[Document(id_='87c8322d-b7a3-42e9-b5bd-96d313fc57ce', embedding=None, metadata={'file_path': 'books1/孔乙己.txt', 'file_name': '孔乙己.txt', 'file_type': 'text/plain', 'file_size': 10245, 'creation_date': '2024-05-10', 'last_modified_date': '2024-05-10'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text='孔乙己⑴\n\n\n\n\u3000\u3000鲁镇的酒店的格局，是和别处不同的：都是当街一个曲尺形的大柜台，柜里面预备着热水，可以随时温酒。做工的人，傍午傍晚散了工，每每花四文铜钱，买一碗酒，——这是二十多年前的事，现在每碗要涨到十文，——靠柜外站着，热热的喝了休息；倘肯多花一文，便可以买一碟盐煮笋，或者茴香豆，做下酒物了，如果出到十几文，那就能买一样荤菜，但这些顾客，多是短衣帮，大抵没有这样阔绰。只有穿长衫的，才踱进店面隔壁的房子里，要酒要菜，慢慢地坐喝。\n\n\u3000\u3000我从十二岁起，便在镇口的咸亨酒店里当伙计，掌柜说，样子太傻，怕侍候不了长衫主顾，就在外面做点事罢。外面的短衣主顾，虽然容易说话，但唠唠叨叨缠夹不清的也很不少。他们往往要亲眼看着黄酒从坛子里舀出，看过壶子底里有水没有，又亲看将壶子放在热水里，然后放心：在这严重兼督下，羼水也很为难。所以过了几天，掌柜又说我干不了这事。幸亏荐头的情面大，辞退不得，便改为专管温酒的一种无聊职务了。\n\n\u3000\u3000我从此便整天的站在柜台里，专

In [8]:
%%time

# 启动milvus服务器

default_server.set_base_dir('milvus_data')

# (OPTIONAL) if you want cleanup previous data
default_server.cleanup()

# Start your milvus server
default_server.start()

# Now you could connect with localhost and the given port
# Port is defined by default_server.listen_port
connections.connect(host='127.0.0.1', port=default_server.listen_port)

# Check if the server is ready.
print(utility.get_server_version())

# Stop your milvus server
# default_server.stop()

default_server.listen_port

v2.3.5-lite
CPU times: user 3.22 s, sys: 260 ms, total: 3.48 s
Wall time: 7 s


19530

In [9]:
%%time

# 从文档建立索引

vector_store = MilvusVectorStore(dim=1024, overwrite=True)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    docs, storage_context=storage_context
)

DEBUG:pymilvus.milvus_client.milvus_client:Created new connection using: c3116a3d4951492d9030e2312d4b58cf
Created new connection using: c3116a3d4951492d9030e2312d4b58cf
DEBUG:pymilvus.milvus_client.milvus_client:Successfully created collection: llamacollection
Successfully created collection: llamacollection
DEBUG:pymilvus.milvus_client.milvus_client:Successfully created an index on collection: llamacollection
Successfully created an index on collection: llamacollection
CPU times: user 252 ms, sys: 15.6 ms, total: 268 ms
Wall time: 12.2 s


## pipeline: prompt>llm

In [10]:
%%time

from llama_index.core.query_pipeline import QueryPipeline
from llama_index.core import PromptTemplate

prompt_str = "生成与{movie_name}相关的电影"
prompt_tmpl = PromptTemplate(prompt_str)

llm_c = llm.as_query_component(streaming=True)

p = QueryPipeline(chain=[prompt_tmpl, llm_c], verbose=True)

CPU times: user 77.9 ms, sys: 8.19 ms, total: 86.1 ms
Wall time: 85.8 ms


In [11]:
%%time

output=p.run(movie_name="无间道")
for o in output:
    print(o.delta, end="")
print()

[1;3;38;2;155;135;227m> Running module ec7f11a3-5941-4ae7-addc-f5af7dfafffc with input: 
movie_name: 无间道

[0m[1;3;38;2;155;135;227m> Running module d73144da-1c1c-4785-82db-b7f83a7d3dad with input: 
messages: 生成与无间道相关的电影

[0mINFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
一部与《无间道》风格相似的电影推荐是《盗亦有道》（又名《偷天陷阱》），它同样是由香港导演吴宇森执导，讲述了一段充满智斗和背叛的警匪故事。影片中两位主角之间错综复杂的身份反转和心理较量，充满了紧张刺激的剧情和出人意料的情节，与《无间道》系列有异曲同工之妙。这部电影由尼古拉斯·凯奇（Nicholas Cage）和安吉丽娜·朱莉（Angelina Jolie）主演，视觉效果和演员表现同样精彩。
CPU times: user 189 ms, sys: 6.4 ms, total: 196 ms
Wall time: 4.75 s


## pipeline: prompt>llm>prompt>embeddings

In [21]:
%%time

# 生成有关主题的问题
Prompt_str1="请生成一个关于主题为{topic}的有关孔乙己生平的简明问题" 
prompt_tmpl1=PromptTemplate ( Prompt_str1 )

# Prompt_tmpl1

# use HyDE to hallucinate answer.
prompt_str2 = (
    "请写一段话来回答问题\n"
    "尝试包含尽可能多的关键细节。\n"
    "\n"
    "\n"
    "{query_str}\n"
    "\n"
    "\n"
    '段落:"""\n'
)
prompt_tmpl2 = PromptTemplate(prompt_str2)

retriever = index.as_retriever(similarity_top_k=5)

p = QueryPipeline(
    chain=[prompt_tmpl1, llm, prompt_tmpl2, llm, retriever], verbose=True
)

nodes = p.run(topic="喝酒")
len(nodes)

[1;3;38;2;155;135;227m> Running module 2c4285d2-ece7-4371-949f-29c5f0c383ce with input: 
topic: 喝酒

[0m[1;3;38;2;155;135;227m> Running module 56f701da-9709-472f-8af4-ca6759ea5ff1 with input: 
messages: 请生成一个关于主题为喝酒的有关孔乙己生平的简明问题

[0mINFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
[1;3;38;2;155;135;227m> Running module 4a05d6c3-d040-4e2a-9195-7c2bd7c4c28b with input: 
query_str: assistant: 孔乙己在日常生活中是否经常饮酒？

[0m[1;3;38;2;155;135;227m> Running module d0a296f8-6053-4f6a-869c-eccd905593c1 with input: 
messages: 请写一段话来回答问题
尝试包含尽可能多的关键细节。


孔乙己在日常生活中是否经常饮酒？


段落:"""


[0mINFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
[1;3;38;2;155;135;227m> Running module 52ad970f-61c1-4075-824b-44b5a80c37bc with input: 
input: assistant: 孔乙己，这个形象出自鲁迅

5

In [22]:
%%time

nodes = p.run(topic="喝酒")
len(nodes)

[1;3;38;2;155;135;227m> Running module 2c4285d2-ece7-4371-949f-29c5f0c383ce with input: 
topic: 喝酒

[0m[1;3;38;2;155;135;227m> Running module 56f701da-9709-472f-8af4-ca6759ea5ff1 with input: 
messages: 请生成一个关于主题为喝酒的有关孔乙己生平的简明问题

[0mINFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
[1;3;38;2;155;135;227m> Running module 4a05d6c3-d040-4e2a-9195-7c2bd7c4c28b with input: 
query_str: assistant: 问题： 孔乙己在小说中与酒有何关联？他的人生是否常围绕着饮酒？

[0m[1;3;38;2;155;135;227m> Running module d0a296f8-6053-4f6a-869c-eccd905593c1 with input: 
messages: 请写一段话来回答问题
尝试包含尽可能多的关键细节。


问题： 孔乙己在小说中与酒有何关联？他的人生是否常围绕着饮酒？


段落:"""


[0mINFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
[1;3;38;2;155;135;227m> Running module 52ad970f-61c1-4075-824b-44b5a80c37bc with input: 


5

## 完整的 RAG pipeline

In [25]:
%%time

from llama_index.core.postprocessor import SentenceTransformerRerank

reranker = SentenceTransformerRerank(model='/models/bge-reranker-v2-m3', top_n=5)

CPU times: user 1.38 s, sys: 1.13 s, total: 2.51 s
Wall time: 7.76 s


In [26]:
%%time

from llama_index.core.response_synthesizers import TreeSummarize

# define modules
prompt_str = "请生成一个关于主题为{topic}的有关孔乙己生平的简明问题"
prompt_tmpl = PromptTemplate(prompt_str)
retriever = index.as_retriever(similarity_top_k=3)
summarizer = TreeSummarize(llm=llm)

CPU times: user 193 µs, sys: 0 ns, total: 193 µs
Wall time: 198 µs


In [27]:
%%time

# define query pipeline
p = QueryPipeline(verbose=True)
p.add_modules(
    {
        "llm": llm,
        "prompt_tmpl": prompt_tmpl,
        "retriever": retriever,
        "summarizer": summarizer,
        "reranker": reranker,
    }
)

CPU times: user 268 µs, sys: 0 ns, total: 268 µs
Wall time: 273 µs


In [28]:
%%time

p.add_link("prompt_tmpl", "llm")
p.add_link("llm", "retriever")
p.add_link("retriever", "reranker", dest_key="nodes")
p.add_link("llm", "reranker", dest_key="query_str")
p.add_link("reranker", "summarizer", dest_key="nodes")
p.add_link("llm", "summarizer", dest_key="query_str")

# look at summarizer input keys
print(summarizer.as_query_component().input_keys)

required_keys={'query_str', 'nodes'} optional_keys=set()
CPU times: user 0 ns, sys: 163 µs, total: 163 µs
Wall time: 167 µs


In [None]:
%%time
%%capture

!pip install pyvis

In [31]:
%%time

## create graph
from pyvis.network import Network

net = Network(notebook=True, cdn_resources="in_line", directed=True)
net.from_nx(p.dag)
net.show("rag_dag.html")

rag_dag.html
CPU times: user 55.2 ms, sys: 7.37 ms, total: 62.6 ms
Wall time: 61.9 ms


![pipeline](https://docs.llamaindex.ai/en/stable/_static/query/pipeline_rag_example.png)


In [32]:
%%time

response = p.run(topic="喝酒")

[1;3;38;2;155;135;227m> Running module prompt_tmpl with input: 
topic: 喝酒

[0m[1;3;38;2;155;135;227m> Running module llm with input: 
messages: 请生成一个关于主题为喝酒的有关孔乙己生平的简明问题

[0mINFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
[1;3;38;2;155;135;227m> Running module retriever with input: 
input: assistant: 问题： 孔乙己在小说中与酒有何关联？他如何通过饮酒来展现其性格特点或生活状态？

[0m[1;3;38;2;155;135;227m> Running module reranker with input: 
query_str: assistant: 问题： 孔乙己在小说中与酒有何关联？他如何通过饮酒来展现其性格特点或生活状态？
nodes: [NodeWithScore(node=TextNode(id_='215a3a90-ba1d-4ad4-a1ba-8aa14a1eb3b3', embedding=None, metadata={'file_path': 'books1/孔乙己.txt', 'file_name': '孔乙己.txt', 'file_type': 'text/plain', 'file_size': 10245,...

[0m

Batches:   0%|          | 0/1 [00:00<?, ?it/s]

[1;3;38;2;155;135;227m> Running module summarizer with input: 
query_str: assistant: 问题： 孔乙己在小说中与酒有何关联？他如何通过饮酒来展现其性格特点或生活状态？
nodes: [NodeWithScore(node=TextNode(id_='215a3a90-ba1d-4ad4-a1ba-8aa14a1eb3b3', embedding=None, metadata={'file_path': 'books1/孔乙己.txt', 'file_name': '孔乙己.txt', 'file_type': 'text/plain', 'file_size': 10245,...

[0mINFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
CPU times: user 925 ms, sys: 119 ms, total: 1.04 s
Wall time: 11.9 s


In [33]:
print(str(response))

孔乙己在鲁迅的小说《孔乙己》中，酒是他的生活常态和社交媒介。他经常喝半碗酒，这表明他经济状况并不宽裕，只能买得起最低廉的酒。当他涨红的脸色渐渐恢复时，旁人问及他的识字能力，他表现出不屑置辩的态度，这可能反映了他自尊心强但又不愿被人质疑的形象。酒馆里的喝酒场景，如花四文铜钱温酒、加价购买下酒物等，都揭示了他作为社会底层人物的生活状态和消费习惯，通过饮酒这一细节，孔乙己的穷困潦倒和孤苦伶仃的性格特征得以展现。


## todo

In [None]:
- 使用 InputComponent 实现一个输入走不同的分支
- 在 Query Pipeline 定义自己的组件