# Milvus Lite 测试示例

## 准备

In [1]:
%%time
%%capture

!pip install milvus
!pip install pymilvus
!pip install llama-index-vector-stores-milvus

CPU times: user 21.9 ms, sys: 16.3 ms, total: 38.2 ms
Wall time: 6.89 s


In [2]:
%%time

import logging
import sys

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core import StorageContext
from llama_index.embeddings.huggingface import HuggingFaceEmbedding
from IPython.display import Markdown, display
from llama_index.core import Settings
from llama_index.embeddings.ollama import OllamaEmbedding

from llama_index.llms.openai_like import OpenAILike

CPU times: user 2.6 s, sys: 352 ms, total: 2.96 s
Wall time: 2.62 s


In [3]:
# Global chunking parameters used when documents are split into nodes.
Settings.chunk_size = 128
Settings.chunk_overlap = 10

# Echo the settings object so the effective configuration is visible.
Settings

_Settings(_llm=None, _embed_model=None, _callback_manager=None, _tokenizer=None, _node_parser=SentenceSplitter(include_metadata=True, include_prev_next_rel=True, callback_manager=<llama_index.core.callbacks.base.CallbackManager object at 0x7fe811ecb670>, id_func=<function default_id_func at 0x7fe8cfe649d0>, chunk_size=128, chunk_overlap=10, separator=' ', paragraph_separator='\n\n\n', secondary_chunking_regex='[^,.;。？！]+[,.;。？！]?'), _prompt_helper=None, _transformations=None)

In [15]:
%%time

llm = OpenAILike(model="xiaoyu", 
                 api_base="http://192.168.0.72:3000/v1", 
                 api_key="sk-bJP6QSnUfjAYeYeE505d3eBf63A643BeB0B8E350Df9b7750",
                 is_chat_model=True,
                 temperature=0.1
                )

Settings.llm =llm

CPU times: user 327 µs, sys: 0 ns, total: 327 µs
Wall time: 332 µs


In [16]:
%%time

# 初始化全局 embedding 模型
from llama_index.embeddings.ollama import OllamaEmbedding

ollama_embedding = OllamaEmbedding(
    model_name="dztech/bge-large-zh:v1.5",
    # model_name="bge-m3:latest",
    base_url="http://192.168.0.72:11435",
    ollama_additional_kwargs={"mirostat": 0}, # -mirostat N 使用 Mirostat 采样。
)

Settings.embed_model = ollama_embedding

CPU times: user 176 µs, sys: 0 ns, total: 176 µs
Wall time: 181 µs


## 启动 Milvus

In [6]:
%%time

from milvus import default_server
from pymilvus import connections, utility

default_server.set_base_dir('milvus_data')

# (OPTIONAL) if you want cleanup previous data
default_server.cleanup()

# Start your milvus server
default_server.start()

# Now you could connect with localhost and the given port
# Port is defined by default_server.listen_port
connections.connect(host='127.0.0.1', port=default_server.listen_port)

# Check if the server is ready.
print(utility.get_server_version())

# Stop your milvus server
# default_server.stop()

default_server.listen_port

v2.3.5-lite
CPU times: user 3.48 s, sys: 348 ms, total: 3.83 s
Wall time: 6.66 s


19530

## 加载索引

In [7]:
%%time

# load documents
documents = SimpleDirectoryReader("./books/").load_data()
print("Document ID:", documents[0].doc_id)

Document ID: 941b7b27-e6bc-4bdf-a403-a4613b4648df
CPU times: user 10.8 ms, sys: 94 µs, total: 10.9 ms
Wall time: 10.4 ms


In [8]:
%%time

from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.vector_stores.milvus import MilvusVectorStore

vector_store = MilvusVectorStore(dim=1024, overwrite=True)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    documents, storage_context=storage_context
)

CPU times: user 17 s, sys: 631 ms, total: 17.6 s
Wall time: 1min 38s


## 基本嵌入查询

In [9]:
%%time

# Query Data
query_engine = index.as_query_engine(
    streaming=True,
    similarity_top_k=100,
    similarity_cutoff=0.5
)

CPU times: user 31.7 ms, sys: 3.99 ms, total: 35.7 ms
Wall time: 35.1 ms


In [19]:
%%time

streaming_response = query_engine.query("方鸿渐的妻子是谁")
streaming_response.print_response_stream()
print()

方鸿渐的妻子是孙柔嘉。
CPU times: user 702 ms, sys: 2.85 ms, total: 705 ms
Wall time: 1.4 s


In [11]:
%%time

streaming_response = query_engine.query("方鸿渐的父亲是谁，说出他的名字")
streaming_response.print_response_stream()
print()

方鸿渐的父亲在文本中并未提及，所以我无法提供他的名字。
CPU times: user 115 ms, sys: 1.09 ms, total: 116 ms
Wall time: 10.5 s


In [12]:
%%time

streaming_response = query_engine.query("文中提到的局部真理是啥意思")
streaming_response.print_response_stream()
print()

局部真理在文中可能被校长用来比喻教职员工在学校的特定情境中发挥的作用，就像细胞对于生物体一样，是整体不可或缺的一部分。它强调了教职员工的独特贡献，但并不意味着这个真理适用于所有教育环境，每个教师和学校都有其自身的特性和价值。
CPU times: user 413 ms, sys: 32.8 ms, total: 446 ms
Wall time: 28.6 s


## 使用 Rerank 重排序

In [13]:
%%time

from llama_index.core.postprocessor import SentenceTransformerRerank

reranker = SentenceTransformerRerank(model='/models/bge-reranker-v2-m3', top_n=5)

CPU times: user 1.31 s, sys: 825 ms, total: 2.13 s
Wall time: 1.14 s


In [49]:
%%time

query_engine = index.as_query_engine(
    streaming=True,
    similarity_top_k=100,
    node_postprocessors=[reranker],
    similarity_cutoff=0.5
)

CPU times: user 176 µs, sys: 6 µs, total: 182 µs
Wall time: 187 µs


In [47]:
%%time

streaming_response = query_engine.query("方鸿渐的妻子是谁")
streaming_response.print_response_stream()
print()

方鸿渐的妻子是孙柔嘉。
CPU times: user 1.36 s, sys: 3.36 ms, total: 1.37 s
Wall time: 2.35 s


In [46]:
%%time

streaming_response = query_engine.query("方鸿渐的父亲是谁，说出他的名字")
streaming_response.print_response_stream()
print()

方鸿渐的父亲是一乡之望，但具体的名字在给定的文本中并未提及。
CPU times: user 1.49 s, sys: 7.95 ms, total: 1.5 s
Wall time: 3.03 s


In [50]:
%%time

streaming_response = query_engine.query("文中提到的局部真理是啥意思")
streaming_response.print_response_stream()
print()

文中提到的“局部真理”是指鲍小姐虽然并非赤裸裸的，但她的某些特质或情况被认为是接近或代表了“真理”，这个词在这里可能带有比喻或讽刺的意味，暗示她并非完全的真实或完美，而是部分地符合某种观念或描述。
CPU times: user 786 ms, sys: 23.9 ms, total: 810 ms
Wall time: 3.71 s
