# Milvus Lite 测试示例

## 准备

In [1]:
%%time
%%capture

!pip install milvus
!pip install pymilvus
!pip install llama-index-vector-stores-milvus
!pip install llama-index-embeddings-ollama
!pip install llama-index-llms-openai-like
!pip install llama_index.core
!pip install llama-index-readers-file

CPU times: user 67.2 ms, sys: 22.2 ms, total: 89.4 ms
Wall time: 21.7 s


In [2]:
%%time

import logging
import sys

from llama_index.core import VectorStoreIndex, SimpleDirectoryReader
from llama_index.core import StorageContext
from IPython.display import Markdown, display
from llama_index.core import Settings
from llama_index.embeddings.ollama import OllamaEmbedding

from llama_index.llms.openai_like import OpenAILike

CPU times: user 2.76 s, sys: 658 ms, total: 3.42 s
Wall time: 4.62 s


In [3]:
# Global chunking configuration: chunk size 128 with an overlap of 10.
Settings.chunk_overlap = 10
Settings.chunk_size = 128

# Bare last expression: display the resulting global settings object.
Settings

_Settings(_llm=None, _embed_model=None, _callback_manager=None, _tokenizer=None, _node_parser=SentenceSplitter(include_metadata=True, include_prev_next_rel=True, callback_manager=<llama_index.core.callbacks.base.CallbackManager object at 0x7fdfc23f75b0>, id_func=<function default_id_func at 0x7fe0806e0a60>, chunk_size=128, chunk_overlap=10, separator=' ', paragraph_separator='\n\n\n', secondary_chunking_regex='[^,.;。？！]+[,.;。？！]?'), _prompt_helper=None, _transformations=None)

In [28]:
%%time

llm = OpenAILike(model="xiaoyu", 
                 api_base="http://192.168.0.72:3000/v1", 
                 api_key="sk-bJP6QSnUfjAYeYeE505d3eBf63A643BeB0B8E350Df9b7750",
                 is_chat_model=True,
                 temperature=0.1
                )

Settings.llm =llm

CPU times: user 201 µs, sys: 23 µs, total: 224 µs
Wall time: 228 µs


In [5]:
%%time

# 初始化全局 embedding 模型
from llama_index.embeddings.ollama import OllamaEmbedding

ollama_embedding = OllamaEmbedding(
    model_name="dztech/bge-large-zh:v1.5",
    # model_name="bge-m3:latest",
    base_url="http://192.168.0.72:11435",
    ollama_additional_kwargs={"mirostat": 0}, # -mirostat N 使用 Mirostat 采样。
)

Settings.embed_model = ollama_embedding

CPU times: user 447 ms, sys: 37.1 ms, total: 484 ms
Wall time: 618 ms


## 启动 Milvus

In [6]:
%%time

from milvus import default_server
from pymilvus import connections, utility

# Use 'milvus_data' (relative to the working directory) as the server's base directory.
default_server.set_base_dir('milvus_data')

# Optional step, but note that it runs unconditionally here.
# NOTE(review): presumably this removes data left by earlier runs — comment it
# out if existing data must be kept; confirm against the milvus package docs.
default_server.cleanup()

# Start the embedded Milvus server.
default_server.start()

# Connect pymilvus to the local server; the port is taken from
# default_server.listen_port rather than being hardcoded.
connections.connect(host='127.0.0.1', port=default_server.listen_port)

# Readiness check: print the server version (recorded run: v2.3.5-lite).
print(utility.get_server_version())

# To shut the server down when finished, run:
# default_server.stop()

# Bare last expression: display the port the server listens on (recorded run: 19530).
default_server.listen_port

v2.3.5-lite
CPU times: user 3.49 s, sys: 360 ms, total: 3.85 s
Wall time: 6.83 s


19530

## 加载文档并构建索引

In [7]:
%%time

# load documents
documents = SimpleDirectoryReader("./books/").load_data()
print("Document ID:", documents[0].doc_id)

Document ID: f47bbb9f-c0fb-4a99-9135-4a360c6400be
CPU times: user 13.5 ms, sys: 12 µs, total: 13.5 ms
Wall time: 24.1 ms


In [8]:
%%time

from llama_index.core import VectorStoreIndex, StorageContext
from llama_index.vector_stores.milvus import MilvusVectorStore

vector_store = MilvusVectorStore(dim=1024, overwrite=True)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    documents, storage_context=storage_context
)

CPU times: user 16.9 s, sys: 806 ms, total: 17.7 s
Wall time: 1min 37s


## 基本嵌入查询

In [9]:
%%time

# Query Data
query_engine = index.as_query_engine(
    streaming=True,
    similarity_top_k=100,
    similarity_cutoff=0.5
)

CPU times: user 41.3 ms, sys: 0 ns, total: 41.3 ms
Wall time: 55.1 ms


In [10]:
%%time

streaming_response = query_engine.query("方鸿渐的妻子是谁")
streaming_response.print_response_stream()
print()

他的妻子是孙柔嘉。
CPU times: user 168 ms, sys: 11.4 ms, total: 180 ms
Wall time: 13.4 s


In [11]:
%%time

streaming_response = query_engine.query("方鸿渐的父亲是谁，说出他的名字")
streaming_response.print_response_stream()
print()

由于提供的上下文中并未提及方鸿渐的父亲姓名，所以我无法直接给出答案。

CPU times: user 126 ms, sys: 3.53 ms, total: 130 ms
Wall time: 10.5 s


In [12]:
%%time

streaming_response = query_engine.query("文中提到的局部真理是啥意思")
streaming_response.print_response_stream()
print()

书中提及的“局部真理”暗示的是一个观点在特定情况下显得有道理，但它并非普遍适用或绝对正确，可能局限于某个特定情境或个体经验。这就像教书的例子，尽管个人的经验有限，但在特定的教学环境中仍能派上用场。
CPU times: user 326 ms, sys: 18.2 ms, total: 344 ms
Wall time: 23.5 s


## Rerank（重排序）

In [13]:
%%time

from llama_index.core.postprocessor import SentenceTransformerRerank

reranker = SentenceTransformerRerank(model='/models/bge-reranker-v2-m3', top_n=5)

CPU times: user 1.37 s, sys: 1.57 s, total: 2.94 s
Wall time: 15.8 s


In [29]:
%%time

query_engine = index.as_query_engine(
    streaming=True,
    similarity_top_k=100,
    node_postprocessors=[reranker]
)

CPU times: user 217 µs, sys: 0 ns, total: 217 µs
Wall time: 223 µs


In [30]:
%%time

streaming_response = query_engine.query("方鸿渐的妻子是谁")
streaming_response.print_response_stream()
print()

方鸿渐的妻子是孙柔嘉。
CPU times: user 740 ms, sys: 8.24 ms, total: 748 ms
Wall time: 2.07 s


In [31]:
%%time

streaming_response = query_engine.query("方鸿渐的父亲是谁，说出他的名字")
streaming_response.print_response_stream()
print()

方鸿渐的父亲没有直接的名字提及。不过，从上下文可以推断他可能是一个有影响力的人物，因为唐小姐提到“你昨天闯了大祸，知道么？”并且鹏图也提到了“就是法国的博士，报上见过的”，这可能暗示他与某个知名人士有关。然而，具体的名字并未在给定的文本中提供。
CPU times: user 855 ms, sys: 11 ms, total: 866 ms
Wall time: 4.71 s


In [32]:
%%time

streaming_response = query_engine.query("文中提到的局部真理是啥意思")
streaming_response.print_response_stream()
print()

文中提到的“局部真理”是指鲍小姐虽然并非赤裸裸的（一丝不挂），但她的某些特质或情况被认为是接近或代表了“真理”，这可能是一种比喻，暗示她具有某种真实或实在的一面，但并不全面。这里的“局部”强调不是完整的、绝对的真理，而是部分真实的反映。
CPU times: user 805 ms, sys: 11.5 ms, total: 817 ms
Wall time: 4.26 s
