# 使用 qdrant 测试小说《围城》数据集

## 准备

In [1]:
%%time

# 导入需要的包

from llama_index.core import Settings
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.core import (
    SimpleDirectoryReader,
    load_index_from_storage,
    VectorStoreIndex,
    StorageContext,
)
from llama_index.llms.openai_like import OpenAILike

import qdrant_client
from llama_index.vector_stores.qdrant import QdrantVectorStore
from qdrant_client.models import Distance, VectorParams

CPU times: user 3.33 s, sys: 359 ms, total: 3.69 s
Wall time: 3.33 s


In [3]:
Settings.chunk_size=128
Settings.chunk_overlap=10

Settings

_Settings(_llm=OpenAILike(callback_manager=<llama_index.core.callbacks.base.CallbackManager object at 0x7f109c54cc70>, system_prompt=None, messages_to_prompt=<function messages_to_prompt at 0x7f115c467d90>, completion_to_prompt=<function default_completion_to_prompt at 0x7f115c2da5f0>, output_parser=None, pydantic_program_mode=<PydanticProgramMode.DEFAULT: 'default'>, query_wrapper_prompt=None, model='xiaoyu', temperature=0.1, max_tokens=None, logprobs=None, top_logprobs=0, additional_kwargs={}, max_retries=3, timeout=60.0, default_headers=None, reuse_client=True, api_key='sk-bJP6QSnUfjAYeYeE505d3eBf63A643BeB0B8E350Df9b7750', api_base='http://192.168.0.72:3000/v1', api_version='', context_window=3900, is_chat_model=True, is_function_calling_model=False, tokenizer=None), _embed_model=None, _callback_manager=<llama_index.core.callbacks.base.CallbackManager object at 0x7f109c54cc70>, _tokenizer=None, _node_parser=SentenceSplitter(include_metadata=True, include_prev_next_rel=True, callback

In [2]:
%%time

llm = OpenAILike(model="xiaoyu", 
                 api_base="http://192.168.0.72:3000/v1", 
                 api_key="sk-bJP6QSnUfjAYeYeE505d3eBf63A643BeB0B8E350Df9b7750",
                 is_chat_model=True,
                 temperature=0.1
                )

Settings.llm =llm

CPU times: user 138 ms, sys: 0 ns, total: 138 ms
Wall time: 137 ms


In [4]:
%%time

# 初始化全局 embedding 模型

ollama_embedding = OllamaEmbedding(
    model_name="dztech/bge-large-zh:v1.5",
    # model_name="bge-m3:latest",
    base_url="http://192.168.0.72:11435",
    ollama_additional_kwargs={"mirostat": 0}, # -mirostat N 使用 Mirostat 采样。
)

Settings.embed_model = ollama_embedding

CPU times: user 558 ms, sys: 20 ms, total: 578 ms
Wall time: 579 ms


In [5]:
%%time

import logging
import sys

# 设置系统日志，便于设置level排查

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

CPU times: user 217 µs, sys: 21 µs, total: 238 µs
Wall time: 242 µs


In [6]:
%%time

# 内存启动qdrant

client = qdrant_client.QdrantClient(
    location=":memory:",
    vectors_config=VectorParams(size=1024, distance=Distance.COSINE),
)

CPU times: user 1.46 ms, sys: 0 ns, total: 1.46 ms
Wall time: 1.15 ms


## 索引

In [7]:
%%time

# load documents
documents = SimpleDirectoryReader("./books/").load_data()
print("Document ID:", documents[0].doc_id)

Document ID: 392800d8-9662-4cb2-b50e-43ede8977f17
CPU times: user 11.1 ms, sys: 0 ns, total: 11.1 ms
Wall time: 10.6 ms


In [8]:
%%time
vector_store = QdrantVectorStore(client=client, collection_name="simple")
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    documents,
    storage_context=storage_context,
)

CPU times: user 16.4 s, sys: 614 ms, total: 17 s
Wall time: 1min 24s


## 查询

### 基本嵌入查询

In [9]:
%%time

query_engine = index.as_query_engine(
    streaming=True,
    similarity_top_k=100,
    similarity_cutoff=0.5
)

CPU times: user 31 ms, sys: 7.79 ms, total: 38.8 ms
Wall time: 38.2 ms


In [10]:
%%time

streaming_response = query_engine.query("方鸿渐的妻子是谁")
streaming_response.print_response_stream()
print()

INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
方鸿渐的妻子是孙柔嘉。
CPU times: user 213 ms, sys: 154 ms, total: 367 ms
Wall time: 13.3 s


In [13]:
%%time

streaming_response = query_engine.query("方鸿渐的父亲是谁，说出他的名字")
streaming_response.print_response_stream()
print()

INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
</|system|>
CPU times: user 218 ms, sys: 118 ms, total: 336 ms
Wall time: 11.6 s


In [14]:
%%time

streaming_response = query_engine.query("方鸿渐的父亲是谁，说出他的名字，名字好像带一个翁字")
streaming_response.print_response_stream()
print()

INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
方鸿渐的父亲名为方遯翁。
CPU times: user 194 ms, sys: 140 ms, total: 333 ms
Wall time: 12.6 s


In [15]:
%%time

streaming_response = query_engine.query("文中提到的局部真理是啥意思")
streaming_response.print_response_stream()
print()

INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.16

### rerank

In [16]:
%%time

from llama_index.core.postprocessor import SentenceTransformerRerank

reranker = SentenceTransformerRerank(model='/models/bge-reranker-v2-m3', top_n=5)

CPU times: user 1.38 s, sys: 724 ms, total: 2.1 s
Wall time: 1.39 s


In [17]:
%%time

query_engine = index.as_query_engine(
    streaming=True,
    similarity_top_k=100,
    node_postprocessors=[reranker],
    similarity_cutoff=0.5
)

CPU times: user 171 µs, sys: 19 µs, total: 190 µs
Wall time: 193 µs


In [18]:
%%time

streaming_response = query_engine.query("方鸿渐的妻子是谁")
streaming_response.print_response_stream()
print()

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
孙柔嘉
CPU times: user 1.48 s, sys: 254 ms, total: 1.73 s
Wall time: 2.58 s


In [19]:
%%time

streaming_response = query_engine.query("方鸿渐的父亲是谁，说出他的名字")
streaming_response.print_response_stream()
print()

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
方鸿渐的父亲没有直接的名字提及。不过，从上下文可以推断他可能是方鸿渐的长辈，且对他的行为有所干预和指导，比如督促他回府陪伴祖父母，并提及苏小姐和唐晓芙的父亲。因此，如果需要一个名字，可以假设他可能类似于传统中国家长的角色，但没有具体的名字在给定文本中。
CPU times: user 950 ms, sys: 131 ms, total: 1.08 s
Wall time: 4.33 s


In [20]:
%%time

streaming_response = query_engine.query("方鸿渐的父亲是谁，说出他的名字，名字好像带一个翁字")
streaming_response.print_response_stream()
print()

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
方鸿渐的父亲名字是豚翁。
CPU times: user 905 ms, sys: 117 ms, total: 1.02 s
Wall time: 1.86 s


In [21]:
%%time

streaming_response = query_engine.query("文中提到的局部真理是啥意思")
streaming_response.print_response_stream()
print()

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
文中提到的“局部的真理”可能是在讨论某种比喻或引申含义，它并不是字面上的完全赤裸裸的真相，而是一种部分的、不全面的或者经过修正的说法。具体情境中，这可能是鲍小姐的情况被描述为没有完全袒露，但又不是完全不真实，因此被称为“局部的真理”。
CPU times: user 954 ms, sys: 131 ms, total: 1.08 s
Wall time: 4.13 s
