# Qdrant 示例

## 加载文档

In [1]:
%%time

from llama_index.core import SimpleDirectoryReader

# Build the reader first, then load, so the two steps can be inspected separately.
# filename_as_id=True is passed so that ids are presumably derived from file
# names -- confirm against the SimpleDirectoryReader documentation.
reader = SimpleDirectoryReader(input_dir="./data", filename_as_id=True)
documents = reader.load_data()

# Show how many documents were loaded.
len(documents)

CPU times: user 1.55 s, sys: 247 ms, total: 1.8 s
Wall time: 1.42 s


2

## 全局设置

In [2]:
%%time

from llama_index.core import Settings
from llama_index.llms.ollama import Ollama
from llama_index.embeddings.ollama import OllamaEmbedding

# The chat model and the embedding model are both reached through this Ollama endpoint.
OLLAMA_BASE_URL = "http://ape:11434"

# Global default LLM for the notebook.
Settings.llm = Ollama(
    model="qwen2",
    base_url=OLLAMA_BASE_URL,
    temperature=0.1,
    request_timeout=60.0,
    is_chat_model=True,
)

# Global default embedding model for the notebook.
Settings.embed_model = OllamaEmbedding(
    base_url=OLLAMA_BASE_URL,
    model_name="rjmalagon/gte-qwen2-1.5b-instruct-embed-f16",
    # -mirostat N: use Mirostat sampling.
    ollama_additional_kwargs={"mirostat": 0},
)

# Optional chunking overrides, currently disabled:
# Settings.chunk_size = 512
# Settings.chunk_overlap = 50

CPU times: user 732 ms, sys: 32.2 ms, total: 764 ms
Wall time: 764 ms


## 创建向量索引

In [3]:
%%time

import qdrant_client
from llama_index.vector_stores.qdrant import QdrantVectorStore
from qdrant_client.models import Distance, VectorParams
from llama_index.core import StorageContext
from llama_index.core import VectorStoreIndex

# Connect to the Qdrant server. The commented-out `location` argument is the
# in-memory alternative to a remote host.
client = qdrant_client.QdrantClient(
    # location=":memory:",
    host="ape", port=6333,
)

collection_name = "example"

# Wrap the collection as a LlamaIndex vector store and expose it through a
# storage context so that a newly built index writes into it.
vector_store = QdrantVectorStore(
    client=client,
    collection_name=collection_name,
)
storage_context = StorageContext.from_defaults(vector_store=vector_store)

# Check whether the collection exists by asking the server directly, instead
# of listing every collection and scanning the names by hand.
if client.collection_exists(collection_name):
    print(f"Collection '{collection_name}' 已经存在，加载.")
    # NOTE(review): an index loaded this way may not carry the per-document
    # hashes that `refresh_ref_docs` compares against -- confirm that the
    # refresh step at the end of the notebook does not re-insert every
    # document when the collection already exists.
    index = VectorStoreIndex.from_vector_store(vector_store=vector_store)
else:
    print(f"Collection '{collection_name}' 不存在，创建.")

    # Build the index from the loaded documents, storing it in Qdrant.
    index = VectorStoreIndex.from_documents(
        documents,
        storage_context=storage_context,
    )

index

Collection 'example' 不存在，创建.
CPU times: user 1.47 s, sys: 64.8 ms, total: 1.54 s
Wall time: 2.35 s


<llama_index.core.indices.vector_store.base.VectorStoreIndex at 0x7f89fd4dbf10>

## 高级查询

In [4]:
%%time

# High-level query engine straight from the index: streamed output,
# "tree_summarize" response mode.
query_engine = index.as_query_engine(streaming=True, response_mode="tree_summarize")

CPU times: user 123 µs, sys: 11 µs, total: 134 µs
Wall time: 137 µs


In [5]:
%%time

# Ask the question, then print the streamed answer as it arrives.
question = "天问三号原计划是何时发射？"
response = query_engine.query(question)
response.print_response_stream()

天问三号任务原计划在2030年前后实施发射。CPU times: user 72.8 ms, sys: 1.94 ms, total: 74.7 ms
Wall time: 1.16 s


In [6]:
# Score attached to the first source node of the most recent response.
first_source = response.source_nodes[0]
first_source.score

0.68652153

## 底层 API 查询

In [7]:
%%time

from llama_index.core import get_response_synthesizer
from llama_index.core.retrievers import VectorIndexRetriever
from llama_index.core.query_engine import RetrieverQueryEngine
from llama_index.core.postprocessor import SimilarityPostprocessor

# Cutoff handed to SimilarityPostprocessor as `similarity_cutoff`.
SIMILARITY_CUTOFF = 0.3

# Assemble the query engine from its parts: retriever, streaming response
# synthesizer, and a similarity-based node postprocessor.
retriever = VectorIndexRetriever(index=index)
response_synthesizer = get_response_synthesizer(streaming=True)
similarity_filter = SimilarityPostprocessor(similarity_cutoff=SIMILARITY_CUTOFF)

query_engine = RetrieverQueryEngine(
    response_synthesizer=response_synthesizer,
    node_postprocessors=[similarity_filter],
    retriever=retriever,
)

CPU times: user 4.12 ms, sys: 9 µs, total: 4.13 ms
Wall time: 3.71 ms


In [8]:
%%time

# Run the question through the hand-assembled engine and stream the answer.
question = "高铁一公里耗电一万度是否属实？"
response = query_engine.query(question)
response.print_response_stream()

网传“中国高铁一公里耗一万度电”的说法不属实。这一说法偷换了概念，与事实不符。以现有CRH380A、CRH380B、CR400AF、CR400BF等4个8编组的主力车型为例，其轮周功率分别为：9360kW、9200kW、9750kW、10140kW。通过在京沪高铁（往返距离2636公里）达速350km/h运行条件下的能耗测试数据显示，其耗电量分别为：56931度、61861度、51364度、55490度，据此推算出平均每公里耗电量分别为：21.6度、23.5度、19.5度、21.1度。因此，网传“一公里耗一万度电”是错误的，实际上中国高铁列车每公里耗电量远低于这一数值。CPU times: user 260 ms, sys: 29.7 ms, total: 290 ms
Wall time: 5.03 s


## 追加文档

In [9]:
%%time

# Load the additional material from ./data2; only the first document returned
# by the reader is taken.
new_document = SimpleDirectoryReader(
    input_dir="./data2",
    filename_as_id=True,
).load_data()[0]

# Keep this cell idempotent: re-running it must not append the same document
# again, so add it only when its id is not already in `documents`.
# (Assumes filename_as_id=True yields the same id on every load -- TODO confirm.)
known_doc_ids = {doc.id_ for doc in documents}
if new_document.id_ not in known_doc_ids:
    documents.append(new_document)

# Show the document count after the (possible) append.
len(documents)

CPU times: user 2.09 ms, sys: 0 ns, total: 2.09 ms
Wall time: 1.75 ms


3

## 刷新索引

In [10]:
%%time

# Refresh the index against the current document list. The displayed result
# holds one boolean per entry in `documents`.
refreshed_flags = index.refresh_ref_docs(documents)
refreshed_flags

CPU times: user 57.2 ms, sys: 6 µs, total: 57.2 ms
Wall time: 203 ms


[False, False, True]