# 围城小说示例 - bge-large-zh

## 准备

In [3]:
%%time
%%capture
%%bash

# 确认 embeddings 端点可用
curl -s http://192.168.0.72:11435/api/embeddings \
    -H "Content-Type: application/json" \
    -d '{
        "model": "dztech/bge-large-zh:v1.5",
        "prompt": "Llamas are members of the camelid family"
    }'

CPU times: user 584 µs, sys: 3.31 ms, total: 3.89 ms
Wall time: 31.7 ms


In [5]:
%%time
%%capture

# 所有依赖库
!pip install llama-index
!pip install llama-index-llms-openai-like
!pip install llama-index-llms-ollama
!pip install llama-index-vector-stores-faiss

CPU times: user 30.5 ms, sys: 16.4 ms, total: 47 ms
Wall time: 7.43 s


In [6]:
%%time

# 导入需要的包

from llama_index.core import Settings
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.core import (
    SimpleDirectoryReader,
    load_index_from_storage,
    VectorStoreIndex,
    StorageContext,
)
from llama_index.vector_stores.faiss import FaissVectorStore

CPU times: user 3.34 ms, sys: 0 ns, total: 3.34 ms
Wall time: 2.93 ms


In [7]:
%%time

# 初始化全局 embedding 模型
from llama_index.embeddings.ollama import OllamaEmbedding

ollama_embedding = OllamaEmbedding(
    model_name="dztech/bge-large-zh:v1.5",
    # model_name="bge-m3:latest",
    base_url="http://192.168.0.72:11435",
    ollama_additional_kwargs={"mirostat": 0}, # -mirostat N 使用 Mirostat 采样。
)

Settings.embed_model = ollama_embedding

CPU times: user 647 ms, sys: 40.5 ms, total: 687 ms
Wall time: 687 ms


In [8]:
%%time

import logging
import sys

# 设置系统日志，便于设置level排查

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

CPU times: user 136 µs, sys: 21 µs, total: 157 µs
Wall time: 160 µs


## 加载文件

In [13]:
%%time

# 加载围城小说文本

documents = SimpleDirectoryReader(input_files=["./围城.txt"]).load_data()
documents[0].metadata['file_path']

CPU times: user 2.49 ms, sys: 0 ns, total: 2.49 ms
Wall time: 2.17 ms


'围城.txt'

In [17]:
%%time

# 生成faiss索引

import faiss

Settings.chunk_size=128
Settings.chunk_overlap=20

# dimensions of bge-large-zh
d = 1024
faiss_index = faiss.IndexFlatL2(d)

vector_store = FaissVectorStore(faiss_index=faiss_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    documents, storage_context=storage_context
)

CPU times: user 18.5 s, sys: 499 ms, total: 19 s
Wall time: 1min 12s


In [18]:
%%time

from llama_index.llms.openai_like import OpenAILike

llm = OpenAILike(model="xiaoyu", 
                 api_base="http://192.168.0.72:3000/v1", 
                 api_key="sk-bJP6QSnUfjAYeYeE505d3eBf63A643BeB0B8E350Df9b7750",
                 is_chat_model=True
                )
Settings.llm =llm

CPU times: user 1.06 s, sys: 52.2 ms, total: 1.11 s
Wall time: 1.11 s


## 最基本的嵌入

In [24]:
%%time

# set Logging to DEBUG for more detailed outputs
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)

query_engine = index.as_query_engine(
    streaming=True,
    similarity_top_k=100,
    similarity_cutoff=0.5
)

streaming_response = query_engine.query("方鸿渐的妻子是谁")
streaming_response.print_response_stream()
print()

INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
孙柔嘉
CPU times: user 80.3 ms, sys: 4.23 ms, total: 84.5 ms
Wall time: 7.75 s


In [26]:
%%time

streaming_response = query_engine.query("孙柔嘉是谁")
streaming_response.print_response_stream()
print()

INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
孙柔嘉是《围城》中的女主角，方鸿渐的妻子，她的形象体现了小说中女性角色的复杂性和多面性。作为方鸿渐的情感归宿和生活伴侣，她的性格既独立又敏感，这在作品中通过与主人公的互动和婚姻生活的描写得以展现。她的存在不仅是故事发展的重要推动力，也是对当时社会和婚

In [27]:
%%time

streaming_response = query_engine.query("介绍下苏文纨")
streaming_response.print_response_stream()
print()

INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.16

In [28]:
%%time

streaming_response = query_engine.query("方鸿渐和赵辛楣是啥关系")
streaming_response.print_response_stream()
print()

INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
赵辛楣与方鸿渐在《围城》中是大学同学，他们之间有深厚的友谊，但也有竞争和误解。辛楣最初对鸿渐并不看好，但随着故事的发展，他帮助鸿渐解决了一些问题，显示出两人关系的复杂性和变化性。具体细节需要通过小说的情节来进一步揭示。
CPU times: user

In [29]:
%%time

streaming_response = query_engine.query("孙柔嘉和赵辛楣是啥关系")
streaming_response.print_response_stream()
print()

INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.16

In [30]:
%%time

streaming_response = query_engine.query("文中提到的局部真理是啥意思")
streaming_response.print_response_stream()
print()

INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
局部真理在这里可能指的是文本中通过鲍小姐的言行所体现的某种深刻见解或情感状态，它并非字面上的绝对真理，而是作者通过文学手法传达的一种主观感受或者社会观察。这种“真理”是相对的，取决于读者的解读和生活经验，是一种隐含在故事细节中的理解。
CPU tim

In [35]:
%%time

query_engine = index.as_query_engine(
    streaming=True,
    similarity_top_k=10,
    similarity_cutoff=0.5
)

streaming_response = query_engine.query("方鸿渐的父亲是谁，说出他的名字")
streaming_response.print_response_stream()
print()

INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
方鸿渐的父亲是方□（辶＋豚）翁，但具体的名字在文本中并未提及。
CPU times: user 43.6 ms, sys: 11.5 ms, total: 55 ms
Wall time: 2.04 s


In [40]:
%%time

query_engine = index.as_query_engine(
    streaming=True,
    similarity_top_k=100,
    similarity_cutoff=0.5
)

streaming_response = query_engine.query("方鸿渐的父亲是谁，说出他的名字")
streaming_response.print_response_stream()
display(len(streaming_response.source_nodes))
# print()

INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
方豚翁是方鸿渐的父亲。

100

CPU times: user 112 ms, sys: 147 µs, total: 112 ms
Wall time: 9.84 s


## 使用reranker

In [45]:
%%time

from llama_index.core.postprocessor import SentenceTransformerRerank

reranker = SentenceTransformerRerank(model='/models/bge-reranker-v2-m3', top_n=5)

CPU times: user 1.02 s, sys: 632 ms, total: 1.66 s
Wall time: 815 ms


In [47]:
%%time

query_engine = index.as_query_engine(
    streaming=True,
    similarity_top_k=100,
    node_postprocessors=[reranker],
    similarity_cutoff=0.5
)

CPU times: user 181 µs, sys: 18 µs, total: 199 µs
Wall time: 202 µs


In [48]:
%%time

streaming_response = query_engine.query("方鸿渐的父亲是谁，说出他的名字")
streaming_response.print_response_stream()
display(len(streaming_response.source_nodes))

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
方鸿渐的父亲叫方豚翁。

5

CPU times: user 1.09 s, sys: 68.1 ms, total: 1.15 s
Wall time: 2.1 s


In [49]:
%%time

streaming_response = query_engine.query("方鸿渐和赵辛楣是啥关系")
streaming_response.print_response_stream()
display(len(streaming_response.source_nodes))

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
方鸿渐和赵辛楣的关系在不同的情境中有微妙的变化。他们最初可能是朋友或同行，因为赵辛楣表示过对方鸿渐学问人品的欣赏，并邀请他来帮忙。然而，随着赵辛楣地位的上升，两人之间的关系似乎发生了逆转，方鸿渐感到需要仰视赵辛楣，不再像以前那样平等。这表明赵辛楣在某种程度上可能给方鸿渐带来了压力或竞争感。具体到“从龙派”这个说法，可能指的是赵辛楣的地位让方鸿渐觉得自己像是他的追随者，而不是平等的对话伙伴。因此，他们的关系可以描述为由最初的欣赏和合作转向了一种带有竞争和不平等感的互动。

5

CPU times: user 850 ms, sys: 46.7 ms, total: 896 ms
Wall time: 6.89 s


In [50]:
%%time

streaming_response = query_engine.query("文中提到的局部真理是啥意思")
streaming_response.print_response_stream()
display(len(streaming_response.source_nodes))

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
文中提到的“局部的真理”可能是指鲍小姐并未完全赤裸，这在特定语境下被形容为一种不全面或部分的真实情况。这种表达可能带有一些幽默或讽刺的成分，暗示着事情并非表面上看起来那么简单直接。

5

CPU times: user 732 ms, sys: 22.4 ms, total: 754 ms
Wall time: 3.17 s
