# faiss使用余弦相似度 - 围城示例

In [1]:
%%time
%%capture

# 所有依赖库
!pip install llama-index
!pip install llama-index-llms-openai-like
!pip install llama-index-llms-ollama
!pip install llama-index-vector-stores-faiss

CPU times: user 28.7 ms, sys: 23.6 ms, total: 52.2 ms
Wall time: 7.6 s


In [2]:
%%time

# 导入需要的包

from llama_index.core import Settings
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.core import (
    SimpleDirectoryReader,
    load_index_from_storage,
    VectorStoreIndex,
    StorageContext,
)
from llama_index.vector_stores.faiss import FaissVectorStore

CPU times: user 1.34 s, sys: 206 ms, total: 1.55 s
Wall time: 1.26 s


In [3]:
%%time

# 初始化全局 embedding 模型
from llama_index.embeddings.ollama import OllamaEmbedding

ollama_embedding = OllamaEmbedding(
    model_name="dztech/bge-large-zh:v1.5",
    # model_name="bge-m3:latest",
    base_url="http://192.168.0.72:11435",
    ollama_additional_kwargs={"mirostat": 0}, # -mirostat N 使用 Mirostat 采样。
)

Settings.embed_model = ollama_embedding

CPU times: user 649 ms, sys: 31.8 ms, total: 681 ms
Wall time: 681 ms


In [4]:
%%time

import logging
import sys

# 设置系统日志，便于设置level排查

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

CPU times: user 165 µs, sys: 0 ns, total: 165 µs
Wall time: 169 µs


In [5]:
%%time

# 加载围城小说文本

documents = SimpleDirectoryReader(input_files=["./围城.txt"]).load_data()
documents[0].metadata['file_path']

CPU times: user 10.3 ms, sys: 329 µs, total: 10.6 ms
Wall time: 10.5 ms


'围城.txt'

In [6]:
%%time

# 生成faiss索引

import faiss

Settings.chunk_size=128
Settings.chunk_overlap=20

# dimensions of bge-large-zh
d = 1024
faiss_index = faiss.IndexFlatIP(d)

vector_store = FaissVectorStore(faiss_index=faiss_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    documents, storage_context=storage_context
)

INFO:faiss.loader:Loading faiss with AVX2 support.
Loading faiss with AVX2 support.
INFO:faiss.loader:Successfully loaded faiss with AVX2 support.
Successfully loaded faiss with AVX2 support.
CPU times: user 19 s, sys: 539 ms, total: 19.5 s
Wall time: 1min 13s


In [7]:
%%time

from llama_index.llms.openai_like import OpenAILike

llm = OpenAILike(model="xiaoyu", 
                 api_base="http://192.168.0.72:3000/v1", 
                 api_key="sk-bJP6QSnUfjAYeYeE505d3eBf63A643BeB0B8E350Df9b7750",
                 is_chat_model=True
                )
Settings.llm =llm

CPU times: user 1.05 s, sys: 71.8 ms, total: 1.13 s
Wall time: 1.13 s


In [9]:
%%time

# set Logging to DEBUG for more detailed outputs
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)

query_engine = index.as_query_engine(
    streaming=True,
    similarity_top_k=100,
    similarity_cutoff=0.5
)

streaming_response = query_engine.query("方鸿渐的妻子是谁，给出她的姓名")
streaming_response.print_response_stream()
print()

INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
方鸿渐的妻子是苏小姐，小说中称为苏文纨。
CPU times: user 233 ms, sys: 7.85 ms, total: 241 ms
Wall time: 9.58 s


In [10]:
%%time

streaming_response = query_engine.query("文中提到的局部真理是啥意思")
streaming_response.print_response_stream()
print()

INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.16

In [11]:
%%time

from llama_index.core.postprocessor import SentenceTransformerRerank

reranker = SentenceTransformerRerank(model='/models/bge-reranker-v2-m3', top_n=5)

CPU times: user 1.16 s, sys: 709 ms, total: 1.87 s
Wall time: 1.24 s


In [12]:
%%time

query_engine = index.as_query_engine(
    streaming=True,
    similarity_top_k=100,
    node_postprocessors=[reranker],
    similarity_cutoff=0.5
)

CPU times: user 225 µs, sys: 0 ns, total: 225 µs
Wall time: 229 µs


In [15]:
%%time

streaming_response = query_engine.query("方鸿渐的妻子是谁")
streaming_response.print_response_stream()
print()

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
方鸿渐的妻子是孙柔嘉。
CPU times: user 636 ms, sys: 873 µs, total: 637 ms
Wall time: 1.45 s


In [16]:
%%time

streaming_response = query_engine.query("文中提到的局部真理是啥意思")
streaming_response.print_response_stream()
print()

Batches:   0%|          | 0/4 [00:00<?, ?it/s]

INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
文中提到的“局部的真理”可能是指一种不全面或不完整的理解或表达，这里的鲍小姐并未完全赤裸，所以他们用“局部的真理”这个词来形容这种不完全的情况。这可能暗示着某种隐晦或委婉的说法，用来避免直接讨论敏感话题。
CPU times: user 743 ms, sys: 24.7 ms, total: 767 ms
Wall time: 3.55 s


In [17]:
display(len(streaming_response.source_nodes))

5

In [18]:
streaming_response.source_nodes

[NodeWithScore(node=TextNode(id_='7b623dfb-6f34-4837-83bd-05a3d17c77ff', embedding=None, metadata={'file_path': '围城.txt', 'file_name': '围城.txt', 'file_type': 'text/plain', 'file_size': 644668, 'creation_date': '2024-05-07', 'last_modified_date': '2024-05-07'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='0ada476a-5255-4484-be84-166c2adb4f7d', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'file_path': '围城.txt', 'file_name': '围城.txt', 'file_type': 'text/plain', 'file_size': 644668, 'creation_date': '2024-05-07', 'last_modified_date': '2024-05-07'}, hash='5c95afdac58fe0642a5e9382eb5d5323c30dbc68388f6faccd997d7d1fb1690e'), <NodeRelationship.PREVIOUS: '2'>: RelatedNodeInfo(node_id='73e50055-969e-4746-bcd1-f

In [22]:
streaming_response.source_nodes[0].score

0.77791935