# bge-large-zh

## curl 简单调用 -- 判断是否可用

In [1]:
%%time
%%capture
%%bash

curl -s http://192.168.0.72:11435/api/embeddings \
    -H "Content-Type: application/json" \
    -d '{
        "model": "dztech/bge-large-zh:v1.5",
        "prompt": "Llamas are members of the camelid family"
    }'

CPU times: user 4.59 ms, sys: 4.11 ms, total: 8.7 ms
Wall time: 780 ms


In [2]:
%%time
%%capture
%%bash

curl -s http://192.168.0.72:11435/api/embeddings \
    -H "Content-Type: application/json" \
    -d '{
        "model": "chatfire/bge-m3:q8_0",
        "prompt": "Llamas are members of the camelid family"
    }'

CPU times: user 3.73 ms, sys: 0 ns, total: 3.73 ms
Wall time: 27.8 ms


## LlamaIndex api - 生成嵌入向量

In [2]:
%%time

from llama_index.embeddings.ollama import OllamaEmbedding

ollama_embedding = OllamaEmbedding(
    model_name="dztech/bge-large-zh:v1.5",
    # model_name="bge-m3:latest",
    base_url="http://192.168.0.72:11435",
    ollama_additional_kwargs={"mirostat": 0}, # -mirostat N 使用 Mirostat 采样。
)

pass_embedding = ollama_embedding.get_text_embedding_batch(
    ["This is a passage!", "This is another passage"], 
    # show_progress=True
)
len(pass_embedding[1])

CPU times: user 1.35 s, sys: 284 ms, total: 1.64 s
Wall time: 1.27 s


1024

In [3]:
%%time

from llama_index.embeddings.ollama import OllamaEmbedding

ollama_embedding = OllamaEmbedding(
    model_name="chatfire/bge-m3:q8_0",
    # model_name="bge-m3:latest",
    base_url="http://192.168.0.72:11435",
    ollama_additional_kwargs={"mirostat": 0}, # -mirostat N 使用 Mirostat 采样。
)

pass_embedding = ollama_embedding.get_text_embedding_batch(
    ["This is a passage!", "This is another passage"], 
    # show_progress=True
)
len(pass_embedding[1])

CPU times: user 2.82 s, sys: 513 ms, total: 3.33 s
Wall time: 4.06 s


1024

## LlamaIndex api + Faiss存储索引

### 加载库

In [3]:
%%time
%%capture

!pip install llama-index
!pip install llama-index-llms-openai-like
!pip install llama-index-llms-ollama
!pip install llama-index-vector-stores-faiss

CPU times: user 31.2 ms, sys: 15.5 ms, total: 46.8 ms
Wall time: 7.36 s


In [4]:
%%time

from llama_index.core import Settings
from llama_index.embeddings.ollama import OllamaEmbedding
from llama_index.core import (
    SimpleDirectoryReader,
    load_index_from_storage,
    VectorStoreIndex,
    StorageContext,
)
from llama_index.vector_stores.faiss import FaissVectorStore

CPU times: user 134 µs, sys: 3.28 ms, total: 3.41 ms
Wall time: 2.81 ms


### 设置系统日志

In [5]:
%%time

import logging
import sys

logging.basicConfig(stream=sys.stdout, level=logging.INFO)
logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))

CPU times: user 209 µs, sys: 42 µs, total: 251 µs
Wall time: 254 µs


### 嵌入模型 - bge-large-zh

In [6]:
%%time

ollama_embedding = OllamaEmbedding(
    model_name="dztech/bge-large-zh:v1.5",
    base_url="http://192.168.0.72:11435",
    ollama_additional_kwargs={"mirostat": 0},
)

Settings.embed_model = ollama_embedding

CPU times: user 621 ms, sys: 39.4 ms, total: 660 ms
Wall time: 660 ms


### 加载文档

In [7]:
%%time

documents = SimpleDirectoryReader("./books/").load_data()
documents[0].id

CPU times: user 7.01 ms, sys: 3.84 ms, total: 10.9 ms
Wall time: 10.4 ms


[Document(id_='f06f07e5-8c85-433d-a652-a195defc7a96', embedding=None, metadata={'file_path': '/root/notebook/my-jupyter-notebook/llm/bge/books/iphone.txt', 'file_name': 'iphone.txt', 'file_type': 'text/plain', 'file_size': 563, 'creation_date': '2024-05-07', 'last_modified_date': '2024-05-07'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={}, text='iPhone 15和iPhone 15 Plus是由Apple Inc.设计、开发和销售的智能手机。它们是第十七代、也是当前一代iPhone，接替iPhone 14 和 iPhone 14 Plus。这些设备于 2023 年 9 月 12 日在加利福尼亚州库比蒂诺Apple Park举行的Apple 活动期间与价格较高的iPhone 15 Pro 和 15 Pro Max一起发布。预订于 2023 年 9 月 15 日开始，这些设备于 2023 年 9 月 22 日上市。与 iPhone 15 Pro 和 Pro Max 一样，15 和 15 Plus 是第一批用USB-C取代专有Lightning连接器的iPhone遵守欧盟的指令。', start_char_idx=None, end_char_idx=None, text_template='{metadata_str}\n\n{content}', metadata_te

### 创建Faiss索引

ModuleNotFoundError: No module named 'faiss.swigfaiss_avx2'

解决办法是：

```bash
# find / -name swigfaiss.py
/usr/local/lib/python3.10/dist-packages/faiss/swigfaiss.py
# cd /usr/local/lib/python3.10/dist-packages/faiss/
# ln -s swigfaiss.py swigfaiss_avx2.py
```

https://github.com/kyamagu/faiss-wheels/issues/39

In [8]:
%%time

import faiss

# dimensions of bge-large-zh
d = 1024
faiss_index = faiss.IndexFlatL2(d)

INFO:faiss.loader:Loading faiss with AVX2 support.
Loading faiss with AVX2 support.
INFO:faiss.loader:Successfully loaded faiss with AVX2 support.
Successfully loaded faiss with AVX2 support.
CPU times: user 17.3 ms, sys: 181 µs, total: 17.4 ms
Wall time: 17.3 ms


In [9]:
%%time

vector_store = FaissVectorStore(faiss_index=faiss_index)
storage_context = StorageContext.from_defaults(vector_store=vector_store)
index = VectorStoreIndex.from_documents(
    documents, storage_context=storage_context
)

CPU times: user 222 ms, sys: 20.1 ms, total: 242 ms
Wall time: 310 ms


In [10]:
%%time

# save index to disk
index.storage_context.persist()

CPU times: user 2.26 ms, sys: 0 ns, total: 2.26 ms
Wall time: 1.97 ms


In [11]:
%%time

# load index from disk
vector_store = FaissVectorStore.from_persist_dir("./storage")
storage_context = StorageContext.from_defaults(
    vector_store=vector_store, persist_dir="./storage"
)
index = load_index_from_storage(storage_context=storage_context)

INFO:root:Loading llama_index.vector_stores.faiss.base from ./storage/default__vector_store.json.
Loading llama_index.vector_stores.faiss.base from ./storage/default__vector_store.json.
INFO:llama_index.core.indices.loading:Loading all indices.
Loading all indices.
CPU times: user 1.06 ms, sys: 3.57 ms, total: 4.63 ms
Wall time: 4.02 ms


### 查询索引

In [12]:
%%time

from llama_index.llms.openai_like import OpenAILike

llm = OpenAILike(model="xiaoyu", 
                 api_base="http://192.168.0.72:3000/v1", 
                 api_key="sk-bJP6QSnUfjAYeYeE505d3eBf63A643BeB0B8E350Df9b7750",
                 is_chat_model=True
                )
Settings.llm =llm

CPU times: user 1.02 s, sys: 120 ms, total: 1.14 s
Wall time: 1.14 s


In [13]:
%%time

# set Logging to DEBUG for more detailed outputs
logging.basicConfig(stream=sys.stdout, level=logging.DEBUG)

query_engine = index.as_query_engine(
    similarity_top_k=3,
    similarity_cutoff=0.5
)
response = query_engine.query("介绍下iphone15")

INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
CPU times: user 213 ms, sys: 0 ns, total: 213 ms
Wall time: 6.85 s


In [14]:
%%time

from IPython.display import Markdown, display

display(Markdown(f"{response}"))

iPhone 15和iPhone 15 Plus是由Apple Inc.设计、开发和销售的智能手机。它们是第十七代、也是当前一代iPhone，接替iPhone 14 和 iPhone 14 Plus。这些设备于 2023 年 9 月 12 日在加利福尼亚州库比蒂诺Apple Park举行的Apple 活动期间与价格较高的iPhone 15 Pro 和 15 Pro Max一起发布。预订于 2023 年 9 月 15 日开始，这些设备于 2023 年 9 月 22 日上市。与 iPhone 15 Pro 和 Pro Max 一样，15 和 15 Plus 是第一批用USB-C取代专有Lightning连接器的iPhone，遵守欧盟的指令。

CPU times: user 1.38 ms, sys: 193 µs, total: 1.57 ms
Wall time: 1.24 ms


### 流式查询索引

In [18]:
%%time

query_engine = index.as_query_engine(
    streaming=True,
    similarity_top_k=3,
    similarity_cutoff=0.5
)

streaming_response = query_engine.query("介绍下苹果树这种植物")
streaming_response.print_response_stream()

INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
苹果树（学名：Malus domestica）是蔷薇科苹果亚科苹果属的一种落叶乔木。这种植物在全球范围内都有广泛的种植，因其果实——苹果而闻名。苹果通常呈红色，但品种众多，颜色和口感各异，从脆甜到绵密不等。它们富含矿物质和维生素，是人们日常饮食中常见的水果之一。苹果树的栽培历史悠久，已知品种超过7,500种，这些品种根据不同的需求和用途，如烹饪、生食或用于酿酒等特性进行了培育。苹果不仅可以直接食用，也被广泛用于食品加工和饮料制作。CPU times: user 176 ms, sys: 24.5 ms, total: 200 ms
Wall time: 8.82 s


### response格式

In [19]:
streaming_response

StreamingResponse(response_gen=<generator object stream_chat_response_to_tokens.<locals>.gen at 0x7f0fa8f9b4c0>, source_nodes=[NodeWithScore(node=TextNode(id_='94a98846-0232-4599-a144-33016fe561d2', embedding=None, metadata={'file_path': '/root/notebook/my-jupyter-notebook/llm/bge/books/tree.txt', 'file_name': 'tree.txt', 'file_type': 'text/plain', 'file_size': 485, 'creation_date': '2024-05-07', 'last_modified_date': '2024-05-07'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='769d884b-f957-46f9-b93d-3f2303ef4151', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'file_path': '/root/notebook/my-jupyter-notebook/llm/bge/books/tree.txt', 'file_name': 'tree.txt', 'file_type': 'text/plain', 'file_size': 485, 

In [20]:
streaming_response.source_nodes[0]

NodeWithScore(node=TextNode(id_='94a98846-0232-4599-a144-33016fe561d2', embedding=None, metadata={'file_path': '/root/notebook/my-jupyter-notebook/llm/bge/books/tree.txt', 'file_name': 'tree.txt', 'file_type': 'text/plain', 'file_size': 485, 'creation_date': '2024-05-07', 'last_modified_date': '2024-05-07'}, excluded_embed_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], excluded_llm_metadata_keys=['file_name', 'file_type', 'file_size', 'creation_date', 'last_modified_date', 'last_accessed_date'], relationships={<NodeRelationship.SOURCE: '1'>: RelatedNodeInfo(node_id='769d884b-f957-46f9-b93d-3f2303ef4151', node_type=<ObjectType.DOCUMENT: '4'>, metadata={'file_path': '/root/notebook/my-jupyter-notebook/llm/bge/books/tree.txt', 'file_name': 'tree.txt', 'file_type': 'text/plain', 'file_size': 485, 'creation_date': '2024-05-07', 'last_modified_date': '2024-05-07'}, hash='59822ddd03d6a4b78ae8122f3fb4c173db9e0c15aa417c706ae79

In [24]:
streaming_response.source_nodes[0].metadata['file_path']

'/root/notebook/my-jupyter-notebook/llm/bge/books/tree.txt'

In [25]:
streaming_response.source_nodes[0].score

735.306640625

In [26]:
streaming_response.source_nodes[1].metadata['file_path']

'/root/notebook/my-jupyter-notebook/llm/bge/books/xiaomi.txt'

In [27]:
streaming_response.source_nodes[1].score

802.29248046875

In [29]:
streaming_response.source_nodes[2].metadata['file_path']

'/root/notebook/my-jupyter-notebook/llm/bge/books/iphone.txt'

In [28]:
streaming_response.source_nodes[2].score

857.120849609375

### 使用底层api流式输出

In [32]:
%%time

streaming_response = query_engine.query("介绍下小米手机")

for token in streaming_response.response_gen:
    print(token, end="",flush=True)

INFO:httpx:HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
HTTP Request: POST http://192.168.0.72:3000/v1/chat/completions "HTTP/1.1 200 OK"
小米14是小米公司推出的一系列智能手机，基于Android系统，定位为旗舰产品。它搭载了高通Snapdragon 8 Gen 3芯片组，这是2023年发布的产品系列，包括小米14、14 Pro和14 Ultra等型号。其中，小米14 Ultra是在2月22日发布的，而14系列的其他型号在11月1日在中国市场上市。这些手机反映了小米在高端市场的努力，并且可能具有先进的技术和特性。CPU times: user 141 ms, sys: 23 ms, total: 164 ms
Wall time: 4.03 s
