### FlagEmbedding

In [6]:
from FlagEmbedding import FlagAutoModel

# Load the English BGE base checkpoint.
# `query_instruction_for_retrieval` is the prompt meant for retrieval queries;
# `use_fp16=True` requests half-precision weights.
model = FlagAutoModel.from_finetuned(
    'BAAI/bge-base-en-v1.5',
    use_fp16=True,
    query_instruction_for_retrieval="Represent this sentence for searching relevant passages:",
)

In [None]:
# Two small groups of sentences to compare against each other.
sentences_1 = ["I love NLP", "I love machine learning"]
sentences_2 = ["I love BGE", "I love text retrieval"]

# Encode each group with the model loaded in the previous cell, then take all
# pairwise dot products: similarity[i, j] scores sentences_1[i] vs sentences_2[j].
embeddings_1, embeddings_2 = (
    model.encode(group) for group in (sentences_1, sentences_2)
)
similarity = embeddings_1 @ embeddings_2.T
print(similarity)

You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


In [1]:
from FlagEmbedding import FlagAutoModel
# Chinese question / document-title pairs for the similarity demo.
sentences_1 = ["今天的医保怎么报销？", "怎么交养老保险？"]
sentences_2 = ["医保报销流程说明", "养老保险缴费指南"]

# Load the Chinese BGE large checkpoint together with its retrieval instruction.
model = FlagAutoModel.from_finetuned(
    'BAAI/bge-large-zh-v1.5',
    use_fp16=True,
    query_instruction_for_retrieval="为这个句子生成表示以用于检索相关文章：",
)

# Symmetric comparison: both groups go through the generic encode() call.
embeddings_1, embeddings_2 = (
    model.encode(group) for group in (sentences_1, sentences_2)
)
similarity = embeddings_1 @ embeddings_2.T
print(similarity)

# For s2p (short query to long passage) retrieval, prefer encode_queries(): it
# automatically adds the instruction to every query.
# The corpus side can still use encode_corpus(), because passages do not need
# an instruction.
queries = ["今天的医保怎么报销？", "怎么交养老保险？"]
passages = ["医保报销流程说明", "养老保险缴费指南"]
q_embeddings = model.encode_queries(queries)
p_embeddings = model.encode_corpus(passages)
scores = q_embeddings @ p_embeddings.T
print(scores)

  from .autonotebook import tqdm as notebook_tqdm
You're using a BertTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


[[0.62978095 0.43070096]
 [0.44024277 0.7314134 ]]
[[0.6212836  0.4161198 ]
 [0.44911343 0.74316585]]


### SentenceTransformer

In [3]:
from sentence_transformers import SentenceTransformer
# Retrieval instruction and toy inputs.
instruction = "为这个句子生成表示以用于检索相关文章："
queries = ['query_1', 'query_2']
passages = ["样例文档-1", "样例文档-2"]

model = SentenceTransformer('BAAI/bge-large-zh-v1.5')

# The instruction is prepended to each query by hand; passages are encoded
# as-is. Both sides are L2-normalized, so the dot product below is a cosine
# similarity.
prefixed_queries = [f"{instruction}{q}" for q in queries]
q_embeddings = model.encode(prefixed_queries, normalize_embeddings=True)
p_embeddings = model.encode(passages, normalize_embeddings=True)

# Leave the score matrix as the cell's last expression so the notebook shows it.
scores = q_embeddings @ p_embeddings.T
scores

array([[0.33725077, 0.20507815],
       [0.22591075, 0.3849578 ]], dtype=float32)

### LangChain

In [None]:
from langchain.embeddings import HuggingFaceBgeEmbeddings
# The checkpoint and the query instruction must be in the same language.
# The instruction used here is Chinese, so the Chinese BGE checkpoint is loaded
# (pairing it with the English "-en" checkpoint would prefix every query with
# text in a language that model was not built for).
model_name = "BAAI/bge-large-zh-v1.5"

# Single source of truth for the retrieval instruction; it is passed to the
# constructor once instead of being set a second time after construction.
query_instruction = "为这个句子生成表示以用于检索相关文章："

model_kwargs = {'device': 'cpu'}
# Normalized embeddings make the inner product equal to cosine similarity.
encode_kwargs = {'normalize_embeddings': True}

model = HuggingFaceBgeEmbeddings(
    model_name=model_name,
    model_kwargs=model_kwargs,
    encode_kwargs=encode_kwargs,
    query_instruction=query_instruction,
)

### HuggingFace Transformers

In [None]:
from transformers import AutoTokenizer, AutoModel
import torch
# Sentences we want sentence embeddings for
sentences = ["样例数据-1", "样例数据-2"]

# Load the tokenizer and the encoder; eval() switches off training-only
# behavior such as dropout.
tokenizer = AutoTokenizer.from_pretrained('BAAI/bge-large-zh-v1.5')
model = AutoModel.from_pretrained('BAAI/bge-large-zh-v1.5')
model.eval()

# Tokenize sentences.
# truncation=True only takes effect when a maximum length is known. Without an
# explicit max_length the tokenizer can fall back to "no truncation" (it logs a
# warning when it does), and over-long inputs would then exceed the encoder's
# position embeddings. Take the limit from the model config instead.
max_length = model.config.max_position_embeddings
encoded_input = tokenizer(
    sentences,
    padding=True,
    truncation=True,
    max_length=max_length,
    return_tensors='pt',
)
# For s2p (short query to long passage) retrieval, add an instruction to each
# query (do not add it to passages), e.g. with `instruction` and `queries`
# defined as in the SentenceTransformer example:
# encoded_input = tokenizer([instruction + q for q in queries], padding=True,
#                           truncation=True, max_length=max_length, return_tensors='pt')

# Compute token embeddings without tracking gradients.
with torch.no_grad():
    model_output = model(**encoded_input)
    # CLS pooling: use the hidden state of the first token of each sequence.
    sentence_embeddings = model_output[0][:, 0]

# L2-normalize each embedding so dot products are cosine similarities.
sentence_embeddings = torch.nn.functional.normalize(sentence_embeddings, p=2, dim=1)
print("Sentence embeddings:", sentence_embeddings)


Asking to truncate to max_length but no maximum length is provided and the model has no predefined maximum length. Default to no truncation.


Sentence embeddings: tensor([[ 0.0015,  0.0165, -0.0281,  ..., -0.0309,  0.0297, -0.0327],
        [ 0.0151,  0.0041, -0.0157,  ..., -0.0281,  0.0408, -0.0251]])


In [None]:
from sentence_transformers import SentenceTransformer
import faiss
import numpy as np

# Load the BGE model
model = SentenceTransformer("BAAI/bge-base-zh-v1.5")

# Small in-memory corpus to search over
docs = [
    "如何申请保险理赔？",
    "车险理赔流程有哪些步骤？",
    "医疗保险的报销范围是什么？",
    "我丢了保单还能理赔吗？",
]

# Embed the documents as normalized vectors
doc_embeddings = model.encode(docs, normalize_embeddings=True)

# Build the FAISS index.
# Inner product on normalized vectors equals cosine similarity.
dimension = doc_embeddings.shape[1]
index = faiss.IndexFlatIP(dimension)
index.add(np.array(doc_embeddings))

# Simulate a user query and embed it the same way as the documents
query = "保险怎么报销？"
query_embedding = model.encode([query], normalize_embeddings=True)

# Retrieve the most similar documents from FAISS
top_k = 3
scores, indices = index.search(np.array(query_embedding), top_k)

# Only one query was searched, so row 0 holds its hits in rank order.
print("用户查询:", query)
for rank, (doc_id, score) in enumerate(zip(indices[0], scores[0]), start=1):
    print(f"Top {rank}: {docs[doc_id]}，相似度：{score:.4f}")


To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


用户查询: 保险怎么报销？
Top 1: 如何申请保险理赔？，相似度：0.7128
Top 2: 医疗保险的报销范围是什么？，相似度：0.6986
Top 3: 车险理赔流程有哪些步骤？，相似度：0.6433
