### Step1 - 导入相关包 & 初始化设置

In [1]:
import os, sys

from langchain_ollama import OllamaEmbeddings
from langchain_chroma import Chroma
from langchain_core.documents import Document

# Load environment variables from the .env file located in the parent of the
# current working directory (pass a different dotenv_path to change this).
from dotenv import load_dotenv
root_dir = os.path.dirname(os.getcwd())
load_dotenv(dotenv_path=os.path.join(root_dir, ".env"))

# Logging: remove the existing loguru handlers and log to stderr instead.
from loguru import logger
logger.remove()
# Fall back to INFO when LOG_LEVEL is unset or empty; loguru rejects a None
# (or empty-string) level, which would abort this setup cell.
logger.add(sys.stderr, level=os.getenv("LOG_LEVEL") or "INFO")

1

### Step2 - 加载 嵌入模型 & 向量数据库

In [2]:
# Create the Ollama embedding client; the model name and the server URL are
# both read from environment variables.
embed_model = OllamaEmbeddings(
    base_url=os.getenv("OLLAMA_BASE_URL"),
    model=os.getenv("OLLAMA_EMB_MODEL"),
)
embed_model  # echo the configured client as the cell output

OllamaEmbeddings(model='qwen3-embedding:4b', validate_model_on_init=False, base_url='http://localhost:11434', client_kwargs={}, async_client_kwargs={}, sync_client_kwargs={}, mirostat=None, mirostat_eta=None, mirostat_tau=None, num_ctx=None, num_gpu=None, keep_alive=None, num_thread=None, repeat_last_n=None, repeat_penalty=None, temperature=None, stop=None, tfs_z=None, top_k=None, top_p=None)

In [3]:
# Open the "example_collection" Chroma collection, persisted under
# ./chroma_db and embedded with embed_model.
vector_db = Chroma(
    persist_directory="./chroma_db",
    collection_name="example_collection",
    embedding_function=embed_model,
)
vector_db  # echo the store object as the cell output

<langchain_chroma.vectorstores.Chroma at 0x16fd68457f0>

### Step3 - LangChain 向量工具
这里以 Chroma 为例: https://reference.langchain.com/python/integrations/langchain_chroma/#langchain_chroma.Chroma

In [4]:
# Add Documents: three sample documents stored under the string ids "1".."3".
document_1 = Document(metadata={"baz": "bar"}, page_content="foo")
document_2 = Document(metadata={"bar": "baz"}, page_content="thud")
document_3 = Document(page_content="i will be deleted :(")
documents = [document_1, document_2, document_3]

# One id per document, numbered from 1 in list order.
ids = [str(number) for number in range(1, len(documents) + 1)]

vector_db.add_documents(ids=ids, documents=documents)

['1', '2', '3']

In [5]:
# Update Documents: replace the entry stored under id "1" with new content.
updated_document = Document(metadata={"bar": "baz"}, page_content="qux")
vector_db.update_documents(documents=[updated_document], ids=["1"])

In [6]:
# Delete Documents
# Remove the entry stored under id "3" from the collection.
vector_db.delete(ids=["3"])

In [7]:
# Search with filter: best single match for "thud", restricted to documents
# whose metadata contains baz == "bar".
results = vector_db.similarity_search(filter={"baz": "bar"}, k=1, query="thud")

# Print one bullet per hit: page content followed by its metadata.
for doc in results:
    print(f"* {doc.page_content} [{doc.metadata}]")

* qux [{'bar': 'baz', 'baz': 'bar'}]


In [8]:
# Search with score: each result is a (document, score) pair.
# NOTE(review): for Chroma the score is presumably a distance (lower means
# closer), which matches the 0.0 printed for an exact-text match -- confirm.
results = vector_db.similarity_search_with_score(query="qux", k=1)
for doc, score in results:
    # ".3f" prints three decimals; the former "3f" spec only set a minimum
    # field width of 3 and fell back to the default six decimals.
    print(f"* [SIM={score:.3f}] {doc.page_content} [{doc.metadata}]")

* [SIM=0.000000] qux [{'bar': 'baz', 'baz': 'bar'}]


In [9]:
# Use as Retriever: wrap the store in an MMR retriever configured with
# k=1, fetch_k=2 and lambda_mult=0.5, then query it once.
retriever = vector_db.as_retriever(
    search_kwargs={"lambda_mult": 0.5, "fetch_k": 2, "k": 1},
    search_type="mmr",
)
retriever.invoke("thud")  # the returned documents are the cell output

[Document(id='2', metadata={'bar': 'baz'}, page_content='thud')]

In [10]:
# 异步部分 (Async)
# 这部分代码无法执行, 仅作展示用来理解

try:
    # add documents
    await vector_db.aadd_documents(documents=documents, ids=ids)

    # delete documents
    await vector_db.adelete(ids=["3"])

    # search
    results = vector_db.asimilarity_search(query="thud",k=1)

    # search with score
    results = await vector_db.asimilarity_search_with_score(query="qux", k=1)
    for doc, score in results:
        print(f"* [SIM={score:3f}] {doc.page_content} [{doc.metadata}]")
except Exception as e:
    pass

* [SIM=0.568420] foo [{'bar': 'baz', 'baz': 'bar'}]


  results = await vector_db.asimilarity_search_with_score(query="qux", k=1)
