https://cloud.tencent.com/developer/article/2490861

https://weaviate.io/developers/weaviate/client-libraries/python

```shell
# Download the Weaviate server image; the `latest` tag is the same one the run command below starts.
docker pull semitechnologies/weaviate:latest
```

```shell
# Start Weaviate detached, publishing port 8080 (HTTP) and 50051 (gRPC),
# with API-key authentication and admin-list authorization switched on.
# The keys and users below are demo values used by the Python cells.
docker run -d --name weaviate \
    --restart=always \
    -p 8080:8080 \
    -p 50051:50051 \
    -e "AUTHENTICATION_APIKEY_ENABLED=true" \
    -e "AUTHENTICATION_APIKEY_ALLOWED_KEYS=test-secret-key,test2-secret-key" \
    -e "AUTHENTICATION_APIKEY_USERS=test@2024.com,test2@2024.com" \
    -e "AUTHORIZATION_ADMINLIST_ENABLED=true" \
    -e "AUTHORIZATION_ADMINLIST_USERS=test@2024.com" \
    -e "AUTHORIZATION_ADMINLIST_READONLY_USERS=test2@2024.com" \
    -e WEAVIATE_HOSTNAME=0.0.0.0 \
    semitechnologies/weaviate:latest

```

参数说明

-d: 让容器在后台运行。

--name weaviate: 给容器命名为weaviate。

--restart=always: 容器退出后总是自动重启；Docker 守护进程（或宿主机）重启后也会自动启动该容器。

-p 8080:8080: 将容器内的 8080 端口（HTTP 接口）映射到宿主机的 8080 端口。

-p 50051:50051: 将容器内的 50051 端口（gRPC 接口）映射到宿主机的 50051 端口。

-e "AUTHENTICATION_APIKEY_ENABLED=true": 启用 API 密钥认证功能。

-e "AUTHENTICATION_APIKEY_ALLOWED_KEYS=test-secret-key,test2-secret-key": 指定允许使用的 API 密钥列表。

-e "AUTHENTICATION_APIKEY_USERS=test@2024.com,test2@2024.com": 按顺序将密钥与用户一一对应（test-secret-key 对应 test@2024.com，test2-secret-key 对应 test2@2024.com）。

-e "AUTHORIZATION_ADMINLIST_ENABLED=true": 开启管理员列表授权。

-e "AUTHORIZATION_ADMINLIST_USERS=test@2024.com": 指定管理员列表中的用户。

-e "AUTHORIZATION_ADMINLIST_READONLY_USERS=test2@2024.com": 指定只读权限的用户列表。

-e WEAVIATE_HOSTNAME=0.0.0.0: 设置 Weaviate 的主机名，监听所有可用网络接口。

semitechnologies/weaviate:latest: 指定要从 Docker Hub 下载并运行的 Weaviate 镜像的最新版本。

In [1]:
!pip install -U weaviate-client

Looking in indexes: https://mirrors.aliyun.com/pypi/simple/


In [2]:
import weaviate
from weaviate.auth import AuthApiKey

In [3]:
# Connect to the locally deployed Weaviate instance using one of the
# API keys configured on the server.
api_key = AuthApiKey("test-secret-key")
client = weaviate.connect_to_local(auth_credentials=api_key)

# Print whether the server reports itself as ready
ready = client.is_ready()
print(ready)

# Release the connection
client.close()

True


In [4]:
# Alternative: spell out every connection setting explicitly.
connection_options = dict(
    skip_init_checks=False,
    # HTTP endpoint
    http_host="127.0.0.1",
    http_port=8080,
    http_secure=False,
    # gRPC endpoint
    grpc_host="127.0.0.1",
    grpc_port=50051,
    grpc_secure=False,
    # Must be one of the keys listed in AUTHENTICATION_APIKEY_ALLOWED_KEYS.
    # Note: only the key is needed here, not the user name.
    auth_credentials=AuthApiKey("test-secret-key"),
)
client = weaviate.connect_to_custom(**connection_options)

# Print whether the server reports itself as ready
ready = client.is_ready()
print(ready)

# Release the connection
client.close()

True


In [11]:
def create_collection(client: weaviate.WeaviateClient, collection_name: str):
    """
    Create a collection from a dictionary schema definition.

    The schema uses no vectorizer, an HNSW vector index with cosine
    distance, and a single ``text`` property that is both filterable and
    searchable. A success message is printed; an
    ``UnexpectedStatusCodeException`` raised by the client is caught and
    printed instead of propagating.

    :param client: Weaviate client used to create the collection
    :param collection_name: name of the collection (the schema "class")
    """
    # The only property: the raw text of a document.
    text_property = {
        "name": "text",
        "description": "The text content",
        "dataType": ["text"],
        "tokenization": "word",
        "indexFilterable": True,
        "indexSearchable": True,
    }
    # Settings for the HNSW vector index.
    hnsw_settings = {
        "distance": "cosine",
        "efConstruction": 200,
        "maxConnections": 64,
    }
    schema = {
        "class": collection_name,
        "description": "A collection for product information",
        "vectorizer": "none",  # vectors are expected to be supplied by the caller
        "vectorIndexType": "hnsw",
        "vectorIndexConfig": hnsw_settings,
        "properties": [text_property],
    }
    try:
        client.collections.create_from_dict(schema)
        print(f"创建集合 '{collection_name}' 成功.")
    except weaviate.exceptions.UnexpectedStatusCodeException as err:
        print(f"创建集合异常: {err}")

In [None]:
def save_documents(client: weaviate.WeaviateClient, collection_name: str, documents: list):
    """
    Insert documents into a collection, one object per document.

    Every document is stored under the ``text`` property together with a
    fixed placeholder vector. A failure on one document is printed and
    does not stop the remaining inserts.

    :param client: Weaviate client
    :param collection_name: name of the target collection
    :param documents: documents to insert; each is assumed to be a plain string
    """
    target = client.collections.get(collection_name)
    for text in documents:
        # Placeholder embedding; a real pipeline would compute one per document.
        placeholder_vector = [0.1, 0.2, 0.3]
        payload = {"text": text}
        try:
            object_id = target.data.insert(
                properties=payload,
                vector=placeholder_vector,
            )
            print(f"文档添加内容: {text[:30]}..., uuid: {object_id}")
        except Exception as err:
            print(f"添加文档异常: {err}")

In [10]:
def check_collection_exists(client: weaviate.WeaviateClient, collection_name: str) -> bool:
    """
    Check whether a collection exists.

    The check is delegated to the client's ``collections.exists`` call
    rather than listing every collection and testing membership, so only
    the requested collection is looked up and the name is resolved by the
    client itself.

    Any exception raised by the lookup is printed and reported as
    ``False``, so callers always receive a boolean.

    :param client: Weaviate client
    :param collection_name: name of the collection to look for
    :return: True if the collection exists, False if it does not or the
        lookup failed
    """
    try:
        return client.collections.exists(collection_name)
    except Exception as e:
        # Best-effort: report the problem and treat it as "not found".
        print(f"检查集合异常: {e}")
        return False

In [None]:
def query_vector_collection(client: weaviate.WeaviateClient,
                            collection_name: str,
                            query: str,
                            k: int) -> list:
    """
    Run a nearest-vector search and return the ``text`` of each hit.

    The search uses a fixed placeholder vector; the ``query`` argument is
    accepted but not used by this implementation.

    :param client: Weaviate client
    :param collection_name: name of the collection to search
    :param query: query string (currently unused)
    :param k: maximum number of results to return
    :return: list with the ``text`` property of each returned object
    """
    # Placeholder query embedding; a real pipeline would embed ``query``.
    placeholder_vector = [0.1, 0.2, 0.3]
    hits = client.collections.get(collection_name).query.near_vector(
        near_vector=placeholder_vector,
        limit=k,
    )
    texts = []
    for hit in hits.objects:
        texts.append(hit.properties['text'])
    return texts

In [13]:
def delete_collection(client: weaviate.WeaviateClient, collection_name: str):
    """
    Delete a collection and print the outcome.

    Any exception raised while deleting is caught and printed rather
    than propagated.

    :param client: Weaviate client
    :param collection_name: name of the collection to delete
    """
    collections = client.collections
    try:
        collections.delete(collection_name)
        success_message = f"删除集合 '{collection_name}' 成功."
        print(success_message)
    except Exception as err:
        print(f"删除集合异常: {err}")

In [14]:
if __name__ == "__main__":
    # End-to-end demo: connect, create the collection if missing, insert
    # two documents, query them back, then delete the collection.
    client = weaviate.connect_to_local(auth_credentials=AuthApiKey("test-secret-key"))

    # Everything after a successful connect runs inside try/finally so the
    # connection is released even when one of the steps raises.
    try:
        collection_name = "MyCollection"

        # Create the collection only if it does not exist yet
        if not check_collection_exists(client, collection_name):
            create_collection(client, collection_name)

        # Insert data
        documents = ["This is a test document.", "Another document for testing."]
        save_documents(client, collection_name, documents)

        # Query data
        query_results = query_vector_collection(client, collection_name, "test", 2)
        print("查询结果:", query_results)

        # Delete the collection
        delete_collection(client, collection_name)
    finally:
        # Close the connection
        client.close()

创建集合 'MyCollection' 成功.
文档添加内容: This is a test document...., uuid: 6272cdac-bfb5-4844-af5b-c8bf99541bac
文档添加内容: Another document for testing...., uuid: 81bd56e2-4ba6-4b16-8adc-27fd3023fff9
查询结果: ['Another document for testing.', 'This is a test document.']
删除集合 'MyCollection' 成功.
