https://github.com/milvus-io/pymilvus/blob/master/examples/example.py

In [2]:
#! sudo apt install docker-compose

Reading package lists... Done
Building dependency tree       
Reading state information... Done
The following additional packages will be installed:
  python3-cached-property python3-docker python3-dockerpty python3-docopt
  python3-texttable python3-websocket
The following NEW packages will be installed:
  docker-compose python3-cached-property python3-docker python3-dockerpty
  python3-docopt python3-texttable python3-websocket
0 upgraded, 7 newly installed, 0 to remove and 3 not upgraded.
Need to get 262 kB of archives.
After this operation, 1616 kB of additional disk space will be used.
Do you want to continue? [Y/n] ^C


In [2]:
# Download the Milvus v2.2.8 standalone compose file and save it as docker-compose.yml:
# https://github.com/milvus-io/milvus/releases/download/v2.2.8/milvus-standalone-docker-compose.yml
! wget https://github.com/milvus-io/milvus/releases/download/v2.2.8/milvus-standalone-docker-compose.yml -O docker-compose.yml
# ! docker compose up -d

--2023-05-07 18:43:38--  https://github.com/milvus-io/milvus/releases/download/v2.2.8/milvus-standalone-docker-compose.yml
Resolving github.com (github.com)... 20.200.245.247
Connecting to github.com (github.com)|20.200.245.247|:443... connected.
HTTP request sent, awaiting response... 302 Found
Location: https://objects.githubusercontent.com/github-production-release-asset-2e65be/208728772/9523b6f4-ee59-4693-9801-71de012d4aec?X-Amz-Algorithm=AWS4-HMAC-SHA256&X-Amz-Credential=AKIAIWNJYAX4CSVEH53A%2F20230507%2Fus-east-1%2Fs3%2Faws4_request&X-Amz-Date=20230507T184307Z&X-Amz-Expires=300&X-Amz-Signature=655601febc14bde8d2147cb3c65b6541c285b339ea593b36d96de18b49af7845&X-Amz-SignedHeaders=host&actor_id=0&key_id=0&repo_id=208728772&response-content-disposition=attachment%3B%20filename%3Dmilvus-standalone-docker-compose.yml&response-content-type=application%2Foctet-stream [following]
--2023-05-07 18:43:38--  https://objects.githubusercontent.com/github-production-release-asset-2e65be/208728772

In [5]:
# Start the services defined in the downloaded docker-compose.yml in detached mode.
! sudo docker-compose up -d

Creating milvus-etcd ... 
Creating milvus-minio ... 
[1BCreating milvus-standalone ... [0m[1A[2K
[1Bting milvus-standalone ... [32mdone[0m

In [8]:
# Start the Attu web UI in the background, published on host port 8000.
# NOTE(review): MILVUS_URL uses localhost, which inside the container presumably refers to the
# Attu container itself rather than the Docker host - confirm, and use the host's IP if needed.
! sudo docker run -d --name milvus-attu -p 8000:3000 -e MILVUS_URL=localhost:19530 zilliz/attu:v2.2.3

79f1ec3174c391440f3bcc6ae26106ab2d3c08efd629648ac29e68266337b0e6


In [1]:
! pip install grpcio-tools==1.53.0
! pip install pymilvus

Collecting grpcio-tools==1.53.0
  Downloading grpcio_tools-1.53.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (2.4 MB)
[K     |████████████████████████████████| 2.4 MB 3.2 MB/s eta 0:00:01
Collecting protobuf<5.0dev,>=4.21.6
  Downloading protobuf-4.22.4-cp37-abi3-manylinux2014_x86_64.whl (302 kB)
[K     |████████████████████████████████| 302 kB 102.2 MB/s eta 0:00:01
[?25hCollecting grpcio>=1.53.0
  Downloading grpcio-1.54.0-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (5.1 MB)
[K     |████████████████████████████████| 5.1 MB 62.5 MB/s eta 0:00:01
[?25hInstalling collected packages: protobuf, grpcio, grpcio-tools
Successfully installed grpcio-1.54.0 grpcio-tools-1.53.0 protobuf-4.22.4
Collecting pymilvus
  Downloading pymilvus-2.2.8-py3-none-any.whl (133 kB)
[K     |████████████████████████████████| 133 kB 3.0 MB/s eta 0:00:01
[?25hCollecting pandas>=1.2.4
  Downloading pandas-2.0.1-cp38-cp38-manylinux_2_17_x86_64.manylinux2014_x86_64.whl (12.3 MB)
[K 

In [21]:
import random
import numpy as np
from datetime import datetime

from pymilvus import (
    connections,
    FieldSchema, CollectionSchema, DataType,
    Collection,
    utility
)

# This example shows how to:
#   1. connect to Milvus server
#   2. create a collection
#   3. insert entities
#   4. create index
#   5. search


# Milvus server endpoint used by create_connection().
_HOST = '127.0.0.1'
_PORT = '19530'

# Const names
_COLLECTION_NAME = 'embeddings'

_ID_FIELD_NAME = 'id'
_VECTOR_FIELD_NAME = 'title_vector'

# Vector parameters
_DIM = 1536  # dimension of the FLOAT_VECTOR field declared in create_collection()
_INDEX_FILE_SIZE = 32  # max file size of stored index; NOTE(review): not referenced by any visible cell

# Index parameters
_METRIC_TYPE = 'L2'
_INDEX_TYPE = 'HNSW'
# _NLIST = 1024
# _NPROBE = 16
_TOPK = 20  # passed as the search "limit"

# HNSW parameters: M / efConstruction are used when building the index, ef when searching.
_HNSW_PARAM_M = 8
_HNSW_PARAM_EFC = 64
_HNSW_SEARCH_PARAM_EF = 8192


def create_connection():
    """Connect to the Milvus server at ``_HOST``:``_PORT`` and print the known connections."""
    print("\nCreate connection...")
    connections.connect(port=_PORT, host=_HOST)
    print("\nList connections:")
    print(connections.list_connections())


def create_collection():
    """Create the collection named by ``_COLLECTION_NAME`` and return its handle.

    The schema has an auto-generated INT64 primary key, three VARCHAR fields
    (title, content, url), an INT64 ``vector_id`` and one FLOAT_VECTOR field of
    dimension ``_DIM``.
    """
    fields = [
        # Primary key; values are generated by Milvus (auto_id=True).
        FieldSchema(name=_ID_FIELD_NAME, dtype=DataType.INT64, description="int64", auto_id=True, is_primary=True),
        FieldSchema(name="title", dtype=DataType.VARCHAR, max_length=1024, description="title", is_primary=False),
        FieldSchema(name="content", dtype=DataType.VARCHAR, max_length=8192, description="content", is_primary=False),
        FieldSchema(name="url", dtype=DataType.VARCHAR, max_length=1024, description="url", is_primary=False),
        FieldSchema(name="vector_id", dtype=DataType.INT64, description="vector_id", is_primary=False),
        # The embedding column that gets indexed and searched.
        FieldSchema(name=_VECTOR_FIELD_NAME, dtype=DataType.FLOAT_VECTOR, description="title float vector", dim=_DIM, is_primary=False),
    ]
    schema = CollectionSchema(fields=fields, description="collection description")
    created = Collection(name=_COLLECTION_NAME, data=None, schema=schema)
    print("\ncollection created:", _COLLECTION_NAME)
    return created


def has_collection(name):
    """Return the result of ``utility.has_collection`` for the collection called ``name``."""
    exists = utility.has_collection(name)
    return exists


def get_collection(name):
    """Return a ``Collection`` handle for the existing collection called ``name``.

    The original built the handle but never returned it, so callers always got ``None``.
    """
    return Collection(name)

def drop_collection(name):
    """Drop the Milvus collection called ``name`` and print a confirmation."""
    Collection(name).drop()
    print(f"\nDrop collection: {name}")


def list_collections():
    """Print the collection names reported by ``utility.list_collections``."""
    print("\nlist collections:")
    names = utility.list_collections()
    print(names)

def insert(collection, num, dim):
    """Insert ``num`` synthetic entities into ``collection`` and return their vectors.

    Args:
        collection: object exposing ``insert(data)`` that accepts column-ordered data.
        num: number of entities to generate.
        dim: length of each random vector.

    Returns:
        The list of ``num`` random vectors (each a list of ``dim`` floats) that was
        inserted. The original returned ``data[1]``, which became the ``content``
        strings once extra columns were added; callers bind the result to ``vectors``.
    """
    titles = ["title {0}".format(datetime.now()) for _ in range(num)]
    contents = ["content {0}".format(datetime.now()) for _ in range(num)]
    urls = ["http://www.badiu.com?{0}".format(datetime.now()) for _ in range(num)]
    vector_ids = [i for i in range(num)]
    vectors = [[random.random() for _ in range(dim)] for _ in range(num)]

    # Columns follow the schema order from create_collection(); the primary key is
    # omitted because it is declared with auto_id=True.
    data = [titles, contents, urls, vector_ids, vectors]
    collection.insert(data)
    return vectors


def get_entity_num(collection):
    """Print a header line followed by ``collection.num_entities``."""
    print("\nThe number of entity:")
    total = collection.num_entities
    print(total)


def create_index(collection, filed_name):
    """Create an index on field ``filed_name`` of ``collection`` and print its parameters.

    Index type, metric type and the HNSW build parameters (``M``, ``efConstruction``)
    are taken from the module-level constants.
    """
    index_param = dict(
        index_type=_INDEX_TYPE,
        params={'M': _HNSW_PARAM_M, 'efConstruction': _HNSW_PARAM_EFC},  # HNSW build parameters
        metric_type=_METRIC_TYPE,
    )
    collection.create_index(filed_name, index_param)
    created = collection.index().params
    print("\nCreated index:\n{}".format(created))


def drop_index(collection):
    """Call ``collection.drop_index()`` and print a confirmation message."""
    collection.drop_index()
    message = "\nDrop index sucessfully"
    print(message)


def load_collection(collection):
    """Call ``collection.load()``; the notebook runs this before searching."""
    collection.load()


def release_collection(collection):
    """Call ``collection.release()``; the counterpart of ``load_collection``."""
    collection.release()


def search(collection, vector_field, id_field, search_vectors, outputFields):
    """Run a top-``_TOPK`` vector search and print every hit.

    Args:
        collection: collection handle exposing ``search(...)``.
        vector_field: name of the vector field to search against.
        id_field: name of the id field used in the filter expression ``<id_field> >= 0``.
            The original ignored this argument and always used ``_ID_FIELD_NAME``.
        search_vectors: query vectors; one result list is printed per query vector.
        outputFields: names of the fields to return with each hit.
    """
    results = collection.search(
        data=search_vectors,
        anns_field=vector_field,
        param={
            "metric_type": _METRIC_TYPE,
            # Only the search-time HNSW parameter belongs here; `M` is a build-time
            # parameter already supplied in create_index(), so it is no longer sent.
            "params": {"ef": _HNSW_SEARCH_PARAM_EF},
        },
        limit=_TOPK,
        expr="{} >= 0".format(id_field),
        output_fields=outputFields,
    )
    for i, hits in enumerate(results):
        print("\nSearch result for {}th vector: ".format(i))
        for rank, hit in enumerate(hits):
            print("Top {}: {}".format(rank, hit))


# def set_properties(collection):
    # https://github.com/milvus-io/milvus/issues/21731
    # [Bug]: The data disappears from the object store without ttl #21731
    # collection.set_properties(properties={"collection.ttl.seconds": 1800})


# def main():
    


# if __name__ == '__main__':
#     main()

In [22]:
# Connect to the Milvus server at _HOST:_PORT and print the registered connections.
create_connection()


Create connection...

List connections:
[('default', <pymilvus.client.grpc_handler.GrpcHandler object at 0x7f4873e9abe0>)]


In [89]:
# Drop a collection left over from a previous run. Guarded so that running this cell
# when the collection does not exist is a no-op instead of an error.
if has_collection(_COLLECTION_NAME):
    drop_collection(_COLLECTION_NAME)


Drop collection: embeddings


In [23]:
# Optional clean start (disabled): drop the collection first if it already exists.
# if has_collection(_COLLECTION_NAME):
#    drop_collection(_COLLECTION_NAME)

# drop_collection("embeddings")

# Create the collection and keep its handle for the following cells.
collection = create_collection()

# Alter collection-level TTL properties (disabled; set_properties is commented out above).
# https://github.com/milvus-io/milvus/issues/21731
# [Bug]: The data disappears from the object store without ttl #21731
# set_properties(collection)


collection created: embeddings


In [24]:
# Print the collections reported by the server.
list_collections()


list collections:
['embeddings']


In [25]:
# Re-acquire a handle by name so this cell does not rely on the variable from the create cell.
collection = Collection(_COLLECTION_NAME)

# Create the index on the vector field.
create_index(collection, _VECTOR_FIELD_NAME)


Created index:
{'index_type': 'HNSW', 'params': {'M': 8, 'efConstruction': 64}, 'metric_type': 'L2'}


In [31]:
# Print the entity count, then insert 50 batches of random entities.

collection = Collection(_COLLECTION_NAME)

get_entity_num(collection)

count = 10000  # entities inserted per batch

for i in range(50):
    # Insert `count` entities with random vectors of dimension _DIM, then flush
    # and print the running entity total.
    print("<<<<<<<<<<<<")
    print(i)
    vectors = insert(collection, count, _DIM)
    collection.flush()
    get_entity_num(collection)
    print(">>>>>>>>>>>>")


The number of entity:
500000
0

The number of entity:
510000
1

The number of entity:
520000
2

The number of entity:
530000
3

The number of entity:
540000
4

The number of entity:
550000
5

The number of entity:
560000
6

The number of entity:
570000
7

The number of entity:
580000
8

The number of entity:
590000
9

The number of entity:
600000
10

The number of entity:
610000
11

The number of entity:
620000
12

The number of entity:
630000
13

The number of entity:
640000
14

The number of entity:
650000
15

The number of entity:
660000
16

The number of entity:
670000
17

The number of entity:
680000
18

The number of entity:
690000
19

The number of entity:
700000
20

The number of entity:
710000
21

The number of entity:
720000
22

The number of entity:
730000
23

The number of entity:
740000
24

The number of entity:
750000
25

The number of entity:
760000
26

The number of entity:
770000
27

The number of entity:
780000
28

The number of entity:
790000
29

The number of entit

In [32]:
# Print the total number of entities after all inserts.
get_entity_num(collection)



The number of entity:
1000000


In [34]:
# Load the collection into memory; run this before the search cell.
load_collection(collection)

In [33]:
# Build one random float32 query vector. Its length comes from _DIM so it always
# matches the dimension of the collection's vector field (was hard-coded to 1536).
arr = [np.random.rand(_DIM).astype(np.float32)]
# Search and return the scalar fields alongside each hit.
search(collection, _VECTOR_FIELD_NAME, _ID_FIELD_NAME, arr, ["id","title","content","url"])


Search result for 0th vector: 
Top 0: id: 441330301192423686, distance: 218.5906982421875, entity: {'id': 441330301192423686, 'title': 'title 2023-05-08 11:39:28.281839', 'content': 'content 2023-05-08 11:39:28.322040', 'url': 'http://www.badiu.com?2023-05-08 11:39:28.363164'}
Top 1: id: 441330301192005674, distance: 222.07598876953125, entity: {'id': 441330301192005674, 'title': 'title 2023-05-08 10:47:27.560326', 'content': 'content 2023-05-08 10:47:27.590751', 'url': 'http://www.badiu.com?2023-05-08 10:47:27.621729'}
Top 2: id: 441330301192737592, distance: 222.26596069335938, entity: {'id': 441330301192737592, 'title': 'title 2023-05-08 11:45:33.139127', 'content': 'content 2023-05-08 11:45:33.186371', 'url': 'http://www.badiu.com?2023-05-08 11:45:33.219532'}
Top 3: id: 441330301192796817, distance: 222.43731689453125, entity: {'id': 441330301192796817, 'title': 'title 2023-05-08 11:46:45.545567', 'content': 'content 2023-05-08 11:46:45.576503', 'url': 'http://www.badiu.com?2023-0

In [20]:
# Teardown: release the collection, drop its index, then drop the collection itself.

# release memory
release_collection(collection)

# drop collection index
drop_index(collection)

# drop collection
drop_collection(_COLLECTION_NAME)


Drop index sucessfully

Drop collection: embeddings
