Make connection

In [26]:
from pymilvus import connections

connections.connect(alias="default", user="username", password="password", host="localhost", port="19530")

Create collection

In [27]:
from pymilvus import CollectionSchema, FieldSchema, DataType

book_id = FieldSchema(
    name="book_id",
    dtype=DataType.INT64,
    is_primary=True,
)
book_name = FieldSchema(
    name="book_name",
    dtype=DataType.VARCHAR,
    max_length=200,
    # The default value will be used if this field is left empty during data inserts or upserts.
    # The data type of `default_value` must be the same as that specified in `dtype`.
    default_value="Unknown",
)
word_count = FieldSchema(
    name="word_count",
    dtype=DataType.INT64,
    # The default value will be used if this field is left empty during data inserts or upserts.
    # The data type of `default_value` must be the same as that specified in `dtype`.
    default_value=9999,
)
book_intro = FieldSchema(name="book_intro", dtype=DataType.FLOAT_VECTOR, dim=2)
schema = CollectionSchema(
    fields=[book_id, book_name, word_count, book_intro], description="Test book search", enable_dynamic_field=True
)
collection_name = "book"

In [28]:
schema

{'auto_id': False, 'description': 'Test book search', 'fields': [{'name': 'book_id', 'description': '', 'type': <DataType.INT64: 5>, 'is_primary': True, 'auto_id': False}, {'name': 'book_name', 'description': '', 'type': <DataType.VARCHAR: 21>, 'params': {'max_length': 200}}, {'name': 'word_count', 'description': '', 'type': <DataType.INT64: 5>}, {'name': 'book_intro', 'description': '', 'type': <DataType.FLOAT_VECTOR: 101>, 'params': {'dim': 2}}], 'enable_dynamic_field': True}

In [29]:
from pymilvus import Collection

collection = Collection(name=collection_name, schema=schema, using="default", shards_num=2)

Insert data

In [30]:
import random
import pandas as pd

data = []
for i in range(10):
    book_id = i + 1
    book_name = f"Book {book_id}"
    word_count = random.randint(10000, 50000)
    book_intro = [random.random() for _ in range(2)]
    data.append([book_id, book_name, word_count, book_intro])

df = pd.DataFrame(data, columns=["book_id", "book_name", "word_count", "book_intro"])
df

Unnamed: 0,book_id,book_name,word_count,book_intro
0,1,Book 1,16445,"[0.6610175950476592, 0.33421595829939765]"
1,2,Book 2,22584,"[0.37291921139878503, 0.5479297831839793]"
2,3,Book 3,36040,"[0.7954882081296476, 0.7878658744388319]"
3,4,Book 4,11261,"[0.043185331672709326, 0.3498439724490018]"
4,5,Book 5,39583,"[0.3920103206313903, 0.8601687296944729]"
5,6,Book 6,49990,"[0.4052194588885134, 0.020604779220843916]"
6,7,Book 7,49207,"[0.7060329046695322, 0.025111229204348384]"
7,8,Book 8,12267,"[0.1466704245567495, 0.5430185533989226]"
8,9,Book 9,39384,"[0.49970534419837165, 0.5237505672379067]"
9,10,Book 10,26990,"[0.5327977764660792, 0.6376041502949953]"


In [31]:
from pymilvus import Collection

collection = Collection("book")  # Get an existing collection.
mr = collection.insert(df)

Build an index

In [32]:
index_params = {"metric_type": "COSINE", "index_type": "FLAT", "params": {"nlist": 1024}}

In [33]:
from pymilvus import Collection, utility

collection = Collection("book")
collection.create_index(field_name="book_intro", index_params=index_params)

utility.index_building_progress("book")

{'total_rows': 0, 'indexed_rows': 0, 'pending_index_rows': 0}

Search and query

In [36]:
from pymilvus import Collection
collection = Collection("book")      # Get an existing collection.
collection.load()

In [37]:
search_params = {
    "metric_type": "COSINE", 
    "offset": 0, 
    "ignore_growing": False, 
    "params": {"nprobe": 4}
}

In [40]:
results = collection.search(
    data=[[0.1, 0.2]], 
    anns_field="book_intro", 
    # the sum of `offset` in `param` and `limit` 
    # should be less than 16384.
    param=search_params,
    limit=10,
    expr=None,
    # set the names of the fields you want to 
    # retrieve from the search result.
    output_fields=['title'],
    consistency_level="Strong"
)

print(results[0].ids)

print(results[0].distances)

hit = results[0][0]
hit.entity.get('title')

[7, 5, 1, 2, 8, 10, 6, 9, 3, 4]
[0.9995388984680176, 0.9993508458137512, 0.9951781630516052, 0.9910438656806946, 0.9800982475280762, 0.973106861114502, 0.9722944498062134, 0.9558489322662354, 0.947149932384491, 0.9424784779548645]


In [39]:
results

["['id: 7, distance: 0.9995388984680176, entity: {}', 'id: 5, distance: 0.9993508458137512, entity: {}', 'id: 1, distance: 0.9951781630516052, entity: {}', 'id: 2, distance: 0.9910438656806946, entity: {}', 'id: 8, distance: 0.9800982475280762, entity: {}', 'id: 10, distance: 0.973106861114502, entity: {}', 'id: 6, distance: 0.9722944498062134, entity: {}', 'id: 9, distance: 0.9558489322662354, entity: {}', 'id: 3, distance: 0.947149932384491, entity: {}', 'id: 4, distance: 0.9424784779548645, entity: {}']"]