<h1>Vector DB - Zilliz Cloud</h1>
<h4>1.1. Configure Python</h4>

In [None]:
# Install pymilvus compatible with Milvus v2.3.x
!python3 -m pip install pymilvus==2.3.7

# Install pymilvus compatible with Milvus v2.4.x
!python3 -m pip install pymilvus==2.4.4

# Update PyMilvus to the newest version
!python3 -m pip install --upgrade pymilvus

# Verify installation success
!python3 -m pip list | grep pymilvus

!python3 -m pip uninstall -y setuptools

# Required for pymilvus
!python3 -m pip install setuptools==69.0.3

<h4>1.2. Get Credentials</h4>
<p>Credentials are stored in an external <b>.yaml</b> file for privacy reasons.</p>

In [None]:
# Read the cluster credentials from the external YAML configuration file
import yaml

with open("credentials.yaml") as f:
    credentials = yaml.safe_load(f)

# The parsed mapping lives in memory, so the values can be picked out
# once the file has been read.
CLUSTER_ENDPOINT = credentials["CLUSTER_ENDPOINT"]
TOKEN = credentials["TOKEN"]

<h4>2.1. Connect to a Cluster</h4>

In [None]:
# Open a connection to the cluster through a MilvusClient instance
from pymilvus import MilvusClient

client = MilvusClient(
    # Cluster endpoint, as obtained from the provider's web console
    uri=CLUSTER_ENDPOINT,
    # API key (preferred) or a colon-separated cluster username and password
    token=TOKEN,
)

<h4>2.2. Create Collection with Quick Setup</h4>
<p>In this section, we will create a collection with a very simple setup, i.e., an id and a single vector field.</p>

In [None]:
# Quick setup mode: only the collection name and the dimension of the
# vector embeddings (> 1) are supplied.
#
# The collection gets an id and a vector field, with
# + auto_id - Auto-incremental id
# + enable_dynamic_field enabled - Dynamic fields (allow extra data field
#   during insertion) and include a $meta field
client.create_collection(collection_name="test_collection", dimension=5)

# Ask the cluster for the load state of the new collection and show it
load_state = client.get_load_state(collection_name="test_collection")
print(load_state)

<h4>2.3. Drop Collection</h4>
<p>In this tutorial, we use the free tier of Zilliz Cloud, which allows only two collections to exist at a time. Consequently, we drop each collection as soon as we are done using it.</p>

In [None]:
# Remove the quick-setup collection now that it is no longer needed
client.drop_collection(collection_name="test_collection")

<h4>2.4.1. Create Collection with Customized Setup - Create Schema</h4>
<p>Custom collections can be created by configuring (a) the schema, (b) the indexes, and (c) creating the actual collection.</p>

In [None]:
from pymilvus import DataType

# Schema with a caller-supplied primary key (auto_id off) and dynamic fields on
schema = MilvusClient.create_schema(auto_id=False, enable_dynamic_field=True)

# Primary key: 64-bit integer id
schema.add_field(
    field_name="custom_id",
    datatype=DataType.INT64,
    is_primary=True,
)

# Embedding: float vector with 5 dimensions
schema.add_field(
    field_name="custom_vector",
    datatype=DataType.FLOAT_VECTOR,
    dim=5,
)

<h4>2.4.2. Create Collection with Customized Setup - Create Indexes</h4>

In [None]:
# Start from an empty set of index parameters
index_params = client.prepare_index_params()

# Index on the primary key; STL_SORT = Standard Library Sort
index_params.add_index(field_name="custom_id", index_type="STL_SORT")

# Index on the vector field.
# AUTOINDEX: customized version of HNSW (Hierarchical Navigable Small World,
# high dimensional similarity search) and DiskANN (graph-based indexing and
# search system). The metric type IP stands for Inner Product.
index_params.add_index(
    field_name="custom_vector",
    index_type="AUTOINDEX",
    metric_type="IP",
    params={"nlist": 128},
)

<h4>2.4.3. Create Collection with Customized Setup - Create the Collection</h4>

In [None]:
import time

# Create the collection from the schema and index parameters prepared above
client.create_collection(
    collection_name="custom_collection",
    schema=schema,
    index_params=index_params,
)

# Pause for five seconds before querying the load state
time.sleep(5)

res = client.get_load_state(collection_name="custom_collection")
print(res)

<h4>2.5. Create a Collection with Multiple Vector Fields</h4>
<p>Collections in Zilliz can contain up to 5 vector fields.</p>

In [None]:
# Schema for a collection that carries more than one vector field
schema = client.create_schema(auto_id=False, enable_dynamic_field=True)

# Primary key field
schema.add_field(
    field_name="custom_id",
    datatype=DataType.INT64,
    is_primary=True,
)

# Binary vector field; binary vector dimensions must be a multiple of 8
schema.add_field(
    field_name="text_vector",
    datatype=DataType.BINARY_VECTOR,
    dim=8,
)

# Float vector field with 128 dimensions
schema.add_field(
    field_name="image_vector",
    datatype=DataType.FLOAT_VECTOR,
    dim=128,
)

In [None]:
# Fresh index parameters for the two vector fields
index_params = client.prepare_index_params()

# text_vector:
# - in Zilliz Cloud, the index type should always be `AUTOINDEX`
# - for vectors of the `BINARY_VECTOR` type, use `HAMMING` or `JACCARD`
#   as the metric type
index_params.add_index(
    field_name="text_vector",
    index_type="AUTOINDEX",
    metric_type="HAMMING",
    params={"nlist": 128},
)

# image_vector: float vectors compared by inner product (IP)
index_params.add_index(
    field_name="image_vector",
    index_type="AUTOINDEX",
    metric_type="IP",
    params={"nlist": 128},
)

# Create the collection with both indexes attached
client.create_collection(
    collection_name="custom_multiple_vector_field_collection",
    schema=schema,
    index_params=index_params,
)

# Pause for five seconds before querying the load state
time.sleep(5)

load_state = client.get_load_state(
    collection_name="custom_multiple_vector_field_collection",
)
print(load_state)

<h4>2.6. View Collection</h4>

In [None]:
from pprint import pprint

# Fetch the description of the multi-vector collection and pretty-print it
descr = client.describe_collection(
    collection_name="custom_multiple_vector_field_collection",
)
pprint(descr)

In [None]:
# Names of all collections currently present in the cluster
collection_list = client.list_collections()
print(collection_list)

<h4>2.7. Load and Release Collection</h4>

In [None]:
# Load the collection, then read back its load state
client.load_collection(collection_name="custom_multiple_vector_field_collection")

load_state = client.get_load_state(
    collection_name="custom_multiple_vector_field_collection",
)
print(load_state)

In [None]:
# Release the collection, then read back its load state
client.release_collection(collection_name="custom_multiple_vector_field_collection")

release_state = client.get_load_state(
    collection_name="custom_multiple_vector_field_collection",
)
pprint(release_state)

<h4>2.8. Aliases</h4>
<p>In Zilliz, we can assign aliases to collections so that they can be referred to by different names.</p>

In [None]:
# Attach two aliases to the multi-vector collection, one after the other
for alias_name in ("custom_alias", "another_custom_alias"):
    client.create_alias(
        collection_name="custom_multiple_vector_field_collection",
        alias=alias_name,
    )

In [None]:
# Retrieve the aliases attached to the multi-vector collection
aliases = client.list_aliases(
    collection_name="custom_multiple_vector_field_collection",
)
pprint(aliases)

In [None]:
# Look up the details of a single alias
alias_desc = client.describe_alias(alias="custom_alias")
pprint(alias_desc)

In [None]:
# Reassign "another_custom_alias" to custom_collection
client.alter_alias(collection_name="custom_collection", alias="another_custom_alias")

# List the aliases of both collections to see the result of the reassignment
cc_alias_list = client.list_aliases(collection_name="custom_collection")
cmvfc_alias_list = client.list_aliases(
    collection_name="custom_multiple_vector_field_collection",
)

pprint(cc_alias_list)
pprint(cmvfc_alias_list)

In [None]:
# Remove both aliases, in the order they were created
for alias_name in ("custom_alias", "another_custom_alias"):
    client.drop_alias(alias=alias_name)

# List the aliases of the multi-vector collection after the removal
aliases = client.list_aliases(
    collection_name="custom_multiple_vector_field_collection",
)
pprint(aliases)

In [None]:
# Drop all remaining collections
for collection_name in (
    "custom_multiple_vector_field_collection",
    "custom_collection",
):
    client.drop_collection(collection_name=collection_name)

<h4>3.1. Manage Indexes</h4>

In [None]:
# Create schema: caller-supplied primary key (auto_id off), dynamic fields on
schema = MilvusClient.create_schema(
    auto_id = False,
    enable_dynamic_field = True,
)

# Fields: an INT64 primary key, a 5-dimensional float vector and a float scalar
schema.add_field(field_name = "id", datatype = DataType.INT64, is_primary = True)
schema.add_field(field_name = "vector", datatype = DataType.FLOAT_VECTOR, dim = 5)
schema.add_field(field_name = "scalar", datatype = DataType.FLOAT)

# Create collection from the schema only; no index_params are passed here,
# the indexes are added afterwards with create_index
client.create_collection(
    collection_name = "index_collection",
    schema = schema,
)

# Set up the index parameters through the connected client instance
index_params = client.prepare_index_params()

# Add an index on the vector field.
index_params.add_index(
    field_name = "vector",
    metric_type = "COSINE",
    index_type = "AUTOINDEX",
    index_name = "vector_index"
)

# Add an index to the scalar field
index_params.add_index(
    field_name = "scalar",
    index_type = "", # Empty = AUTOINDEX
    index_name = "scalar_index"
)

# Create the indexes described by index_params. A single call is enough:
# index_params already carries both the vector and the scalar index.
client.create_index(
    collection_name = "index_collection", # Specify the collection name
    index_params = index_params
)

In [None]:
# List the indexes of the collection
index_list = client.list_indexes(collection_name="index_collection")
pprint(index_list)

# Show the details of the vector index
index_descr = client.describe_index(
    collection_name="index_collection",
    index_name="vector_index",
)
pprint(index_descr)

In [None]:
# Remove the vector index from the collection
client.drop_index(collection_name="index_collection", index_name="vector_index")

# List the indexes again after the drop
index_list = client.list_indexes(collection_name="index_collection")
pprint(index_list)

In [None]:
# Clean up: remove the collection used for the index examples
client.drop_collection(collection_name="index_collection")