In [None]:
import json
import os
import random
import string
import sys
import time

import numpy as np
import pandas as pd
from milvus import default_server, debug_server
from pymilvus import model, CollectionSchema, FieldSchema, DataType, utility, connections, Collection, BulkInsertState, Role

# Spinning-up Local Milvus Server

In [None]:
# Make sure no stale server instance or leftover data survives from an earlier run.
default_server.stop()
default_server.cleanup()

# Launch a fresh milvus-lite local server and measure how long the start-up takes.
start_time = time.time()
default_server.start()
end_time = time.time()
startup_seconds = round(end_time - start_time, 3)
print('Server startup time: {}'.format(startup_seconds))

# Keep the listening port around; the connection cell needs it.
port_number = default_server.listen_port
print('Server started on port number: {}'.format(port_number))

# Connect to Milvus DB

In [None]:
# Alias under which pymilvus registers this connection.
connect_name = 'default'

# Read the credentials from the environment so real secrets never have to be
# committed with the notebook. The fallbacks are the values this notebook
# originally hard-coded, so the local demo keeps working unchanged.
milvus_user = os.environ.get('MILVUS_USER', 'root')
milvus_password = os.environ.get('MILVUS_PASSWORD', 'Milvus')

print('Connecting to Milvus ...')
connections.connect(
    connect_name,
    host='localhost',
    port=port_number,  # port reported by the local server started above
    user=milvus_user,
    password=milvus_password
)
print('Connection established!')

# Collections

In [None]:
# Show which collections are currently present on the server.
existing_collections = utility.list_collections()
print('Existing collections: {}'.format(existing_collections))

# Drop every existing collection so the notebook starts from a clean slate.
for collection_name in utility.list_collections():
    utility.drop_collection(collection_name)
print('Deleted all collections!')

## Defining Field & Collection Schema

In [None]:
# Primary key. It is named `id_field` rather than `id` so the Python builtin
# `id()` is not shadowed for the rest of the kernel session. The Milvus field
# itself is still called 'id', and values are generated because auto_id=True.
id_field = FieldSchema(
    name='id',
    dtype=DataType.INT64,
    is_primary=True,
    auto_id=True,
)

# Short title of each passage (at most 256 characters).
text_title = FieldSchema(
    name='title',
    dtype=DataType.VARCHAR,
    max_length=256
)

# Full passage text.
text = FieldSchema(
    name='text',
    dtype=DataType.VARCHAR,
    max_length=65535,
)

# Dense embedding of the passage; `dim` must equal the output size of the
# embedding model used when inserting and searching.
text_embedding = FieldSchema(
    name='text_embedding',
    dtype=DataType.FLOAT_VECTOR,
    dim=384
)

# Scalar field with a default of 0.
read_count = FieldSchema(
    name='read_count',
    dtype=DataType.INT64,
    default_value=0
)

# The field order here is the column order expected by column-based inserts
# (minus the auto-generated primary key).
text_collection_schema = CollectionSchema(
    fields=[id_field, text_title, text, text_embedding, read_count],
    description='A humble story of Lorem Ipsum dummy text',
    enable_dynamic_field=True
)

## Creating Collection

In [None]:
collection_name = 'lorem_ipsum_story'
print('Creating collection: {}'.format(collection_name))

# Collect the constructor arguments first so they can be read at a glance.
collection_options = {
    'name': collection_name,
    'schema': text_collection_schema,
    'using': 'default',
    'shards_num': 3,
}
collection = Collection(**collection_options)
print('Collection created!')

In [None]:
# Confirm that the new collection now shows up on the server.
collections_after_create = utility.list_collections()
print('Existing collections: {}'.format(collections_after_create))

## Renaming Collection

## Dropping Collection

## Creating Partition

## Dropping Partition

## Inserting Data

In [None]:
from pymilvus.model.hybrid import BGEM3EmbeddingFunction

# Settings for the BGE-M3 embedding function.
# NOTE(review): `bge_m3_ef` is not referenced by any later cell visible in this
# notebook; confirm it is still needed before paying for the model load.
bge_m3_options = dict(
    model_name='BAAI/bge-m3',  # which model to load
    device='cpu',  # device to run on, e.g. 'cpu' or 'cuda:0'
    use_fp16=False  # keep False when `device` is 'cpu'
)
bge_m3_ef = BGEM3EmbeddingFunction(**bge_m3_options)

In [None]:
# Dummy data for the collection. `text_titles` and `texts` are parallel lists:
# the title at index i belongs to the passage at index i.

# Titles stored in the 'title' field (one per passage).
text_titles = [
    'Design Focus Filler',
    'Popularity Surge History',
    'Misunderstood Nonsense Text',
    'Cicero’s Speedy Rebuttal',
    'Latin Placeholder Origin',
    'Lorem’s True Source',
    'Cicero’s Pain Philosophy',
    'Typesetting Historical Roots',
    'Lorem Ipsum’s 1960s Rise',
    'Graphic Arts Classical Debate'
]

# Passages stored in the 'text' field; these are also the inputs that get embedded.
texts = [
    'The purpose of lorem ipsum is to create a natural looking block of text (sentence, paragraph, page, etc.) that doesn\'t distract from the layout. A practice not without controversy, laying out pages with meaningless filler text can be very useful when the focus is meant to be on design, not content.',
    'The passage experienced a surge in popularity during the 1960s when Letraset used it on their dry-transfer sheets, and again during the 90s as desktop publishers bundled the text with their software. Today it\'s seen all around the web; on templates, websites, and stock designs. Use our generator to get your own, or read on for the authoritative history of lorem ipsum.',
    'Until recently, the prevailing view assumed lorem ipsum was born as a nonsense text. “It\'s not Latin, though it looks like it, and it actually says nothing,” Before & After magazine answered a curious reader, “Its ‘words’ loosely approximate the frequency with which letters occur in English, which is why at a glance it looks pretty real.”',
    'As Cicero would put it, “Um, not so fast.”',
    'The placeholder text, beginning with the line “Lorem ipsum dolor sit amet, consectetur adipiscing elit”, looks like Latin because in its youth, centuries ago, it was Latin.',
    'Richard McClintock, a Latin scholar from Hampden-Sydney College, is credited with discovering the source behind the ubiquitous filler text. In seeing a sample of lorem ipsum, his interest was piqued by consectetur—a genuine, albeit rare, Latin word. Consulting a Latin dictionary led McClintock to a passage from De Finibus Bonorum et Malorum (“On the Extremes of Good and Evil”), a first-century B.C. text from the Roman philosopher Cicero.',
    'In particular, the garbled words of lorem ipsum bear an unmistakable resemblance to sections 1.10.32–33 of Cicero\'s work, with the most notable passage excerpted below: “Neque porro quisquam est, qui dolorem ipsum quia dolor sit amet, consectetur, adipisci velit, sed quia non numquam eius modi tempora incidunt ut labore et dolore magnam aliquam quaerat voluptatem.” A 1914 English translation by Harris Rackham reads: “Nor is there anyone who loves or pursues or desires to obtain pain of itself, because it is pain, but occasionally circumstances occur in which toil and pain can procure him some great pleasure.” McClintock\'s eye for detail certainly helped narrow the whereabouts of lorem ipsum\'s origin, however, the “how and when” still remain something of a mystery, with competing theories and timelines.',
    'So how did the classical Latin become so incoherent? According to McClintock, a 15th century typesetter likely scrambled part of Cicero\'s De Finibus in order to provide placeholder text to mockup various fonts for a type specimen book.',
    'It\'s difficult to find examples of lorem ipsum in use before Letraset made it popular as a dummy text in the 1960s, although McClintock says he remembers coming across the lorem ipsum passage in a book of old metal type samples. So far he hasn\'t relocated where he once saw the passage, but the popularity of Cicero in the 15th century supports the theory that the filler text has been used for centuries.',
    'And anyways, as Cecil Adams reasoned, “[Do you really] think graphic arts supply houses were hiring classics scholars in the 1960s?” Perhaps. But it seems reasonable to imagine that there was a version in use far before the age of Letraset.'
]

# Dummy text_embeddings: one dense vector per entry of `texts`.
# NOTE(review): device='cuda' needs a CUDA-capable GPU; switch to 'cpu' on
# machines without one.
sentence_transformer_embedder = model.dense.SentenceTransformerEmbeddingFunction(
    batch_size=32,
    model_name='all-MiniLM-L6-v2',
    device='cuda'
)
text_embeddings = sentence_transformer_embedder.encode_documents(texts)

# Dummy read_count: drawn from a seeded generator so that Restart & Run All
# reproduces the same values (range 0..9999, one value per text).
read_count_rng = np.random.default_rng(42)
read_counts = read_count_rng.integers(0, 10000, size=len(texts))

# The insert below is column-based, so every column must describe the same
# number of rows; fail early with a clear message if the dummy lists drift apart.
column_lengths = {len(text_titles), len(texts), len(text_embeddings), len(read_counts)}
assert len(column_lengths) == 1, 'all insert columns must have the same number of rows'

# Columns in schema order.
data = [
    # id (not required as auto_id is enabled)
    text_titles,
    texts,
    text_embeddings,
    read_counts
]

In [None]:
# Insert the column-based dummy data and keep the returned result object.
data_insert = collection.insert(data=data)

In [None]:
# Boolean expression selecting the rows to delete by primary key.
# NOTE(review): the schema uses auto_id=True, so the literal id 1 most likely
# matches no row on a fresh run; adjust it to a real primary key to see a deletion.
expression = 'id in [1]'

# Store the delete result under its own name so the insert result in
# `data_insert` is not overwritten.
data_delete = collection.delete(expression)

## Compacting Data

In [None]:
# Ask Milvus to compact the collection's data (e.g. after the delete above).
collection.compact()

## Indexing Data

In [None]:
# Example index parameters: ANNOY index with L2 (Euclidean) distance.
# The original cell had an unterminated string literal here, which raised a
# SyntaxError and stopped "Run All". The next cell redefines `index_params`
# before any index is built, so this dict is illustrative only.
index_params = {
    'metric_type': 'L2',
    'index_type': 'ANNOY'
}

In [None]:
# Parameters of the index itself: exact (FLAT) search with L2 distance.
index_params = {
    'metric_type': 'L2',
    'index_type': 'FLAT'
}

print('Creating index on text_embedding ...')
collection.create_index(
    field_name='text_embedding',
    index_params=index_params,
    # The index name is a keyword argument of create_index, not an entry of
    # `index_params`. Inside the dict it would be sent to the server as an
    # index parameter instead of naming the index.
    index_name='l2_flat_text_embedding'
)
print('Index created!')

## Dropping Index

## Searching & Querying

In [None]:
# Load the collection into memory with a single replica; Milvus requires a
# collection to be loaded before it can be searched or queried.
collection.load(replica_number=1)

**Vector similarity search**

In [None]:
query_text = 'What is Lorem Ipsum?'

# Reuse the embedder created in the data-preparation cell instead of loading
# the same model again. The embedding functions take a *list* of strings, so
# the single query is wrapped in a list; a bare string would not be embedded
# as one query. `encode_queries` is the query-side counterpart of
# `encode_documents`.
query_embeddings = sentence_transformer_embedder.encode_queries([query_text])

results = collection.search(
    data=query_embeddings,
    anns_field='text_embedding',
    param={
        'metric_type': 'L2'
    },
    limit=5,
    expr=None,
    output_fields=['id', 'text', 'read_count']
)

# results[0] holds the hits for the first (and only) query vector; turn each
# hit into one record and build the frame in a single step.
result_columns = ['id', 'distance', 'text', 'read_count']
result_records = [
    {
        'id': hit.id,
        'distance': hit.distance,
        'text': hit.entity.text,
        'read_count': hit.entity.read_count
    }
    for hit in results[0]
]

results_df = pd.DataFrame(result_records, columns=result_columns)
results_df

**Scalar search**

In [None]:
# Primary keys are auto-generated (auto_id=True in the schema) and differ on
# every run, so a hard-coded id from an earlier session matches nothing after
# Restart & Run All. Look up one existing primary key and filter on that.
sample_rows = collection.query(
    limit=1,
    expr='id > 0',
    output_fields=['id']
)
sample_ids = [row['id'] for row in sample_rows]

results = collection.query(
    limit=5,
    expr='id in {}'.format(sample_ids),
    output_fields=['id', 'text', 'read_count']
)

# Each query result is a dict; keep only the requested columns, in a fixed order.
result_columns = ['id', 'text', 'read_count']
result_records = [
    {column: row[column] for column in result_columns}
    for row in results
]

results_df = pd.DataFrame(result_records, columns=result_columns)
results_df

**Hybrid search**

In [None]:
query_text = 'What is Lorem Ipsum?'

# Reuse the embedder created in the data-preparation cell instead of loading
# the same model again. The embedding functions take a *list* of strings, so
# the single query is wrapped in a list; a bare string would not be embedded
# as one query.
query_embeddings = sentence_transformer_embedder.encode_queries([query_text])

# Primary keys are auto-generated (auto_id=True in the schema) and differ on
# every run, so a hard-coded id from an earlier session matches nothing after
# Restart & Run All. Look up one existing primary key to use as the scalar filter.
sample_rows = collection.query(
    limit=1,
    expr='id > 0',
    output_fields=['id']
)
sample_ids = [row['id'] for row in sample_rows]

# Vector similarity search restricted to the rows matching the scalar filter.
results = collection.search(
    data=query_embeddings,
    anns_field='text_embedding',
    param={
        'metric_type': 'L2'
    },
    limit=5,
    expr='id in {}'.format(sample_ids),
    output_fields=['id', 'text', 'read_count']
)

# results[0] holds the hits for the first (and only) query vector.
result_columns = ['id', 'distance', 'text', 'read_count']
result_records = [
    {
        'id': hit.id,
        'distance': hit.distance,
        'text': hit.entity.text,
        'read_count': hit.entity.read_count
    }
    for hit in results[0]
]

results_df = pd.DataFrame(result_records, columns=result_columns)
results_df

In [None]:
# Release the collection from memory now that searching and querying are done.
collection.release()

# Roles

## Adding a role

In [None]:
# Roles known to the server before the new one is created.
roles_before = utility.list_roles(include_user_info=True, using='default')
print('Available roles: {}'.format(roles_before))

# Create the demo role on the 'default' connection.
role_name = 'test_role'
role = Role(role_name, using='default')
role.create()

# List the roles again so the new one is visible in the output.
roles_after = utility.list_roles(include_user_info=True, using='default')
print('Available roles: {}'.format(roles_after))

## Granting permissions to the role

In [None]:
# Grant the 'Search' privilege on object type 'Collection', with '*' passed as
# the object name, then display the role's current grants as the cell output.
role.grant('Collection', '*', 'Search')
role.list_grants()

## Revoking permissions from the role

In [None]:
# Revoke the same 'Search' privilege that was granted above, then display the
# role's remaining grants as the cell output.
role.revoke('Collection', '*', 'Search')
role.list_grants()

## Dropping a role

In [None]:
# Roles known to the server while the demo role still exists.
roles_before_drop = utility.list_roles(include_user_info=True, using='default')
print('Available roles: {}'.format(roles_before_drop))

# Remove the demo role again.
role.drop()

# List the roles once more to confirm the role is gone.
roles_after_drop = utility.list_roles(include_user_info=True, using='default')
print('Available roles: {}'.format(roles_after_drop))

# Shut down and clean up the Milvus server

In [None]:
# Stop the local Milvus server, then clean up the data it left behind.
default_server.stop()
default_server.cleanup()