In [None]:
# Dependencies for this notebook. The install commands are left commented out;
# uncomment and run this cell once to install them into the kernel's environment.
# NOTE(review): both `pdfminer` and `pdfminer.six` are listed, and
# `sentence-transformers` is the only package without a version pin -- confirm
# that both are intended.
# %pip install pymilvus==2.3.5
# %pip install langchain==0.0.354
# %pip install tiktoken==0.5.2
# %pip install transformers==4.36.2
# %pip install pandas==2.1.4
# %pip install pdfminer==20191125
# %pip install pdfminer.six==20221105
# %pip install sentence-transformers

### Connecting to Milvus DB

In [None]:
from pymilvus import connections

# Alias under which the connection settings are registered and later looked up.
connection_id = 'learn'

# Register the settings for the alias, then connect using that alias.
# NOTE(review): confirm that pymilvus honours the 'username'/'password' keys in
# this mapping -- credentials may need to be passed to connections.connect().
connections.add_connection(
    **{
        connection_id: {
            "host": "localhost",
            'port': '19530',
            'username': '',
            'password': '',
        }
    }
)
connections.connect(connection_id)

# Last expression of the cell: display the known connections.
connections.list_connections()

### Creating DB and Users

In [None]:
from pymilvus import db
# Name of the database this notebook works in.
db_name = 'milvus_db_learn'

# `db.list_database` returns the databases visible through this connection.
current_dbs = db.list_database(using=connection_id)
print(f"Current DBS: {current_dbs}")

# Create the database only when it is missing, so the cell can be re-run.
if db_name not in current_dbs:
    print(f"Creating DB: {db_name}")
    wiki_db = db.create_database(db_name, using=connection_id)

# Point the connection at the notebook's database for the cells that follow.
db.using_database(db_name, using=connection_id)

#### Creating Users

In [None]:
import os

from pymilvus import Role, utility

# Users that already exist on the server; checked so the cell can be re-run.
current_users = utility.list_usernames(using=connection_id)
print(f"All Current users are: {current_users}")

user_name = "client_1"

if user_name not in current_users:
    print(f"Creating New User: {user_name}")
    # Do not hard-code the credential: take it from the environment. The
    # fallback keeps the tutorial runnable on a throwaway local server only --
    # set MILVUS_CLIENT_PASSWORD for anything else.
    user_password = os.environ.get("MILVUS_CLIENT_PASSWORD", "password")
    utility.create_user(user_name, user_password, using=connection_id)

# Look up the "public" role and report whether the server knows it.
public_role = Role("public", using=connection_id)
print(f"Role Public Exists? : {public_role.is_exist()}")

# Attach the user to the role.
public_role.add_user(user_name)

### Creating Collections

In [None]:
from pymilvus import CollectionSchema, FieldSchema, DataType, Collection
import json


# --- Field definitions ---
# Primary key: the numeric course identifier.
course_id = FieldSchema(name='course_id', dtype=DataType.INT64, is_primary=True)

# Text fields, each declared with its own max_length.
title = FieldSchema(name='title', dtype=DataType.VARCHAR, max_length=256)
description = FieldSchema(name='description', dtype=DataType.VARCHAR, max_length=2048)

# Vector field for the description embeddings, declared with 384 dimensions.
desc_embeddings = FieldSchema(name='desc_embeddings', dtype=DataType.FLOAT_VECTOR, dim=384)

# --- Collection schema ---
# Combines the four fields above; the dynamic-field option is switched on.
wiki_schema = CollectionSchema(
    fields=[course_id, title, description, desc_embeddings],
    description='Course Details',
    enable_dynamic_field=True,
)

collection_name = 'Course_List'

# Create the collection from the schema on the notebook's connection.
# The shard count keyword is `num_shards`; the previous spelling `shard_num`
# is not an option pymilvus recognises, so the requested 2 shards were
# silently ignored.
wiki_collections = Collection(
    name=collection_name,
    schema=wiki_schema,
    using=connection_id,
    num_shards=2,
)

print(f"Current Collection : {utility.list_collections(using = connection_id)}")

# Re-open the collection by name only and show the schema the server reports.
r_collection = Collection(collection_name, using=connection_id)
print(f'\n {r_collection.schema}')


## Inserting Data

In [None]:
import pandas as pd

In [None]:
# Read the course catalogue; the relative path resolves against the kernel's
# working directory.
run_file = pd.read_csv(filepath_or_buffer='course.csv')

# Show the first five rows as the cell output.
run_file.head(n=5)

In [None]:
from sentence_transformers import SentenceTransformer

# Sentence-embedding model used for both the stored course descriptions and the
# search queries later in the notebook.
# NOTE(review): the collection schema declares `desc_embeddings` with dim=384 --
# confirm this model's output size matches.
embedding_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')

In [None]:
# Pull the three CSV columns out as plain Python lists.
i_course_id = run_file['Course ID'].to_list()
i_course_name = run_file['Title'].to_list()
i_description = run_file['Description'].to_list()

# Encode every description in one batched call instead of one model call per
# row; list() splits the returned 2-D array back into one vector per course,
# so the payload keeps its original list-of-vectors shape.
i_desc_embeddings = list(embedding_model.encode(i_description))

# Column-oriented payload, listed in the same order as the schema fields
# (course_id, title, description, desc_embeddings).
insert_data = [i_course_id, i_course_name, i_description, i_desc_embeddings]


In [None]:
# Compact preview of the payload. Displaying `insert_data` itself prints every
# embedding vector, which floods the output and bloats the saved notebook.
{
    "rows": len(insert_data[0]),
    "first_course_ids": insert_data[0][:3],
    "first_titles": insert_data[1][:3],
    "embedding_length": len(insert_data[3][0]) if len(insert_data[3]) else 0,
}

In [None]:
# Handle to the collection, opened by name on the notebook's connection.
course_collection = Collection(name=collection_name, using=connection_id)

# Send the column-oriented payload; `mr` keeps whatever insert() returns.
mr = course_collection.insert(data=insert_data)

# Flush after the insert, waiting at most 360 seconds.
print("Inserted Data !!!,  Now Flushing")
course_collection.flush(timeout=360)

### Building an Index

In [None]:
# IVF_FLAT index over the embedding field, compared with the COSINE metric.
index_params = dict(
    metric_type='COSINE',
    index_type='IVF_FLAT',
    params=dict(nlist=1024),
)

course_collection.create_index(field_name='desc_embeddings', index_params=index_params)

# Last expression of the cell: display the index build progress.
utility.index_building_progress(collection_name, using=connection_id)

#### Scalar Query

In [None]:
# Load the collection ahead of the query and search cells that follow.
# NOTE(review): presumably Milvus requires a loaded collection for query/search -- confirm.
course_collection.load()

In [None]:
# Scalar lookup by primary key, returning only the two text fields.
q_result = course_collection.query(
    expr="course_id == 1003",
    output_fields=['title', 'description'],
)
print(q_result)

# Indexing an empty result would raise IndexError (e.g. when the course is
# absent or was deleted), so only inspect the first row when there is one.
if q_result:
    print(type(q_result[0]))
else:
    print("No course matched the query expression.")

In [None]:
# Combined filter: a title prefix match AND a lower bound on the course id.
q_result_2 = course_collection.query(
    expr="(title LIKE 'Database%') && (course_id > 1001)",
    output_fields=['title', 'description'],
)
print(q_result_2)

### Searching Vector Fields

In [None]:
# Search-time settings, reused by the later search cell as well.
search_params = dict(
    metric_type='COSINE',
    ignore_growing=False,
    params=dict(nprobe=10),
)

# Embed the free-text query with the same model used for the descriptions.
search_string = "Machine Learning"
search_embed = embedding_model.encode(search_string)

s_result = course_collection.search(
    data=[search_embed],            # one query vector
    anns_field="desc_embeddings",   # vector field to search
    param=search_params,
    limit=10,
    expr=None,                      # no scalar filter
    output_fields=['title'],
    consistency_level='Strong',
)
print(s_result)

# The result is indexed per query vector; index 0 belongs to the single query above.
print(f"Search result object: {type(s_result[0])}")

for hit in s_result[0]:
    print(hit.id, round(hit.distance, 3), "\t", hit.entity.get('title'))

In [None]:
# Second search with an unrelated query, using the same settings as before.
search_query_2 = "Best movies of 2025"
search_embed_2 = embedding_model.encode(search_query_2)

s_result_2 = course_collection.search(
    data=[search_embed_2],
    anns_field="desc_embeddings",
    param=search_params,
    limit=10,
    expr=None,
    output_fields=['title'],
    consistency_level='Strong',
)
print(s_result_2)

# One line per hit of the single query: id, raw distance, title.
for match in s_result_2[0]:
    print(match.id, match.distance, match.entity.get('title'))

### Deleting Objects and Entities

In [None]:
# Delete by expression: targets the entity whose primary key `course_id` is 1002.
course_collection.delete("course_id in [1002]")

In [None]:
# Optional cleanup, left disabled: uncomment to drop the collection named by
# `collection_name` on this connection.
# utility.drop_collection(collection_name, using= connection_id)

In [None]:
# Milvus refuses to drop a database that still contains collections, and the
# collection drop above is commented out, so empty the database first.
# Selecting the database explicitly ensures only its own collections are removed.
db.using_database(db_name, using=connection_id)
for leftover_collection in utility.list_collections(using=connection_id):
    utility.drop_collection(leftover_collection, using=connection_id)

db.drop_database(db_name, using=connection_id)