In [1]:
from pymilvus import connections, db, utility, FieldSchema, CollectionSchema, DataType, Collection
import ollama

In [14]:
def get_embedding(model:str, prompt: str) -> list:
    response = ollama.embeddings(model=model, prompt=prompt)
    return response.get('embedding', [])


em = get_embedding(model="nomic-embed-text:v1.5", prompt="Hello world")
len(em)

768

In [3]:
conn = connections.connect(host='localhost', port='19530')

In [4]:
db.list_database()

['default', 'JobOrder']

In [5]:
db.create_database('JobOrder')

2025-06-21 23:10:01,705 [ERROR][handler]: RPC error: [create_database], <MilvusException: (code=65535, message=database already exist: JobOrder)>, <Time:{'RPC start': '2025-06-21 23:10:01.701086', 'RPC error': '2025-06-21 23:10:01.705883'}> (decorators.py:140)


MilvusException: <MilvusException: (code=65535, message=database already exist: JobOrder)>

In [6]:
db.using_database('JobOrder')

In [7]:
utility.list_collections()

[]

In [15]:
id_feild = FieldSchema(name='id', dtype=DataType.INT64, is_primary=True, auto_id=True)
clinet_name_feild = FieldSchema(name='client_name', dtype=DataType.VARCHAR, max_length=100)
job_title_feild = FieldSchema(name='job_title', dtype=DataType.VARCHAR, max_length=100)
job_desc_feild = FieldSchema(name='job_desc', dtype=DataType.VARCHAR, max_length=10000)
job_desc_embedding_feild = FieldSchema(name='job_desc_embedding', dtype=DataType.FLOAT_VECTOR, dim=768)

job_oreder_schema = CollectionSchema([id_feild, clinet_name_feild, job_title_feild, job_desc_feild, job_desc_embedding_feild])

In [16]:
collection = Collection(name='job_order', schema=job_oreder_schema)

In [17]:
index_params = {
    'metric_type': 'L2',
    'index_type': 'HNSW',
    'params': {
        'M': 16,
        'efConstruction': 200
    }
}

collection.create_index(field_name='job_desc_embedding', index_params=index_params)

Status(code=0, message=)

In [20]:
collection.load()

In [30]:
sample_job_orders = {
    'client_name': [
        'Sammy',
        'Bob',
        'Charlie',
        'Diana'
    ],
    'job_title': [
        'Software Engineer',
        'Data Scientist',
        'Backend Developer',
        'Frontend Developer'
    ],
    'job_desc': [
        '''
        As a Software Engineer, your responsibilities will include writing clean and maintainable code, collaborating with team members to define software requirements, participating in code reviews, troubleshooting and upgrading existing software, and ensuring the performance and responsiveness of applications. You will also be expected to stay current with emerging technologies and industry trends.
        ''',
        '''
        As a Data Scientist, you will analyze large amounts of raw information to find patterns that will help improve our company. We will rely on you to build data products to extract valuable business insights.
        ''',
        '''
        As a Backend Developer, you will design and implement server-side logic, maintain databases, and ensure high performance and responsiveness to requests from the front-end.
        ''',
        '''
        As a Frontend Developer, you will be responsible for implementing visual elements that users see and interact with in a web application, ensuring a seamless user experience.
        '''
    ]
}

# Example: To add embeddings for each job_desc
sample_job_orders['job_desc_embedding'] = [
    get_embedding(model='nomic-embed-text:v1.5', prompt=desc)
    for desc in sample_job_orders['job_desc']
]


In [31]:
collection.insert([sample_job_orders['client_name'], sample_job_orders['job_title'], sample_job_orders['job_desc'], sample_job_orders['job_desc_embedding']])

(insert count: 4, delete count: 0, upsert count: 0, timestamp: 458890654672224259, success count: 4, err count: 0

In [36]:
result = collection.query(expr="id >= 0", output_fields=["id", "client_name", "job_title", "job_desc", "job_desc_embedding"])
for item in result:
    print(item.get('id'))

458886263163361655
458886263163361656
458886263163361657
458886263163361658


In [35]:
ids_to_delete = [458886263163361653]
expr = f"id in {ids_to_delete}"
collection.delete(expr=expr)

(insert count: 0, delete count: 1, upsert count: 0, timestamp: 458890677751644162, success count: 0, err count: 0

In [39]:
sample_job_order = {
    'client_name': ['Alice'],
    'job_title': ['DevOps Engineer'],
    'job_desc': ['''As a DevOps Engineer, you will be responsible for automating and streamlining operations and processes, building and maintaining tools for deployment, monitoring, and operations, and troubleshooting and resolving issues in our development, test, and production environments.
''']
}
sample_job_order['job_desc_embedding'] = [get_embedding(model='nomic-embed-text:v1.5', prompt=sample_job_order['job_desc'][0])]
collection.insert([[458886263163361653], sample_job_order['client_name'], sample_job_order['job_title'], sample_job_order['job_desc'], sample_job_order['job_desc_embedding']])

DataNotMatchException: <DataNotMatchException: (code=1, message=The data doesn't match with schema fields, expect 4 list, got 5)>