In [1]:
import ollama

dummy_data_1 = {
    'name': 'John Doe',
    'email': 'example@mail.com',
    'telephone': '1234567890',
    'address': '1234 Main Street, City, Country',
    'Overview': 'I am John Doe, an autistic individual with a strong passion for computers...',
    'skills': ['Python', 'Java', 'C++', 'JavaScript', 'HTML', 'CSS'],
    'disability': 'Autism',
    'social': "https://www.linkedin.com/in/johndoe",
    'work': [
        {'title': 'Software Developer', 'company': 'ABC Inc.', 'start_date': 'January 2020', 'end_date': 'Present', 'description': 'Developed software applications using Python, Java, and C++.'},
        {'title': 'Technical Support Specialist', 'company': 'XYZ Corp.', 'start_date': 'June 2018', 'end_date': 'December 2019', 'description': 'Provided technical support to customers and resolved software issues.'}
    ],
    'education': [
        {'degree': 'Bachelor of Science in Computer Science', 'school': 'University of ABC', 'graduation_date': 'May 2018', 'start_date': 'September 2014'}
    ]
}

dummy_data_2 = {
    'name': 'Jane Smith',
    'email': 'jane.smith@mail.com',
    'telephone': '0987654321',
    'address': '4321 Second Street, City, Country',
    'Overview': 'I am Jane Smith, a passionate data scientist with expertise in machine learning...',
    'skills': ['Python', 'R', 'SQL', 'TensorFlow', 'Keras'],
    'disability': 'ADHD',
    'social': "https://www.linkedin.com/in/janesmith",
    'work': [
        {'title': 'Data Scientist', 'company': 'Data Corp', 'start_date': 'February 2021', 'end_date': 'Present', 'description': 'Developed and implemented machine learning models.'},
        {'title': 'Data Analyst', 'company': 'Analytica', 'start_date': 'March 2019', 'end_date': 'January 2021', 'description': 'Analyzed data and provided insights to stakeholders.'}
    ],
    'education': [
        {'degree': 'Master of Science in Data Science', 'school': 'University of XYZ', 'graduation_date': 'December 2018', 'start_date': 'September 2016'}
    ]
}


def parse_profile(profile):
    # parse the profile and return the text
    text = profile['Overview']
    for work in profile['work']:
        text += f" {work['title']} at {work['company']} from {work['start_date']} to {work['end_date']}. {work['description']}"
    for education in profile['education']:
        text += f" {education['degree']} at {education['school']} from {education['start_date']} to {education['graduation_date']}"
    text += f" Skills: {', '.join(profile['skills'])}"
    text += f" Disability: {profile['disability']}"
    return text

# Generate embeddings for both profiles
embedding_1 = ollama.embeddings(model='mxbai-embed-large', prompt=parse_profile(dummy_data_1))["embedding"]
embedding_2 = ollama.embeddings(model='mxbai-embed-large', prompt=parse_profile(dummy_data_2))["embedding"]


## Data entry in The DB

In [2]:
from pymilvus import connections, FieldSchema, CollectionSchema, DataType, Collection

# Connect to Milvus server
connections.connect("default", host="localhost", port="19530")

# Define the schema for the collection
fields = [
    FieldSchema(name="name", dtype=DataType.VARCHAR, max_length=255, is_primary=True),
    FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=1024)
]
schema = CollectionSchema(fields, "User Profiles collection")

# Create a collection
collection = Collection(name="user_profiles", schema=schema)

# Insert data into the collection
data = [
    ["John Doe", "Jane Smith"],
    [embedding_1, embedding_2]
]

collection.insert(data)

(insert count: 2, delete count: 0, upsert count: 0, timestamp: 450871615321800717, success count: 2, err count: 0, cost: 0)

## Data search in the DB

In [3]:
# Load the collection into memory
collection.load()

# prompt to search for similar profiles
prompt="I am looking for a software developer with experience in Python and Java"

# Generate an embedding for the query
query_embedding = ollama.embeddings(model='mxbai-embed-large', prompt=prompt)["embedding"]

# Define search parameters L2 means Euclidean distance, nprobe is the number of clusters to search
search_params = {"metric_type": "L2", "params": {"nprobe": 10}}
results = collection.search([query_embedding], "embedding", search_params, limit=2)

# Print search results
for result in results:
    print(f"Name: {result.entity.name}, Distance: {result.distance}")

RPC error: [load_collection], <MilvusException: (code=700, message=index not found[collection=user_profiles])>, <Time:{'RPC start': '2024-07-02 18:45:21.588773', 'RPC error': '2024-07-02 18:45:21.591820'}>


MilvusException: <MilvusException: (code=700, message=index not found[collection=user_profiles])>