In [1]:
from pymilvus import connections

def connect_to_milvus():
    connections.connect("default", host="localhost", port="19530")
    print("Connected to Milvus")

if __name__ == "__main__":
    connect_to_milvus()

Connected to Milvus


In [2]:
from pymilvus import FieldSchema, CollectionSchema, DataType, Collection, utility

def create_collection(collection_name="technology_collection"):
    fields = [
        FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),
        FieldSchema(name="name", dtype=DataType.VARCHAR, max_length=100),
        FieldSchema(name="description", dtype=DataType.VARCHAR, max_length=500),
        FieldSchema(name="technology_stack", dtype=DataType.VARCHAR, max_length=100),
        FieldSchema(name="vector", dtype=DataType.FLOAT_VECTOR, dim=384)
    ]

    schema = CollectionSchema(fields, description="Technology Collection")

    if not utility.has_collection(collection_name):
        collection = Collection(name=collection_name, schema=schema)
        index_params = {"metric_type": "L2", "index_type": "HNSW", "params": {"M": 8, "efConstruction": 64}}
        collection.create_index(field_name="vector", index_params=index_params)
        print(f"Collection '{collection_name}' created.")

if __name__ == "__main__":
    create_collection()


In [3]:
import json

def load_data():
    data_json = '''
    {
        "Technology": {
            "Frontend": {
                "React": {
                    "Version": "17.0.2",
                    "Description": "React is a JavaScript library for building user interfaces..."
                },
                "Angular": {
                    "Version": "11.2.0",
                    "Description": "Angular is a platform and framework for building single-page applications..."
                },
                "Vue": {
                    "Version": "2.6.12",
                    "Description": "Vue.js is a progressive JavaScript framework used to build web interfaces..."
                }
            },
            "Backend": {
                "Node": {
                    "Version": "14.16.0",
                    "Description": "Node.js is an open-source, cross-platform JavaScript runtime environment..."
                },
                "Express": {
                    "Version": "4.17.1",
                    "Description": "Express.js is a web application framework for Node.js..."
                },
                "Django": {
                    "Version": "3.1.7",
                    "Description": "Django is a high-level Python web framework..."
                }
            }
        },
        "Technology_Stack": {
            "Frontend": ["React", "Angular", "Vue"],
            "Backend": ["Node", "Express", "Django"]
        }
    }
    '''
    return json.loads(data_json)

if __name__ == "__main__":
    data = load_data()
    print("Data Loaded:", data)


Data Loaded: {'Technology': {'Frontend': {'React': {'Version': '17.0.2', 'Description': 'React is a JavaScript library for building user interfaces...'}, 'Angular': {'Version': '11.2.0', 'Description': 'Angular is a platform and framework for building single-page applications...'}, 'Vue': {'Version': '2.6.12', 'Description': 'Vue.js is a progressive JavaScript framework used to build web interfaces...'}}, 'Backend': {'Node': {'Version': '14.16.0', 'Description': 'Node.js is an open-source, cross-platform JavaScript runtime environment...'}, 'Express': {'Version': '4.17.1', 'Description': 'Express.js is a web application framework for Node.js...'}, 'Django': {'Version': '3.1.7', 'Description': 'Django is a high-level Python web framework...'}}}, 'Technology_Stack': {'Frontend': ['React', 'Angular', 'Vue'], 'Backend': ['Node', 'Express', 'Django']}}


In [4]:
from sentence_transformers import SentenceTransformer

def load_model():
    model = SentenceTransformer("all-MiniLM-L6-v2")
    print("Embedding model loaded.")
    return model

if __name__ == "__main__":
    model = load_model()


  from .autonotebook import tqdm as notebook_tqdm


Embedding model loaded.


In [15]:
from pymilvus import Collection

def insert_data(collection_name="technology_collection"):
    connect_to_milvus()
    create_collection(collection_name)
    model = load_model()
    collection = Collection(name=collection_name)

    data = load_data()

    # Separate each field into its own list (column-major order)
    tech_names = []
    descriptions = []
    categories = []
    vectors = []

    for category, tech_list in data["Technology_Stack"].items():
        for tech in tech_list:
            description = data["Technology"].get(category, {}).get(tech, {}).get("Description", "")
            vector = model.encode(description).tolist()

            tech_names.append(tech)
            descriptions.append(description)
            categories.append(category)
            vectors.append(vector)

    # Ensure the insert format is a list of columns
    entities = [tech_names, descriptions, categories, vectors]

    collection.insert(entities)
    collection.flush()
    print(f"Inserted {len(tech_names)} records into '{collection_name}'.")

if __name__ == "__main__":
    insert_data()


Connected to Milvus
Embedding model loaded.
Inserted 6 records into 'technology_collection'.


In [17]:
from pymilvus import Collection

def search_technology(user_query, collection_name="technology_collection"):
    connect_to_milvus()
    model = load_model()
    query_vector = model.encode(user_query).tolist()

    collection = Collection(name=collection_name)
    collection.load()
    
    search_params = {"metric_type": "L2", "params": {"ef": 64}}
    
    results = collection.search(
        data=[query_vector],
        anns_field="vector",
        param=search_params,
        limit=3,
        output_fields=["name", "description", "technology_stack"]
    )

    for result in results[0]:
        print(f"Technology: {result.entity.get('name')}")
        print(f"Stack: {result.entity.get('technology_stack')}")
        print(f"Description: {result.entity.get('description')}\n")

if __name__ == "__main__":
    query = input("Enter your query: ")
    search_technology(query)


Connected to Milvus
Embedding model loaded.
Technology: Node
Stack: Backend
Description: Node.js is an open-source, cross-platform JavaScript runtime environment...

Technology: Django
Stack: Backend
Description: Django is a high-level Python web framework...

Technology: Express
Stack: Backend
Description: Express.js is a web application framework for Node.js...



In [19]:
def main():
    insert_data()
    while True:
        query = input("\nEnter your search query (or type 'exit' to quit): ")
        if query.lower() == "exit":
            break
        search_technology(query)

if __name__ == "__main__":
    main()


Connected to Milvus
Embedding model loaded.
Inserted 6 records into 'technology_collection'.
Connected to Milvus
Embedding model loaded.
Technology: React
Stack: Frontend
Description: React is a JavaScript library for building user interfaces...

Technology: React
Stack: Frontend
Description: React is a JavaScript library for building user interfaces...

Technology: Express
Stack: Backend
Description: Express.js is a web application framework for Node.js...

