In [5]:
import pandas as pd
from pymilvus import connections, Collection
import json

# Export every record of a Milvus collection to a local JSON file.
#
# The cell connects to Milvus, loads the collection, queries the listed
# fields, converts the embedding values to plain Python floats, builds a
# DataFrame from the records, and writes the records to
# "milvus_extracted_data.json". Any failure is reported as a one-line message
# rather than a traceback (deliberate best-effort behaviour of this cell).
try:
    # Step 1: Connect to Milvus
    print("Connecting to Milvus...")
    connections.connect(host='127.0.0.1', port='19530')  # Update host and port as necessary

    # Step 2: Load the Collection
    collection_name = "gaits_lecture_data_collection"
    collection = Collection(name=collection_name)

    # Step 3: Load Collection Data
    print("Loading collection data...")
    collection.load()

    # Query all data from the collection
    print("Querying data from Milvus...")
    # NOTE(review): "id >= 0" is used as a match-everything filter; this
    # presumably relies on ids being non-negative -- confirm against the schema.
    results = collection.query(
        expr="id >= 0",  # Fetch all data (modify expr if you want specific filters)
        output_fields=[
            "id",
            "lecture_name",
            "video_name",
            "start_time",
            "end_time",
            "lecture_summary",
            "whole_video_summary",
            "lecture_embedding",
        ],
    )

    # Step 4: Convert Data for JSON Serialization
    print("Converting data for JSON serialization...")
    # Embedding components are coerced to built-in floats so json.dump accepts
    # them (presumably they arrive as numpy scalars -- TODO confirm). The
    # records are updated in place, so the DataFrame below sees the converted
    # values as well.
    for record in results:
        if "lecture_embedding" in record:
            record["lecture_embedding"] = [float(x) for x in record["lecture_embedding"]]

    # Step 5: Convert Data to Pandas DataFrame
    print("Converting data to DataFrame...")
    # `df` is not used again inside this cell; it is kept as a top-level name
    # (presumably for inspection in later cells).
    df = pd.DataFrame(results)

    # Step 6: Save Data to a File
    print("Saving data to files...")

    # Save to JSON
    json_path = "milvus_extracted_data.json"
    # Pin the encoding explicitly: without it, open() falls back to the
    # platform's locale encoding, making the export file platform-dependent.
    with open(json_path, "w", encoding="utf-8") as f:
        json.dump(results, f, indent=4)
    print(f"Data saved to {json_path}")

except Exception as e:
    # Top-level boundary of the cell: report the failure and continue.
    print(f"An error occurred: {e}")

Connecting to Milvus...
Loading collection data...
Querying data from Milvus...
Converting data for JSON serialization...
Converting data to DataFrame...
Saving data to files...
Data saved to milvus_extracted_data.json
