In [2]:
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams, PointStruct
import os
from dotenv import load_dotenv
from tqdm import tqdm
import logging

# Notebook-wide logger; the root logging configuration is set to INFO level
logger = logging.getLogger(__name__)
logging.basicConfig(level=logging.INFO)

In [5]:
# Read the Qdrant Cloud endpoint and API key from the environment
# (load_dotenv() is called first so values from a .env file are picked up).
load_dotenv()
qdrant_url, qdrant_api_key = (
    os.getenv(env_name) for env_name in ("QDRANT_URL", "QDRANT_API_KEY")
)
# Both settings are mandatory: stop immediately if either is missing or empty.
if not (qdrant_url and qdrant_api_key):
    raise ValueError("set QDRANT_URL and QDRANT_API_KEY in .env file")


In [4]:
# Build the shared Qdrant Cloud client from the URL and API key loaded above,
# asking the client to prefer gRPC.
qdrant_client = QdrantClient(url=qdrant_url, api_key=qdrant_api_key, prefer_grpc=True)
logger.info("Qdrant Cloud client initialized")

INFO:httpx:HTTP Request: GET https://a39cabe9-e04e-41bc-bfc8-c35e32b99b11.eu-west-2-0.aws.cloud.qdrant.io:6333 "HTTP/1.1 200 OK"
INFO:__main__:Qdrant Cloud client initialized


In [6]:
def migrate_to_qdrant(vectors, collection_name="research_vectors", vector_dimension=384, batch_size=100):
    """
    Migrate embeddings to Qdrant Cloud.

    Validates the input first, then creates the collection (cosine distance) if
    it does not exist, upserts the valid records in batches, and finally logs
    the collection's point count plus the top hits of a sample query.

    Args:
        vectors: Iterable of dicts with 'id', 'vector', and optionally 'metadata'.
            Records whose vector length differs from ``vector_dimension`` are
            skipped (and reported in a warning). Integer ids are sent as
            integers; any other id is sent as ``str(id)``.
        collection_name: Qdrant collection name.
        vector_dimension: Vector dimension (default: 384).
        batch_size: Number of points to upsert per batch; must be >= 1.

    Raises:
        ValueError: If ``batch_size`` is smaller than 1, or if no record has a
            vector of length ``vector_dimension``.
    """
    # A negative batch size would make the range() below empty and silently
    # upsert nothing, so reject it (and zero) before doing any work.
    if batch_size < 1:
        raise ValueError("batch_size must be a positive integer")

    # Prepare points before touching the server, so invalid input does not
    # leave a freshly created, empty collection behind.
    points = []
    skipped = 0
    for v in vectors:
        if len(v["vector"]) != vector_dimension:
            skipped += 1
            continue
        raw_id = v["id"]
        # Qdrant point ids are unsigned integers or UUIDs: keep real integers
        # as integers instead of stringifying them (bool is excluded because
        # it is an int subclass but not a meaningful id).
        if isinstance(raw_id, int) and not isinstance(raw_id, bool):
            point_id = raw_id
        else:
            point_id = str(raw_id)
        points.append(
            PointStruct(
                id=point_id,
                vector=v["vector"],
                payload=v.get("metadata"),  # payload is optional
            )
        )

    if skipped:
        logger.warning(
            f"Skipped {skipped} vectors whose length is not {vector_dimension}"
        )
    if not points:
        raise ValueError("No valid vectors to migrate")

    # Create collection if it doesn't exist
    if not qdrant_client.collection_exists(collection_name):
        qdrant_client.create_collection(
            collection_name=collection_name,
            vectors_config=VectorParams(size=vector_dimension, distance=Distance.COSINE)
        )
        logger.info(f"Created collection: {collection_name}")

    # Upsert points in batches with progress bar
    for start in tqdm(range(0, len(points), batch_size), desc="Migrating vectors"):
        qdrant_client.upsert(
            collection_name=collection_name,
            points=points[start:start + batch_size],
        )

    # Verify migration: report what this call sent separately from the
    # collection total, which also includes points from earlier runs.
    total = qdrant_client.get_collection(collection_name).points_count
    logger.info(
        f"Migrated {len(points)} vectors to Qdrant "
        f"(collection '{collection_name}' now holds {total} points)"
    )

    # Sample query to verify: search with the first migrated vector
    results = qdrant_client.query_points(
        collection_name=collection_name,
        query=points[0].vector,
        limit=3
    ).points
    logger.info("Sample query results:")
    for r in results:
        logger.info(f"ID: {r.id}, Score: {r.score}, Metadata: {r.payload}")

In [7]:
# Sample record taken from Pinecone: three literal embedding components
# followed by 381 zeros, giving a 384-dimensional vector.
vectors = [
    dict(
        id="337f6b38-f77c-41b8-a6d6-a6b1de842cfc",
        vector=[0.0119110597, -0.0297347214, 0.0611310154, *([0.0] * 381)],
        metadata={
            "author": "agimeno",
            "creationdate": "2018-03-05T09:43:57+01:00",
            "creator": "Microsoft® Word 2016",
            "moddate": "2018-03-12T10:24:10-04:00",
            "page": 9.0,
            "page_label": "10",
            "producer": "Microsoft® Word 2016",
            "source": "E:\\AGILEFORCE\\Vector Database Migration\\data\\research.pdf",
            "text": "practices, eLearning Papers, 45, 4-14. ...",
            "total_pages": 11.0,
        },
    ),
]

# Run the migration for the sample record into the 384-dimensional collection
migrate_to_qdrant(
    vectors=vectors,
    collection_name="research_vectors",
    vector_dimension=384,
)

Migrating vectors: 100%|██████████| 1/1 [00:00<00:00,  5.64it/s]
INFO:__main__:Migrated 1 vectors to Qdrant
INFO:__main__:Sample query results:
INFO:__main__:ID: 337f6b38-f77c-41b8-a6d6-a6b1de842cfc, Score: 0.9999998807907104, Metadata: {'moddate': '2018-03-12T10:24:10-04:00', 'creator': 'Microsoft® Word 2016', 'total_pages': 11.0, 'creationdate': '2018-03-05T09:43:57+01:00', 'producer': 'Microsoft® Word 2016', 'source': 'E:\\AGILEFORCE\\Vector Database Migration\\data\\research.pdf', 'author': 'agimeno', 'page': 9.0, 'page_label': '10', 'text': 'practices, eLearning Papers, 45, 4-14. ...'}
