In [6]:
from qdrant_client import QdrantClient
from qdrant_client.http.models import Distance, VectorParams, PointStruct
import os
from dotenv import load_dotenv
import logging
from typing import List, Dict, Any
import uuid


logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)

In [7]:

load_dotenv()
logger.info("Environment variables loaded")

INFO:__main__:Environment variables loaded


In [8]:
# Load environment variables from .env file
load_dotenv()
qdrant_url = os.getenv("QDRANT_URL")
qdrant_api_key = os.getenv("QDRANT_API_KEY")
if not qdrant_url or not qdrant_api_key:
    raise ValueError("QDRANT_URL and QDRANT_API_KEY must be set in .env file")
logger.info("Environment variables loaded")

INFO:__main__:Environment variables loaded


In [None]:
# Initialize Qdrant client for Qdrant Cloud
qdrant_client = QdrantClient(
    url=qdrant_url,
    api_key=qdrant_api_key,
    prefer_grpc=True 
)
logger.info("Qdrant Cloud client initialized")

INFO:httpx:HTTP Request: GET https://a39cabe9-e04e-41bc-bfc8-c35e32b99b11.eu-west-2-0.aws.cloud.qdrant.io:6333 "HTTP/1.1 200 OK"
INFO:__main__:Qdrant Cloud client initialized


In [10]:
def migrate_to_qdrant(vectors: List[Dict[str, Any]], collection_name: str = "research_vectors", 
                      vector_dimension: int = 384, batch_size: int = 100) -> None:
    """
    Migrate embeddings from Pinecone to Qdrant Cloud.
    
    Args:
        vectors: List of dictionaries containing 'id', 'vector', and 'metadata'.
        collection_name: Name of the Qdrant collection.
        vector_dimension: Dimension of the vectors (default: 384 from Pinecone index).
        batch_size: Number of points to upsert in each batch.
    """
    try:
        # Create collection if it doesn't exist
        if not qdrant_client.collection_exists(collection_name):
            qdrant_client.create_collection(
                collection_name=collection_name,
                vectors_config=VectorParams(
                    size=vector_dimension,
                    distance=Distance.COSINE
                )
            )
            logger.info(f"Created Qdrant collection: {collection_name}")
        else:
            logger.info(f"Collection {collection_name} already exists")

        # Prepare points for Qdrant
        points = []
        for vector in vectors:
            # Validate vector data
            if len(vector["vector"]) != vector_dimension:
                logger.warning(f"Skipping vector {vector['id']}: Invalid dimension")
                continue
            # Ensure ID is a string
            point_id = str(vector["id"]) if isinstance(vector["id"], uuid.UUID) else vector["id"]
            points.append(
                PointStruct(
                    id=point_id,
                    vector=vector["vector"],
                    payload=vector["metadata"]
                )
            )

        # Upsert points in batches
        for i in range(0, len(points), batch_size):
            batch = points[i:i + batch_size]
            qdrant_client.upsert(
                collection_name=collection_name,
                points=batch
            )
            logger.info(f"Upserted batch {i // batch_size + 1} with {len(batch)} points")

        # Verify the number of points
        collection_info = qdrant_client.get_collection(collection_name=collection_name)
        logger.info(f"Total points in collection: {collection_info.points_count}")

        # Perform a sample similarity search to verify
        if points:
            query_vector = points[0].vector
            search_results = qdrant_client.search(
                collection_name=collection_name,
                query_vector=query_vector,
                limit=3
            )
            logger.info("Sample search results:")
            for result in search_results:
                logger.info(f"ID: {result.id}, Score: {result.score}, Metadata: {result.payload}")

    except Exception as e:
        logger.error(f"Error during migration: {str(e)}")
        raise

In [None]:
# Define vectors from pinecone
vectors = [
    {
        "id": "337f6b38-f77c-41b8-a6d6-a6b1de842cfc",
        "vector": [0.0119110597, -0.0297347214, 0.0611310154] + [0.0] * 381, 
        "metadata": {
            "author": "agimeno",
            "creationdate": "2018-03-05T09:43:57+01:00",
            "creator": "Microsoft® Word 2016",
            "moddate": "2018-03-12T10:24:10-04:00",
            "page": 9.0,
            "page_label": "10",
            "producer": "Microsoft® Word 2016",
            "source": "E:\\AGILEFORCE\\Vector Database Migration\\data\\research.pdf",
            "text": "practices, eLearning Papers, 45, 4-14. ...",
            "total_pages": 11.0
        }
    },
  
]


migrate_to_qdrant(vectors=vectors, collection_name="research_vectors", vector_dimension=384)

INFO:__main__:Created Qdrant collection: research_vectors
INFO:__main__:Upserted batch 1 with 1 points
INFO:__main__:Total points in collection: 1
  search_results = qdrant_client.search(
INFO:__main__:Sample search results:
INFO:__main__:ID: 337f6b38-f77c-41b8-a6d6-a6b1de842cfc, Score: 0.9999998807907104, Metadata: {'creator': 'Microsoft® Word 2016', 'moddate': '2018-03-12T10:24:10-04:00', 'creationdate': '2018-03-05T09:43:57+01:00', 'total_pages': 11.0, 'producer': 'Microsoft® Word 2016', 'source': 'E:\\AGILEFORCE\\Vector Database Migration\\data\\research.pdf', 'page_label': '10', 'page': 9.0, 'author': 'agimeno', 'text': 'practices, eLearning Papers, 45, 4-14. ...'}
