## **Vector Stores Tutorial: Qdrant & Weaviate**

In [None]:
!uv pip install langchain-qdrant qdrant-client

Note: you may need to restart the kernel to use updated packages.


f:\sourab\rag_practice\.venv\Scripts\python.exe: No module named uv


In [None]:
!uv pip install langchain_qdrant

Note: you may need to restart the kernel to use updated packages.


f:\sourab\rag_practice\.venv\Scripts\python.exe: No module named uv


In [None]:
!uv pip install langchain-weaviate weaviate-client

Note: you may need to restart the kernel to use updated packages.


f:\sourab\rag_practice\.venv\Scripts\python.exe: No module named uv


In [4]:
#pip install langchain-ollama
#pip install langchain-core

In [10]:
import os
import sys

from langchain_core.documents import Document
from langchain_openai import AzureOpenAIEmbeddings

from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Filter, FieldCondition, MatchValue, Distance

print("‚úì All imports loaded correctly!")
print("‚úì Using langchain_core.documents.Document (correct LangChain 1.0+ import)")

‚úì All imports loaded correctly!
‚úì Using langchain_core.documents.Document (correct LangChain 1.0+ import)


In [11]:
embeddings = AzureOpenAIEmbeddings(
    azure_endpoint=os.getenv("AZURE_OPENAI_ENDPOINT"),
    api_key=os.getenv("AZURE_OPENAI_API_KEY"),
    api_version=os.getenv("AZURE_OPENAI_API_VERSION"),
    model=os.getenv("AZURE_OPENAI_EMBEDDING_DEPLOYMENT_NAME")
)

In [13]:
sample_docs = [
    Document(
        page_content="RAG combines retrieval and generation",
        metadata={"topic": "rag", "difficulty": "intermediate"}
    ),
    Document(
        page_content="LangChain simplifies LLM applications",
        metadata={"topic": "langchain", "difficulty": "beginner"}
    ),
    Document(
        page_content="Vector databases enable semantic search",
        metadata={"topic": "vectordb", "difficulty": "intermediate"}
    )
]

print("‚úì Created 3 sample documents:")
for i, doc in enumerate(sample_docs, 1):
    print(f"  {i}. {doc.page_content}")
    print(f"     Metadata: {doc.metadata}")

‚úì Created 3 sample documents:
  1. RAG combines retrieval and generation
     Metadata: {'topic': 'rag', 'difficulty': 'intermediate'}
  2. LangChain simplifies LLM applications
     Metadata: {'topic': 'langchain', 'difficulty': 'beginner'}
  3. Vector databases enable semantic search
     Metadata: {'topic': 'vectordb', 'difficulty': 'intermediate'}


### **Part 1: Qdrant Vector Store**

In [8]:
print("=" * 80)
print("QDRANT IN-MEMORY EXAMPLE")
print("=" * 80)
print()

qdrant_client_memory = QdrantClient(location=":memory:")
qdrant_client_memory.recreate_collection(
    collection_name="my_collection_memory",
    vectors_config=VectorParams(
        size=1536,
        distance=Distance.COSINE,
    )
)

qdrant_store_memory = QdrantVectorStore(
    client=qdrant_client_memory,
    collection_name="my_collection_memory",
    embedding=embeddings
)

qdrant_store_memory.add_documents(sample_docs)

print("‚úì Added documents to Qdrant (in-memory)")
print("  Collection: my_collection_memory")
print("  Documents: 3")
print("  Storage: RAM (temporary)")

QDRANT IN-MEMORY EXAMPLE



  qdrant_client_memory.recreate_collection(


‚úì Added documents to Qdrant (in-memory)
  Collection: my_collection_memory
  Documents: 3
  Storage: RAM (temporary)


In [9]:
print("\n" + "-" * 80)
print("BASIC SIMILARITY SEARCH")
print("-" * 80)

results = qdrant_store_memory.similarity_search(
    "Tell me about Rag",
    k=2
)

print("\nQuery: 'Tell me about RAG'")
print("\nSearch results:")
for i, doc in enumerate(results, 1):
    print(f"  {i}. {doc.page_content}")
    print(f"     Metadata: {doc.metadata}")

print("\nüí° Notice: The document about 'RAG combines retrieval...' is returned first")
print("   because it's semantically most similar to our query!")


--------------------------------------------------------------------------------
BASIC SIMILARITY SEARCH
--------------------------------------------------------------------------------

Query: 'Tell me about RAG'

Search results:
  1. RAG combines retrieval and generation
     Metadata: {'topic': 'rag', 'difficulty': 'intermediate', '_id': '16587108236b4a4699e52a59a80a676d', '_collection_name': 'my_collection_memory'}
  2. Vector databases enable semantic search
     Metadata: {'topic': 'vectordb', 'difficulty': 'intermediate', '_id': '3d90f26af8814bf190cbb8d66f598c1e', '_collection_name': 'my_collection_memory'}

üí° Notice: The document about 'RAG combines retrieval...' is returned first
   because it's semantically most similar to our query!


In [10]:
print("\n" + "-" * 80)
print("SEARCH WITH METADATA FILTER")
print("-" * 80)

qdrant_filter = Filter(
    must=[
        FieldCondition(
            key="metadata.topic",
            match=MatchValue(value="rag")
        )
    ]
)
results_filtered = qdrant_store_memory.similarity_search(
    "Tell me about RAG",
    k=2,
    filter=qdrant_filter
)

print("\nQuery: 'Tell me about RAG'")
print("Filter: topic='rag'")
print("\nFiltered search results:")
for i, doc in enumerate(results_filtered, 1):
    print(f"  {i}. {doc.page_content}")
    print(f"     Metadata: {doc.metadata}")

print("\nüí° Only documents with topic='rag' are returned!")


--------------------------------------------------------------------------------
SEARCH WITH METADATA FILTER
--------------------------------------------------------------------------------

Query: 'Tell me about RAG'
Filter: topic='rag'

Filtered search results:
  1. RAG combines retrieval and generation
     Metadata: {'topic': 'rag', 'difficulty': 'intermediate', '_id': '16587108236b4a4699e52a59a80a676d', '_collection_name': 'my_collection_memory'}

üí° Only documents with topic='rag' are returned!


In [11]:
print("\n" + "-" * 80)
print("SEARCH WITH METADATA FILTER")
print("-" * 80)

qdrant_filter = Filter(
    must=[
        FieldCondition(
            key="metadata.topic",
            match=MatchValue(value="rag")
        )
    ]
)
results_filtered = qdrant_store_memory.similarity_search(
    "Tell me about RAG",
    k=2,
    filter=qdrant_filter
)

print("\nQuery: 'Tell me about RAG'")
print("Filter: topic='rag'")
print("\nFiltered search results:")
for i, doc in enumerate(results_filtered, 1):
    print(f"  {i}. {doc.page_content}")
    print(f"     Metadata: {doc.metadata}")

print("\nüí° Only documents with topic='rag' are returned!")


--------------------------------------------------------------------------------
SEARCH WITH METADATA FILTER
--------------------------------------------------------------------------------

Query: 'Tell me about RAG'
Filter: topic='rag'

Filtered search results:
  1. RAG combines retrieval and generation
     Metadata: {'topic': 'rag', 'difficulty': 'intermediate', '_id': '16587108236b4a4699e52a59a80a676d', '_collection_name': 'my_collection_memory'}

üí° Only documents with topic='rag' are returned!


### **Multiple Filter Conditions (AND Logic)**

In [14]:
multi_filter = Filter(
    must=[
        FieldCondition(
            key="metadata.topic",
            match=MatchValue(value='rag')
        ),
        FieldCondition(
            key='metadata.topic',
            match=MatchValue(value="intermediate")
        )
    ]
)

results_filtered = qdrant_store_memory.similarity_search(
    "Tell me about RAG",
    k=2,
    filter=multi_filter
)

print("\nQuery: 'Tell me about RAG'")
print("Filter: topic='rag'")
print("\nFiltered search results:")
for i, doc in enumerate(results_filtered, 1):
    print(f"  {i}. {doc.page_content}")
    print(f"     Metadata: {doc.metadata}")

print("\nüí° Only documents with topic='rag' are returned!")


Query: 'Tell me about RAG'
Filter: topic='rag'

Filtered search results:

üí° Only documents with topic='rag' are returned!


### **Qdrant with Local Persistence**

In [15]:
print("\n" + "=" * 80)
print("QDRANT WITH LOCAL PERSISTENCE")
print("=" * 80)
print()

qdrant_path = "./qdrant_data"
qdrant_client_persistent = QdrantClient(path=qdrant_path)
qdrant_client_persistent.recreate_collection(
    collection_name="my_collection_persistent",
    vectors_config=VectorParams(
        size=1536,
        distance=Distance.COSINE
    ),
)
qdrant_store_persistent = QdrantVectorStore(
    client=qdrant_client_persistent,
    collection_name="my_collection_persistent",
    embedding=embeddings
)

qdrant_store_persistent.add_documents(sample_docs)
print(f"‚úì Added documents to Qdrant (persistent)")
print(f"  Storage location: {qdrant_path}")
print(f"  Collection: my_collection_persistent")
print(f"  ‚ö†Ô∏è  Data will persist even after this script ends!")

results = qdrant_store_persistent.similarity_search(
    "Tell me about Langchain",
    k=2
)


print("\nQuery: 'Tell me about LangChain'")
print("\nSearch results:")
for i, doc in enumerate(results, 1):
    print(f"  {i}. {doc.page_content}")
    print(f"     Metadata: {doc.metadata}")

print("\nüí° Next time you run this, you can load the same data from disk!")


QDRANT WITH LOCAL PERSISTENCE



  qdrant_client_persistent.recreate_collection(


‚úì Added documents to Qdrant (persistent)
  Storage location: ./qdrant_data
  Collection: my_collection_persistent
  ‚ö†Ô∏è  Data will persist even after this script ends!

Query: 'Tell me about LangChain'

Search results:
  1. LangChain simplifies LLM applications
     Metadata: {'topic': 'langchain', 'difficulty': 'beginner', '_id': 'a97d815f6f3d4979a26fd17e1f43f6dc', '_collection_name': 'my_collection_persistent'}
  2. Vector databases enable semantic search
     Metadata: {'topic': 'vectordb', 'difficulty': 'intermediate', '_id': '839a989af29d4468b6fc1f218b8b2abf', '_collection_name': 'my_collection_persistent'}

üí° Next time you run this, you can load the same data from disk!


###  **Qdrant from_documents**

In [16]:

print("\n" + "=" * 80)
print("QDRANT FROM_DOCUMENTS (RECOMMENDED METHOD)")
print("=" * 80)
print()

qdrant_store_easy = QdrantVectorStore.from_documents(
    documents=sample_docs,
    embedding=embeddings,
    path="./qdrant_easy",
    collection_name="rag_collection"
)

print("‚úì Created Qdrant store from documents")
print("  Collection: rag_collection")
print("  Storage: ./qdrant_easy")
print("  Documents: 3")
print("\nüí° This is the recommended approach for most use cases!")


QDRANT FROM_DOCUMENTS (RECOMMENDED METHOD)

‚úì Created Qdrant store from documents
  Collection: rag_collection
  Storage: ./qdrant_easy
  Documents: 3

üí° This is the recommended approach for most use cases!


In [17]:
result_with_scores = qdrant_store_easy.similarity_search_with_score(
    "Vector database",
    k=3
)
print(result_with_scores)

print("\nQuery: 'Vector databases'")
print("\nSearch results with similarity scores:")
print()
for doc, score in result_with_scores:
    print(f"  Score: {score:.4f}")  # Similarity score (higher = more similar)
    print(f"  Content: {doc.page_content}")
    print(f"  Metadata: {doc.metadata}")
    print()

print("üí° Scores help you filter out low-quality results")
print("üí° You can set a threshold (e.g., only return results with score > 0.7)")

[(Document(metadata={'topic': 'vectordb', 'difficulty': 'intermediate', '_id': '0f622ce5abfb4f15ae7be2026662a006', '_collection_name': 'rag_collection'}, page_content='Vector databases enable semantic search'), 0.90523565515644), (Document(metadata={'topic': 'langchain', 'difficulty': 'beginner', '_id': '2f373810db264aa28480bd52e9f50f48', '_collection_name': 'rag_collection'}, page_content='LangChain simplifies LLM applications'), 0.7368039651195374), (Document(metadata={'topic': 'rag', 'difficulty': 'intermediate', '_id': 'bf119b1b2a7944a2925f6b8d0b36e1ee', '_collection_name': 'rag_collection'}, page_content='RAG combines retrieval and generation'), 0.7327351479175784)]

Query: 'Vector databases'

Search results with similarity scores:

  Score: 0.9052
  Content: Vector databases enable semantic search
  Metadata: {'topic': 'vectordb', 'difficulty': 'intermediate', '_id': '0f622ce5abfb4f15ae7be2026662a006', '_collection_name': 'rag_collection'}

  Score: 0.7368
  Content: LangChain si

## **Part 2: Weaviate Vector Store**

In [17]:
from weaviate.classes.query import Filter

print("\n" + "=" * 80)
print("WEAVIATE LOCAL VECTOR STORE EXAMPLE")
print("=" * 80)
print()
print("‚ö†Ô∏è  Note: This requires Weaviate running locally on port 8080")
print("   If not running, you'll see connection errors (that's OK for learning!)")
print()

try:
    import weaviate
    from langchain_weaviate import WeaviateVectorStore

    print("-" * 80)
    print("Connecting to Local Weaviate")
    print("-" * 80)

    # Step 1: Connect to local Weaviate instance
    weaviate_client = weaviate.connect_to_local(
        host="localhost",
        port=8080,
        grpc_port=50051
    )
    
    print("‚úì Connected to local Weaviate")
    print("  Host: localhost:8080")
    print("  gRPC Port: 50051")


    # Step 2: Create Weaviate vector store
    print("\n" + "-" * 80)
    print("Creating Weaviate Vector Store")
    print("-" * 80)


    weaviate_store = WeaviateVectorStore(
        client=weaviate_client,
        index_name="MyDocument",
        text_key='text',
        embedding=embeddings
    )

    # Step 3: Add documents    
    weaviate_store.add_documents(sample_docs)

    print("‚úì Added documents to Weaviate")
    print("  Index: MyDocuments")
    print("  Documents: 3")
    
    # Step 4: Basic Search
    print("\n" + "-" * 80)
    print("Basic Search")
    print("-" * 80)

    result = weaviate_store.similarity_search(
        "Tell me about RAG",
        k=2
    )

    print("\nQuery: Tell me about RAG")
    print("\nSearch results: ")
    for i, doc in enumerate(result,1):
        print(f"{i}. {doc.page_content}")
        print(f"   Metadata: {doc.metadata}")

    # Step 5: Search with Metadata Filter
    print("\n" + "-" * 80)
    print("Search with Metadata Filter")
    print("-" * 80)

    # Weaviate uses where_filter with different syntax
    result_filtered = weaviate_store.similarity_search(
        "Tell me about databases",
        k=2,
        filters=Filter.by_property("difficulty").equal("intermediate")
    )
    print("\nQuery: 'Tell me about databases'")
    print("Filter: difficulty='intermediate'")
    print("\nFiltered search results:")

    for i, doc in enumerate(result_filtered,1):
        print(f"{i}. {doc.page_content}")
        print(f"    Metadata: {doc.metadata}")

    # Step 6: Search with Scores
    print("\n" + "-" * 80)
    print("Search with Scores")
    print("-" * 80)
    
    result_with_scores = weaviate_store.similarity_search_with_score(
        "vector databases",
        k=2
    )

    print("\nQuery: 'Vector databases'")
    print("\nSearch results with scores:")
    for doc,score in result_with_scores:
        print(f"   Score: {score:.4f}")
        print(f"   Content: {doc.page_content}")
        print(f"   Metadata: {doc.metadata}")

    # Step 7: Alternative - Create from Documents
    print("-" * 80)
    print("Creating Weaviate from Documents (Alternative Method)")
    print("-" * 80)

    weaviate_store = WeaviateVectorStore.from_documents(
        documents=sample_docs,
        embedding=embeddings,
        client=weaviate_client,
        index_name="EasyDocuments"
    )
    print("‚úì Created Weaviate store from documents")
    results = weaviate_store.similarity_search("Langchain", k=2)
    print("\nQuick search results:")
    for i, doc in enumerate(results, 1):
        print(f"  {i}. {doc.page_content}")

    weaviate_client.close()
    print("\n‚úì Closed Weaviate connection")



except Exception as e:
    print(f"‚úó Weaviate error: {e}")
    print()
    print("Troubleshooting:")
    print("1. Check if Weaviate is running: docker ps")
    print("2. Start Weaviate: docker run -d -p 8080:8080 -p 50051:50051 \\")
    print("     --name weaviate cr.weaviate.io/semitechnologies/weaviate:latest")
    print("3. Check if port 8080 is available: lsof -i :8080")
    print("4. Check Weaviate logs: docker logs weaviate")
    print()
    print("üí° It's OK if this doesn't work - you can still learn from the code!")


WEAVIATE LOCAL VECTOR STORE EXAMPLE

‚ö†Ô∏è  Note: This requires Weaviate running locally on port 8080
   If not running, you'll see connection errors (that's OK for learning!)

--------------------------------------------------------------------------------
Connecting to Local Weaviate
--------------------------------------------------------------------------------
‚úì Connected to local Weaviate
  Host: localhost:8080
  gRPC Port: 50051

--------------------------------------------------------------------------------
Creating Weaviate Vector Store
--------------------------------------------------------------------------------
‚úì Added documents to Weaviate
  Index: MyDocuments
  Documents: 3

--------------------------------------------------------------------------------
Basic Search
--------------------------------------------------------------------------------

Query: Tell me about RAG

Search results: 
1. RAG combines retrieval and generation
   Metadata: {'difficulty': 'inte