#### Quadrant Vec store

In [1]:
# Standard library imports
import os
import sys

# LangChain core - Document class
from langchain_core.documents import Document

# Qdrant imports
from langchain_qdrant import QdrantVectorStore
from qdrant_client import QdrantClient
from qdrant_client.models import Distance, VectorParams, Filter, FieldCondition, MatchValue ## Metadata filtering and index creation.

# Ollama embeddings
from langchain_ollama import OllamaEmbeddings

print("‚úì All imports loaded correctly!")
print("‚úì Using langchain_core.documents.Document (correct LangChain 1.0+ import)")

‚úì All imports loaded correctly!
‚úì Using langchain_core.documents.Document (correct LangChain 1.0+ import)


In [2]:
!ollama list

NAME                     ID              SIZE      MODIFIED    
gemma3:1b                8648f39daa8f    815 MB    4 weeks ago    
nomic-embed-text:v1.5    0a109f422b47    274 MB    4 weeks ago    


In [3]:
# Initialize Ollama embeddings
# This connects to your local Ollama service and uses the nomic-embed-text model

print("Initializing Ollama embeddings (nomic-embed-text)...")
print("Make sure Ollama is running: 'ollama serve'\n")

embeddings = OllamaEmbeddings(model="nomic-embed-text:v1.5")

print("‚úì Ollama embeddings initialized")
print("  Model: nomic-embed-text")
print("  Dimension: 768")

Initializing Ollama embeddings (nomic-embed-text)...
Make sure Ollama is running: 'ollama serve'

‚úì Ollama embeddings initialized
  Model: nomic-embed-text
  Dimension: 768


In [4]:
# Create sample documents with metadata
# The Document class comes from langchain_core.documents

sample_docs = [
    Document(
        page_content="RAG combines retrieval and generation",
        metadata={"topic": "rag", "difficulty": "intermediate"}
    ),
    Document(
        page_content="LangChain simplifies LLM applications",
        metadata={"topic": "langchain", "difficulty": "beginner"}
    ),
    Document(
        page_content="Vector databases enable semantic search",
        metadata={"topic": "vectordb", "difficulty": "intermediate"}
    )
]

print("‚úì Created 3 sample documents:")
for i, doc in enumerate(sample_docs, 1):
    print(f"  {i}. {doc.page_content}")
    print(f"     Metadata: {doc.metadata}")

‚úì Created 3 sample documents:
  1. RAG combines retrieval and generation
     Metadata: {'topic': 'rag', 'difficulty': 'intermediate'}
  2. LangChain simplifies LLM applications
     Metadata: {'topic': 'langchain', 'difficulty': 'beginner'}
  3. Vector databases enable semantic search
     Metadata: {'topic': 'vectordb', 'difficulty': 'intermediate'}


In [5]:
print("=" * 80)
print("QDRANT IN-MEMORY EXAMPLE")
print("=" * 80)
print()

# Step 1: Create in-memory Qdrant client
# The `:memory:` location means data is stored in RAM (not saved to disk)
qdrant_client_memory = QdrantClient(location=":memory:")

# Step 2: Create a collection
# - collection_name: identifier for this collection
# - size: must match embedding dimension (768 for nomic-embed-text)
# - distance: COSINE measures similarity (other options: DOT, EUCLID)
qdrant_client_memory.recreate_collection(
    collection_name="my_collection_memory",
    vectors_config=VectorParams(size=768, distance=Distance.COSINE),
)

# Step 3: Create Qdrant vector store wrapper
# This LangChain wrapper makes it easy to work with Qdrant
qdrant_store_memory = QdrantVectorStore(
    client=qdrant_client_memory,
    collection_name="my_collection_memory",
    embedding=embeddings
)

# Step 4: Add documents to the store
# This will automatically:
# 1. Convert documents to embeddings using Ollama
# 2. Store embeddings + metadata in Qdrant
qdrant_store_memory.add_documents(sample_docs)
print("‚úì Added documents to Qdrant (in-memory)")
print("  Collection: my_collection_memory")
print("  Documents: 3")
print("  Storage: RAM (temporary)")

QDRANT IN-MEMORY EXAMPLE



  qdrant_client_memory.recreate_collection(


‚úì Added documents to Qdrant (in-memory)
  Collection: my_collection_memory
  Documents: 3
  Storage: RAM (temporary)


In [6]:
print("\n" + "-" * 80)
print("BASIC SIMILARITY SEARCH")
print("-" * 80)

# Search for documents similar to this query
# k=2 means return the top 2 most similar documents
results = qdrant_store_memory.similarity_search(
    "Tell me about RAG",
    k=2
)

print("\nQuery: 'Tell me about RAG'")
print("\nSearch results:")
for i, doc in enumerate(results, 1):
    print(f"  {i}. {doc.page_content}")
    print(f"     Metadata: {doc.metadata}")

print("\nüí° Notice: The document about 'RAG combines retrieval...' is returned first")
print("   because it's semantically most similar to our query!")


--------------------------------------------------------------------------------
BASIC SIMILARITY SEARCH
--------------------------------------------------------------------------------

Query: 'Tell me about RAG'

Search results:
  1. RAG combines retrieval and generation
     Metadata: {'topic': 'rag', 'difficulty': 'intermediate', '_id': '06bad21381fe4110ab9e54e7c42325c1', '_collection_name': 'my_collection_memory'}
  2. Vector databases enable semantic search
     Metadata: {'topic': 'vectordb', 'difficulty': 'intermediate', '_id': '4e8db7b581a642708057722df73d8204', '_collection_name': 'my_collection_memory'}

üí° Notice: The document about 'RAG combines retrieval...' is returned first
   because it's semantically most similar to our query!


In [7]:
results

[Document(metadata={'topic': 'rag', 'difficulty': 'intermediate', '_id': '06bad21381fe4110ab9e54e7c42325c1', '_collection_name': 'my_collection_memory'}, page_content='RAG combines retrieval and generation'),
 Document(metadata={'topic': 'vectordb', 'difficulty': 'intermediate', '_id': '4e8db7b581a642708057722df73d8204', '_collection_name': 'my_collection_memory'}, page_content='Vector databases enable semantic search')]

In [9]:
print("\n" + "-" * 80)
print("SEARCH WITH METADATA FILTER")
print("-" * 80)
#metadata={"topic": ["rag", "llms", "agents"]}
# Create a filter to only search documents with topic='rag'
# Note: We use 'metadata.topic' because metadata is nested
qdrant_filter = Filter(
    must=[
        FieldCondition(
            key="metadata.topic",
            match=MatchValue(value="rag")
        )
    ]
)

# Same search, but only among filtered documents
results_filtered = qdrant_store_memory.similarity_search(
    "Tell me about RAG",
    k=2,
    filter=qdrant_filter
)

print("\nQuery: 'Tell me about RAG'")
print("Filter: topic='rag'")
print("\nFiltered search results:")
for i, doc in enumerate(results_filtered, 1):
    print(f"  {i}. {doc.page_content}")
    print(f"     Metadata: {doc.metadata}")

print("\nüí° Only documents with topic='rag' are returned!")


--------------------------------------------------------------------------------
SEARCH WITH METADATA FILTER
--------------------------------------------------------------------------------

Query: 'Tell me about RAG'
Filter: topic='rag'

Filtered search results:
  1. RAG combines retrieval and generation
     Metadata: {'topic': 'rag', 'difficulty': 'intermediate', '_id': '06bad21381fe4110ab9e54e7c42325c1', '_collection_name': 'my_collection_memory'}

üí° Only documents with topic='rag' are returned!


In [10]:
print("\n" + "-" * 80)
print("MULTIPLE FILTERS EXAMPLE (AND LOGIC)")
print("-" * 80)

# Filter for documents where:
# - topic='rag' AND
# - difficulty='intermediate'
multi_filter = Filter(
    must=[
        FieldCondition(key="metadata.topic", match=MatchValue(value="rag")),
        FieldCondition(key="metadata.difficulty", match=MatchValue(value="intermediate"))
    ]
)

# You can use this filter in similarity_search:
# results_multi = qdrant_store_memory.similarity_search("RAG", k=2, filter=multi_filter)

print("\nFilter structure:")
print("  must=[")
print("    FieldCondition(key='metadata.topic', match='rag'),")
print("    FieldCondition(key='metadata.difficulty', match='intermediate')")
print("  ]")
print("\nüí° Both conditions must be true (AND logic)")
print("üí° For OR logic, use should=[...] instead of must=[...]")


--------------------------------------------------------------------------------
MULTIPLE FILTERS EXAMPLE (AND LOGIC)
--------------------------------------------------------------------------------

Filter structure:
  must=[
    FieldCondition(key='metadata.topic', match='rag'),
    FieldCondition(key='metadata.difficulty', match='intermediate')
  ]

üí° Both conditions must be true (AND logic)
üí° For OR logic, use should=[...] instead of must=[...]


In [11]:
# Same search, but only among filtered documents
results_filtered = qdrant_store_memory.similarity_search(
    "Tell me about RAG",
    k=2,
    filter=multi_filter
)

print("\nQuery: 'Tell me about RAG'")
print("Filter: topic='rag'")
print("\nFiltered search results:")
for i, doc in enumerate(results_filtered, 1):
    print(f"  {i}. {doc.page_content}")
    print(f"     Metadata: {doc.metadata}")

print("\nüí° Only documents with topic='rag' are returned!")


Query: 'Tell me about RAG'
Filter: topic='rag'

Filtered search results:
  1. RAG combines retrieval and generation
     Metadata: {'topic': 'rag', 'difficulty': 'intermediate', '_id': '06bad21381fe4110ab9e54e7c42325c1', '_collection_name': 'my_collection_memory'}

üí° Only documents with topic='rag' are returned!


In [12]:
print("\n" + "=" * 80)
print("QDRANT WITH LOCAL PERSISTENCE")
print("=" * 80)
print()

# Step 1: Specify a local directory for storage
qdrant_path = "./qdrant_data"

# Step 2: Create persistent Qdrant client
# Data will be saved in the ./qdrant_data directory
qdrant_client_persistent = QdrantClient(path=qdrant_path)

# Step 3: Create collection (same as before)
qdrant_client_persistent.recreate_collection(
    collection_name="my_collection_persistent",
    vectors_config=VectorParams(size=768, distance=Distance.COSINE),
)

# Step 4: Create vector store wrapper
qdrant_store_persistent = QdrantVectorStore(
    client=qdrant_client_persistent,
    collection_name="my_collection_persistent",
    embedding=embeddings
)

# Step 5: Add documents
qdrant_store_persistent.add_documents(sample_docs)
print(f"‚úì Added documents to Qdrant (persistent)")
print(f"  Storage location: {qdrant_path}")
print(f"  Collection: my_collection_persistent")
print(f"  ‚ö†Ô∏è  Data will persist even after this script ends!")

# Step 6: Search
results = qdrant_store_persistent.similarity_search(
    "Tell me about LangChain",
    k=2
)

print("\nQuery: 'Tell me about LangChain'")
print("\nSearch results:")
for i, doc in enumerate(results, 1):
    print(f"  {i}. {doc.page_content}")
    print(f"     Metadata: {doc.metadata}")

print("\nüí° Next time you run this, you can load the same data from disk!")


QDRANT WITH LOCAL PERSISTENCE



  qdrant_client_persistent.recreate_collection(


‚úì Added documents to Qdrant (persistent)
  Storage location: ./qdrant_data
  Collection: my_collection_persistent
  ‚ö†Ô∏è  Data will persist even after this script ends!

Query: 'Tell me about LangChain'

Search results:
  1. LangChain simplifies LLM applications
     Metadata: {'topic': 'langchain', 'difficulty': 'beginner', '_id': '75c0a44296694e79aa09f56c8d4e891f', '_collection_name': 'my_collection_persistent'}
  2. RAG combines retrieval and generation
     Metadata: {'topic': 'rag', 'difficulty': 'intermediate', '_id': 'a6894c57eddc494889e351dd6b4ff3ef', '_collection_name': 'my_collection_persistent'}

üí° Next time you run this, you can load the same data from disk!


In [13]:
print("\n" + "=" * 80)
print("QDRANT FROM_DOCUMENTS (RECOMMENDED METHOD)")
print("=" * 80)
print()

# Create Qdrant store directly from documents
# This is the easiest way - everything happens in one call!
qdrant_store_easy = QdrantVectorStore.from_documents(
    documents=sample_docs,          # Your documents
    embedding=embeddings,            # Embedding function
    path="./qdrant_easy",           # Local persistence (optional)
    collection_name="rag_collection" # Collection name
)

print("‚úì Created Qdrant store from documents")
print("  Collection: rag_collection")
print("  Storage: ./qdrant_easy")
print("  Documents: 3")
print("\nüí° This is the recommended approach for most use cases!")


QDRANT FROM_DOCUMENTS (RECOMMENDED METHOD)

‚úì Created Qdrant store from documents
  Collection: rag_collection
  Storage: ./qdrant_easy
  Documents: 3

üí° This is the recommended approach for most use cases!


In [14]:
# Search with scores
results_with_scores = qdrant_store_easy.similarity_search_with_score(
    "Vector databases",
    k=3
)

print("\nQuery: 'Vector databases'")
print("\nSearch results with similarity scores:")
print()
for doc, score in results_with_scores:
    print(f"  Score: {score:.4f}")  # Similarity score (higher = more similar)
    print(f"  Content: {doc.page_content}")
    print(f"  Metadata: {doc.metadata}")
    print()

print("üí° Scores help you filter out low-quality results")
print("üí° You can set a threshold (e.g., only return results with score > 0.7)")


Query: 'Vector databases'

Search results with similarity scores:

  Score: 0.7914
  Content: Vector databases enable semantic search
  Metadata: {'topic': 'vectordb', 'difficulty': 'intermediate', '_id': '274e4b892f134968bfbb4a41ba407572', '_collection_name': 'rag_collection'}

  Score: 0.4856
  Content: RAG combines retrieval and generation
  Metadata: {'topic': 'rag', 'difficulty': 'intermediate', '_id': '73d2736a24ad477c97d6ed42370bdeab', '_collection_name': 'rag_collection'}

  Score: 0.3989
  Content: LangChain simplifies LLM applications
  Metadata: {'topic': 'langchain', 'difficulty': 'beginner', '_id': '264d12527d634f748051b0a5b380cdac', '_collection_name': 'rag_collection'}

üí° Scores help you filter out low-quality results
üí° You can set a threshold (e.g., only return results with score > 0.7)


#### Weaviate Vec-store


In [18]:
from weaviate.classes.query import Filter


print("\n" + "=" * 80)
print("WEAVIATE LOCAL VECTOR STORE EXAMPLE")
print("=" * 80)
print()
print("‚ö†Ô∏è  Note: This requires Weaviate running locally on port 8080")
print("   If not running, you'll see connection errors (that's OK for learning!)")
print()

try:
    import weaviate
    from langchain_weaviate import WeaviateVectorStore
    
    print("-" * 80)
    print("Connecting to Local Weaviate")
    print("-" * 80)
    
    # Step 1: Connect to local Weaviate instance
    # This assumes Weaviate is running on localhost:8080
    weaviate_client = weaviate.connect_to_local(
        host="localhost",
        port=8080,
        grpc_port=50051
    )
    
    print("‚úì Connected to local Weaviate")
    print("  Host: localhost:8080")
    print("  gRPC Port: 50051")
    
    # Step 2: Create Weaviate vector store
    print("\n" + "-" * 80)
    print("Creating Weaviate Vector Store")
    print("-" * 80)
    
    weaviate_store = WeaviateVectorStore(
        client=weaviate_client,
        index_name="MyDocuments",  # Collection name in Weaviate
        text_key="text",            # Field name for document text
        embedding=embeddings
    )
    
    # Step 3: Add documents
    weaviate_store.add_documents(sample_docs)
    print("‚úì Added documents to Weaviate")
    print("  Index: MyDocuments")
    print("  Documents: 3")
    
    # Step 4: Basic Search
    print("\n" + "-" * 80)
    print("Basic Search")
    print("-" * 80)
    
    results = weaviate_store.similarity_search(
        "Tell me about RAG",
        k=2
    )
    
    print("\nQuery: 'Tell me about RAG'")
    print("\nSearch results:")
    for i, doc in enumerate(results, 1):
        print(f"  {i}. {doc.page_content}")
        print(f"     Metadata: {doc.metadata}")
    
    # Step 5: Search with Metadata Filter
    print("\n" + "-" * 80)
    print("Search with Metadata Filter")
    print("-" * 80)
    
    # Weaviate uses where_filter with different syntax
    results_filtered = weaviate_store.similarity_search("Tell me about databases", 
    k=2, 
    filters=Filter.by_property("difficulty").equal("intermediate") ) # Proper Filter object
    
    print("\nQuery: 'Tell me about databases'")
    print("Filter: difficulty='intermediate'")
    print("\nFiltered search results:")
    for i, doc in enumerate(results_filtered, 1):
        print(f"  {i}. {doc.page_content}")
        print(f"     Metadata: {doc.metadata}")
    
    # Step 6: Search with Scores
    print("\n" + "-" * 80)
    print("Search with Scores")
    print("-" * 80)
    
    results_with_scores = weaviate_store.similarity_search_with_score(
        "Vector databases",
        k=3
    )
    
    print("\nQuery: 'Vector databases'")
    print("\nSearch results with scores:")
    for doc, score in results_with_scores:
        print(f"  Score: {score:.4f}")
        print(f"  Content: {doc.page_content}")
        print(f"  Metadata: {doc.metadata}")
        print()
    
    # Step 7: Alternative - Create from Documents
    print("-" * 80)
    print("Creating Weaviate from Documents (Alternative Method)")
    print("-" * 80)
    
    weaviate_store_easy = WeaviateVectorStore.from_documents(
        documents=sample_docs,
        embedding=embeddings,
        client=weaviate_client,
        index_name="EasyDocuments"
    )
    
    print("‚úì Created Weaviate store from documents")
    
    # Quick search
    results = weaviate_store_easy.similarity_search("LangChain", k=2)
    print("\nQuick search results:")
    for i, doc in enumerate(results, 1):
        print(f"  {i}. {doc.page_content}")
    
    # Clean up
    weaviate_client.close()
    print("\n‚úì Closed Weaviate connection")
    
except Exception as e:
    print(f"‚úó Weaviate error: {e}")
    print()
    print("Troubleshooting:")
    print("1. Check if Weaviate is running: docker ps")
    print("2. Start Weaviate: docker run -d -p 8080:8080 -p 50051:50051 \\")
    print("     --name weaviate cr.weaviate.io/semitechnologies/weaviate:latest")
    print("3. Check if port 8080 is available: lsof -i :8080")
    print("4. Check Weaviate logs: docker logs weaviate")
    print()
    print("üí° It's OK if this doesn't work - you can still learn from the code!")


WEAVIATE LOCAL VECTOR STORE EXAMPLE

‚ö†Ô∏è  Note: This requires Weaviate running locally on port 8080
   If not running, you'll see connection errors (that's OK for learning!)

--------------------------------------------------------------------------------
Connecting to Local Weaviate
--------------------------------------------------------------------------------
‚úì Connected to local Weaviate
  Host: localhost:8080
  gRPC Port: 50051

--------------------------------------------------------------------------------
Creating Weaviate Vector Store
--------------------------------------------------------------------------------
‚úì Added documents to Weaviate
  Index: MyDocuments
  Documents: 3

--------------------------------------------------------------------------------
Basic Search
--------------------------------------------------------------------------------

Query: 'Tell me about RAG'

Search results:
  1. RAG combines retrieval and generation
     Metadata: {'difficulty': 