In [2]:
from langchain_openai import OpenAIEmbeddings
# from langchain.vectorstores import Chroma, Pinecone, FAISS  # Semi-Depricated
from langchain_chroma import Chroma
from langchain_pinecone import Pinecone

In [3]:
from langchain.schema import Document

# Create LangChain documents for IPL players

doc1 = Document(
        page_content="Virat Kohli is one of the most successful and consistent batsmen in IPL history. Known for his aggressive batting style and fitness, he has led the Royal Challengers Bangalore in multiple seasons.",
        metadata={"team": "Royal Challengers Bangalore"}
    )
doc2 = Document(
        page_content="Rohit Sharma is the most successful captain in IPL history, leading Mumbai Indians to five titles. He's known for his calm demeanor and ability to play big innings under pressure.",
        metadata={"team": "Mumbai Indians"}
    )
doc3 = Document(
        page_content="MS Dhoni, famously known as Captain Cool, has led Chennai Super Kings to multiple IPL titles. His finishing skills, wicketkeeping, and leadership are legendary.",
        metadata={"team": "Chennai Super Kings"}
    )
doc4 = Document(
        page_content="Jasprit Bumrah is considered one of the best fast bowlers in T20 cricket. Playing for Mumbai Indians, he is known for his yorkers and death-over expertise.",
        metadata={"team": "Mumbai Indians"}
    )
doc5 = Document(
        page_content="Ravindra Jadeja is a dynamic all-rounder who contributes with both bat and ball. Representing Chennai Super Kings, his quick fielding and match-winning performances make him a key player.",
        metadata={"team": "Chennai Super Kings"}
    )

docs = [doc1, doc2, doc3, doc4, doc5]


In [4]:
vector_store = Chroma(
    embedding_function=OpenAIEmbeddings(),  # here shape for each vector is (1,1536), so each plotted in 1536 dim
    persist_directory='my_chroma_db',
    collection_name='sample'
)

In [5]:
# add documents
vector_store.add_documents(docs)

['e2ad22a6-e343-4129-aa96-8c9ab0911b03',
 '1ed51445-2348-4e67-8c95-054acac2b7b9',
 'e22c5a03-f81c-44ef-bdc8-caac6979e4d5',
 '72713bf6-02d2-478a-a370-d8c5b4b04435',
 '0bed380b-9812-4838-8661-796e52fcdfbc']

In [6]:
# view documents
vector_store.get(include=['embeddings','documents', 'metadatas'])

{'ids': ['d12f7a8f-1cb1-4c9c-a359-8c2a7df8d1b9',
  '9c12f50d-d895-4424-a703-090ef64bd160',
  '3be3517b-cf86-44b9-a680-a43912017760',
  'fc7f96ea-bf65-49ac-90b7-2654ed603b5b',
  'e2ad22a6-e343-4129-aa96-8c9ab0911b03',
  '1ed51445-2348-4e67-8c95-054acac2b7b9',
  'e22c5a03-f81c-44ef-bdc8-caac6979e4d5',
  '72713bf6-02d2-478a-a370-d8c5b4b04435',
  '0bed380b-9812-4838-8661-796e52fcdfbc'],
 'embeddings': array([[-0.00545721, -0.01906604,  0.00708297, ..., -0.01629019,
         -0.00041194,  0.00727194],
        [ 0.00078373, -0.00476402,  0.01237088, ..., -0.01723875,
          0.0007744 ,  0.00293533],
        [-0.02713919,  0.00887096,  0.02693628, ..., -0.02585832,
          0.00902315, -0.0199993 ],
        ...,
        [ 0.00078373, -0.00476402,  0.01237088, ..., -0.01723875,
          0.0007744 ,  0.00293533],
        [-0.02713919,  0.00887096,  0.02693628, ..., -0.02585832,
          0.00902315, -0.0199993 ],
        [-0.0181433 ,  0.01275271,  0.03479322, ..., -0.03036547,
         -0

In [7]:
# search documents
vector_store.similarity_search(
    query='Who among these are a bowler?',
    k=2
)

[Document(id='3be3517b-cf86-44b9-a680-a43912017760', metadata={'team': 'Mumbai Indians'}, page_content='Jasprit Bumrah is considered one of the best fast bowlers in T20 cricket. Playing for Mumbai Indians, he is known for his yorkers and death-over expertise.'),
 Document(id='72713bf6-02d2-478a-a370-d8c5b4b04435', metadata={'team': 'Mumbai Indians'}, page_content='Jasprit Bumrah is considered one of the best fast bowlers in T20 cricket. Playing for Mumbai Indians, he is known for his yorkers and death-over expertise.')]

In [8]:
# search with similarity score
vector_store.similarity_search_with_score(
    query='Who among these are a bowler?',
    k=2
)

[(Document(id='3be3517b-cf86-44b9-a680-a43912017760', metadata={'team': 'Mumbai Indians'}, page_content='Jasprit Bumrah is considered one of the best fast bowlers in T20 cricket. Playing for Mumbai Indians, he is known for his yorkers and death-over expertise.'),
  0.35432544350624084),
 (Document(id='72713bf6-02d2-478a-a370-d8c5b4b04435', metadata={'team': 'Mumbai Indians'}, page_content='Jasprit Bumrah is considered one of the best fast bowlers in T20 cricket. Playing for Mumbai Indians, he is known for his yorkers and death-over expertise.'),
  0.35432544350624084)]

In [9]:
# meta-data filtering
vector_store.similarity_search_with_score(
    query="",
    filter={"team": "Chennai Super Kings"}
)

[(Document(id='9c12f50d-d895-4424-a703-090ef64bd160', metadata={'team': 'Chennai Super Kings'}, page_content='MS Dhoni, famously known as Captain Cool, has led Chennai Super Kings to multiple IPL titles. His finishing skills, wicketkeeping, and leadership are legendary.'),
  0.6488070487976074),
 (Document(id='e22c5a03-f81c-44ef-bdc8-caac6979e4d5', metadata={'team': 'Chennai Super Kings'}, page_content='MS Dhoni, famously known as Captain Cool, has led Chennai Super Kings to multiple IPL titles. His finishing skills, wicketkeeping, and leadership are legendary.'),
  0.6488070487976074),
 (Document(id='fc7f96ea-bf65-49ac-90b7-2654ed603b5b', metadata={'team': 'Chennai Super Kings'}, page_content='Ravindra Jadeja is a dynamic all-rounder who contributes with both bat and ball. Representing Chennai Super Kings, his quick fielding and match-winning performances make him a key player.'),
  0.6565988659858704),
 (Document(id='0bed380b-9812-4838-8661-796e52fcdfbc', metadata={'team': 'Chennai S

In [10]:
# update documents
updated_doc1 = Document(
    page_content="Virat Kohli, the former captain of Royal Challengers Bangalore (RCB), is renowned for his aggressive leadership and consistent batting performances. He holds the record for the most runs in IPL history, including multiple centuries in a single season. Despite RCB not winning an IPL title under his captaincy, Kohli's passion and fitness set a benchmark for the league. His ability to chase targets and anchor innings has made him one of the most dependable players in T20 cricket.",
    metadata={"team": "Royal Challengers Bangalore"}
)

vector_store.update_document(document_id='d12f7a8f-1cb1-4c9c-a359-8c2a7df8d1b9', document=updated_doc1)


In [11]:
# view documents
vector_store.get()

{'ids': ['d12f7a8f-1cb1-4c9c-a359-8c2a7df8d1b9',
  '9c12f50d-d895-4424-a703-090ef64bd160',
  '3be3517b-cf86-44b9-a680-a43912017760',
  'fc7f96ea-bf65-49ac-90b7-2654ed603b5b',
  'e2ad22a6-e343-4129-aa96-8c9ab0911b03',
  '1ed51445-2348-4e67-8c95-054acac2b7b9',
  'e22c5a03-f81c-44ef-bdc8-caac6979e4d5',
  '72713bf6-02d2-478a-a370-d8c5b4b04435',
  '0bed380b-9812-4838-8661-796e52fcdfbc'],
 'embeddings': None,
 'documents': ["Virat Kohli, the former captain of Royal Challengers Bangalore (RCB), is renowned for his aggressive leadership and consistent batting performances. He holds the record for the most runs in IPL history, including multiple centuries in a single season. Despite RCB not winning an IPL title under his captaincy, Kohli's passion and fitness set a benchmark for the league. His ability to chase targets and anchor innings has made him one of the most dependable players in T20 cricket.",
  'MS Dhoni, famously known as Captain Cool, has led Chennai Super Kings to multiple IPL titl

In [12]:
# delete document
vector_store.delete(ids=['3a6ba33a-8e03-48bf-a308-a1b9ce73f1af'])

In [13]:
# view documents
vector_store.get()

{'ids': ['d12f7a8f-1cb1-4c9c-a359-8c2a7df8d1b9',
  '9c12f50d-d895-4424-a703-090ef64bd160',
  '3be3517b-cf86-44b9-a680-a43912017760',
  'fc7f96ea-bf65-49ac-90b7-2654ed603b5b',
  'e2ad22a6-e343-4129-aa96-8c9ab0911b03',
  '1ed51445-2348-4e67-8c95-054acac2b7b9',
  'e22c5a03-f81c-44ef-bdc8-caac6979e4d5',
  '72713bf6-02d2-478a-a370-d8c5b4b04435',
  '0bed380b-9812-4838-8661-796e52fcdfbc'],
 'embeddings': None,
 'documents': ["Virat Kohli, the former captain of Royal Challengers Bangalore (RCB), is renowned for his aggressive leadership and consistent batting performances. He holds the record for the most runs in IPL history, including multiple centuries in a single season. Despite RCB not winning an IPL title under his captaincy, Kohli's passion and fitness set a benchmark for the league. His ability to chase targets and anchor innings has made him one of the most dependable players in T20 cricket.",
  'MS Dhoni, famously known as Captain Cool, has led Chennai Super Kings to multiple IPL titl

## Working with Qdrant
### Why I Prefer Qdrant for Vector Store

- ✅ **Native LangChain integration**  
  Works seamlessly with `langchain_qdrant` module.

- ⚡ **High performance & scalability**  
  Uses HNSW algorithm for fast and accurate vector search.

- 🐳 **Simple local deployment via Docker**  
  Easily spin up locally with persistence using a single Docker command.

- 📌 **Persistent storage**  
  Unlike Chroma (in-memory by default), Qdrant stores data on disk.

- 🔌 **Cloud-ready**  
  Can switch to Qdrant Cloud by changing `location` parameter. No major code changes.

- 🧠 **Better recall quality**  
  Optimized for LLM/RAG workloads with good search relevance.

- 🔁 **Metadata & payload support**  
  Supports metadata filtering and advanced querying natively.

- 🔄 **Update-friendly**  
  Easy to delete, overwrite, and re-upload documents without unexpected issues.

- 🔍 **Transparent inspection**  
  You can view all stored vectors and payloads via REST API or Qdrant UI.

start server - ```docker run -p 6333:6333 -v qdrant_data:/qdrant/storage qdrant/qdrant```

In [None]:
from langchain_qdrant import Qdrant
from langchain_openai import OpenAIEmbeddings
from qdrant_client import QdrantClient
from qdrant_client.http.models import VectorParams, Distance

# Create EMPTY vector store (not available via lc)

# Step 1: Connect to Qdrant
client = QdrantClient(host="localhost", port=6333)

# Step 2: Create an empty collection manually
collection_name = "ipl_players"
if client.collection_exists(collection_name):
    client.delete_collection(collection_name)

client.create_collection(
    collection_name=collection_name,
    vectors_config=VectorParams(
        size=1536,  # for OpenAI embeddings
        distance=Distance.COSINE,
    )
)

# Step 3: Initialize LangChain vector store (no documents yet)
vector_store = Qdrant(
    client=client,
    collection_name=collection_name,
    embeddings=OpenAIEmbeddings()
)

# Add documents
vector_store.add_documents(docs)

# Search documents
results = vector_store.similarity_search(
    query='Who among these are a bowler?',
    k=2
)

# Search with score
results_with_score = vector_store.similarity_search_with_score(
    query='Who among these are a bowler?',
    k=2
)

# Metadata filtering
csk_players = vector_store.similarity_search_with_score(
    query="",
    filter={"team": "Chennai Super Kings"}
)

# Delete documents (requires document ids from search results)
# vector_store.delete_documents([doc_id])

# Note: Qdrant handles updates by deleting and re-inserting documents
# NOT available - vector_store.update_document(document_id=…, document=updated_doc1)
# To update, you would:
# 1. Delete the old document
# 2. Add the new document

# get(include=…) not available. use
# QdrantClient.scroll()

In [16]:
csk_players

[(Document(metadata={'team': 'Chennai Super Kings', '_id': 'f7512f7b-f32c-422d-92b1-8bb0607d9e95', '_collection_name': 'ipl_players'}, page_content='MS Dhoni, famously known as Captain Cool, has led Chennai Super Kings to multiple IPL titles. His finishing skills, wicketkeeping, and leadership are legendary.'),
  0.6757109),
 (Document(metadata={'team': 'Chennai Super Kings', '_id': '626eaab9-de4c-40cd-a427-88437bd269cb', '_collection_name': 'ipl_players'}, page_content='Ravindra Jadeja is a dynamic all-rounder who contributes with both bat and ball. Representing Chennai Super Kings, his quick fielding and match-winning performances make him a key player.'),
  0.6718323)]

In [19]:
# Fetch all points from the collection
points = client.scroll(
    collection_name=collection_name,
    with_payload=True,
    with_vectors=True,
    limit=100
)

for point in points[0]:  # points = (List[PointStruct], next_offset)
    print(f"id: {point.id}")
    print(f"vector: {point.vector[:5]}...")  # Print part of the vector
    print(f"payload: {point.payload}")
    print("=" * 40)

id: 1f12d243-6ab4-4c13-bb2d-7a3e94d5d3ad
vector: [-0.0021045282, -0.0021428505, 0.026800001, 0.012601621, -0.030453386]...
payload: {'page_content': 'Virat Kohli is one of the most successful and consistent batsmen in IPL history. Known for his aggressive batting style and fitness, he has led the Royal Challengers Bangalore in multiple seasons.', 'metadata': {'team': 'Royal Challengers Bangalore'}}
id: 52960293-3a14-4a30-bd60-de4a7a643ed4
vector: [-0.018104512, 0.012812021, 0.0347942, 0.02491663, -0.05048827]...
payload: {'page_content': 'Ravindra Jadeja is a dynamic all-rounder who contributes with both bat and ball. Representing Chennai Super Kings, his quick fielding and match-winning performances make him a key player.', 'metadata': {'team': 'Chennai Super Kings'}}
id: cbcf9d03-8850-4bb1-a491-f861fc480aae
vector: [-0.02714536, 0.008853954, 0.026993142, 0.017187834, -0.02762738]...
payload: {'page_content': 'Jasprit Bumrah is considered one of the best fast bowlers in T20 cricket. P