#### Creating a collection/table:

In [None]:
PUT /collections/rentals
{
  "vectors": {
    "size": 300,
    "distance": "Cosine"
  }
}

#### Insert/Write:
- This request demonstrates a batch operation to upload data points into a collection.

In [None]:
PUT /collections/rentals/points
{
  "batch": {
    "ids": [1, 2],
    "vectors": [
      [0.9, -0.5, ..., 0.0],
      [0.1, 0.4, ..., 0.3]
    ],
    "payload": [
      {"city": "Bangalore", "sqft": 990, "img_url": "example.com/rental1.jpg", "tags": ["..."]},
      {"city": "Hyderabad", "sqft": 1550, "img_url": "example.com/rental2.jpg", "description": "..."}
    ]
  }
}

#### Field indexing:
- This section explains how to create indices on payload fields to improve filtering efficiency.

In [None]:
// Vector indexing happens by default
// Each payload index adds more links to keep the graph connected for effective filtering
// Repeat for 'sqft' field with 'integer' type
PUT /collections/rentals/index
{
  "field_name": "city",
  "field_schema": {
    "type": "keyword"
  }
}

#### Search/Read:
- This demonstrates a vector search query combined with specific metadata filters.

In [None]:
POST /collections/rentals/points/query
{
  "query": [0.2, 0.3, ..., 0.4], // generated from the user's query text using the same embedding model
  "filter": { "must": [ {"key": "city", "match": {"value": "Bangalore"}}, {"key": "sqft", "range": { "gte": 1000 }}]},
  "limit": 10
}

// Response:
[
  {"id": 4, "score": 0.56, "payload": {...}},
  {"id": 2, "score": 0.40, "payload": {...}},
  {"id": 5, "score": 0.23, "payload": {...}}
]

In [None]:
# Install the Qdrant client if it's not already installed
!uv pip install qdrant-client -q

#### Setup Qdrant Cloud Client

In [None]:
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance


# Qdrant Cloud URL and API key are read through Colab's `userdata` helper
# so the credentials never appear in the notebook itself.
from google.colab import userdata
QDRANT_URL = userdata.get('QDRANT_URL')
QDRANT_API_KEY = userdata.get('QDRANT_API_KEY')

cloud_client = QdrantClient(
    url=QDRANT_URL,
    api_key=QDRANT_API_KEY,
)

# Creating a collection that already exists raises an error, so only create
# it when it is missing. This keeps the cell safe to re-run against the same
# cluster (e.g. after Restart & Run All).
if not cloud_client.collection_exists("testtest"):
    cloud_client.create_collection(
        collection_name="testtest",
        vectors_config=VectorParams(size=4, distance=Distance.COSINE),
    )

print("Connected to Qdrant Cloud and collection created!")

Connected to Qdrant Cloud and collection created!


#### Setup In-Memory Client

In [None]:
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance

# Start an in-memory Qdrant client
memory_client = QdrantClient(":memory:")

# Register a 4-dimensional collection that scores vectors by dot product
memory_vectors_config = VectorParams(size=4, distance=Distance.DOT)
memory_client.create_collection(
    collection_name="test_collection",
    vectors_config=memory_vectors_config,
)

print("In-memory collection created successfully!")

In-memory collection created successfully!


#### Setup Local Storage Client

In [None]:
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance

# Initialize client and point to a local directory
# This will create a folder named 'qdrant_storage' in your Colab files
local_client = QdrantClient(path="./qdrant_storage")

# The storage folder persists between runs, so the collection may already be
# present; only create it when it is missing to keep this cell re-runnable.
if not local_client.collection_exists("local_collection"):
    local_client.create_collection(
        collection_name="local_collection",
        vectors_config=VectorParams(size=4, distance=Distance.COSINE),
    )

print("Local persistent collection created in './qdrant_storage'")

Local persistent collection created in './qdrant_storage'


#### Insert Raw Vectors (Manual)

In [None]:
from qdrant_client.models import PointStruct

# Step 1: describe each point as (id, vector, payload).
# The vector is a list of floats; the payload is optional metadata.
point_rows = [
    (1, [0.05, 0.61, 0.76, 0.74], {"city": "Tunis", "event": "Hackathon"}),
    (2, [0.19, 0.81, 0.75, 0.11], {"city": "London", "event": "Workshop"}),
    (3, [0.36, 0.55, 0.47, 0.94], {"city": "Tunis", "event": "Demo"}),
]
points = [
    PointStruct(id=point_id, vector=vector, payload=payload)
    for point_id, vector, payload in point_rows
]

# Step 2: upsert into the cloud collection.
# The same call shape works with `memory_client` and `local_client`.
cloud_client.upsert(collection_name="testtest", points=points)

print("Points inserted successfully!")

Points inserted successfully!


#### For Qdrant Cloud

In [None]:
# Upsert into "testtest", the 4-dimensional collection created on the cloud
# client earlier in this notebook. No collection named "cloud_collection" is
# ever created here.
cloud_client.upsert(
    collection_name="testtest",
    points=points
)

#### For In-Memory


In [None]:
# Upsert into "test_collection", the collection created on the in-memory
# client earlier in this notebook. No collection named "memory_collection" is
# ever created here.
memory_client.upsert(
    collection_name="test_collection",
    points=points
)

#### For Local Path

In [None]:
# Write the same points into the on-disk collection
local_collection_name = "local_collection"
local_client.upsert(collection_name=local_collection_name, points=points)

In [None]:
!uv pip install -q sentence-transformers

#### Embedding Model

In [None]:
from sentence_transformers import SentenceTransformer

# Load the sentence-embedding model (it is downloaded on first use)
embedding_model_id = 'all-MiniLM-L6-v2'
model = SentenceTransformer(embedding_model_id)

# Note: this model produces vectors of size 384, so any collection that
# stores its embeddings MUST be created with a matching size (384).

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [None]:
# Encode a throwaway sentence and measure the vector length, so the
# collection size is derived from the model instead of being hard-coded.
probe_embedding = model.encode("Hello World")
EMBEDDING_SIZE = len(probe_embedding)
print(f"embedding size equal to: {EMBEDDING_SIZE}")

embedding size equal to: 384


In [None]:
# Name of the Qdrant collection used by the text-embedding cells that follow
# (create, upsert, delete and scroll all reference this constant).
COLLECTION_NAME = "demo_day01"

In [None]:
from qdrant_client import QdrantClient
from qdrant_client.models import VectorParams, Distance

# Qdrant Cloud URL and API Key
from google.colab import userdata
QDRANT_URL = userdata.get('QDRANT_URL')
QDRANT_API_KEY = userdata.get('QDRANT_API_KEY')

cloud_client = QdrantClient(
    url=QDRANT_URL,
    api_key=QDRANT_API_KEY,
)

# The collection size comes from EMBEDDING_SIZE so it always matches the
# embedding model. Creating a collection that already exists raises an error,
# so only create it when it is missing; this keeps the cell re-runnable.
if not cloud_client.collection_exists(COLLECTION_NAME):
    cloud_client.create_collection(
        collection_name=COLLECTION_NAME,
        vectors_config=VectorParams(size=EMBEDDING_SIZE, distance=Distance.COSINE),
    )

print("Connected to Qdrant Cloud and collection created!")

Connected to Qdrant Cloud and collection created!


In [None]:
from qdrant_client.models import PointStruct

# Sample text data: every document has an "id" and a "text", plus optional
# metadata keys ("city", "talk", "person", "media").
documents = [
    {"id": 1, "text": "we have some participants from sousse.", "city": "Sousse"},
    {"id": 2, "text": "we have some participants from sfax.", "city": "Sfax"},
    {"id": 3, "text": "hafedh talked about chonkie and qdrant.", "talk": "chonkie"},
    {"id": 4, "text": "eya talked about ADK and Qdrant MCP", "talk": "ADK"},
    {"id": 5, "text": "arbi talked about Qdrant.", "talk": "Qdrant"},
    {"id": 6, "text": "Qdrant event is huge.", "city": "Tunis"},
    {"id": 7, "text": "there is nothing we can do.", "person": "Napoleon"},
    {"id": 8, "text": "this talk is on discord.", "media": "discord"},
    {"id": 9, "text": "the media posts are on facebook.", "media": "facebook"},
    {"id": 10, "text": "Qdrant is a vector database.", "city": "Berlin"},
    {"id": 11, "text": "The hackathon is in Tunis.", "city": "Tunis"},
]

# Embed all texts in a single batch call instead of one model call per
# document; .tolist() turns the result into plain lists of floats.
embeddings = model.encode([doc["text"] for doc in documents]).tolist()

# Prepare points. The payload keeps every field except the id, so optional
# metadata is stored without risking a KeyError and no key ("person",
# "media", ...) is silently dropped.
points = [
    PointStruct(
        id=doc["id"],
        vector=vector,
        payload={key: value for key, value in doc.items() if key != "id"},
    )
    for doc, vector in zip(documents, embeddings)
]

# Insert into Cloud
cloud_client.upsert(
    collection_name=COLLECTION_NAME,
    points=points
)

print(f"Successfully inserted {len(points)} text-based points.")

Successfully inserted 11 text-based points.


#### Delete Points by ID

In [None]:
from qdrant_client.models import PointIdsList

# IDs of the points to remove from the collection
ids_to_remove = [11, 10, 9]
id_selector = PointIdsList(points=ids_to_remove)

cloud_client.delete(collection_name=COLLECTION_NAME, points_selector=id_selector)
print("Points 11 , 10 , 9 deleted.")

Points 11 , 10 , 9 deleted.


#### Delete Points by Filter

In [None]:
from qdrant_client.models import Filter, FieldCondition, MatchValue, PayloadSchemaType

# Create a keyword payload index on the 'talk' field, which the filter below uses
cloud_client.create_payload_index(
    collection_name=COLLECTION_NAME,
    field_name="talk",
    field_schema=PayloadSchemaType.KEYWORD,
)

# Select every point whose 'talk' value is "ADK" and delete it
adk_condition = FieldCondition(key="talk", match=MatchValue(value="ADK"))
adk_filter = Filter(must=[adk_condition])
cloud_client.delete(collection_name=COLLECTION_NAME, points_selector=adk_filter)

print("All pts about ADK deleted.")

All pts about ADK deleted.


#### Filter Search (Match Any)

In [None]:
from qdrant_client.models import Filter, FieldCondition, MatchAny

# Keep points whose 'talk' field equals any of the listed values
talk_in_list = FieldCondition(key="talk", match=MatchAny(any=["chonkie", "ADK"]))

results = cloud_client.scroll(
    collection_name=COLLECTION_NAME,
    scroll_filter=Filter(must=[talk_in_list]),
)
results

([Record(id=3, payload={'text': 'hafedh talked about chonkie and qdrant.', 'talk': 'chonkie'}, vector=None, shard_key=None, order_value=None)],
 None)

#### Retrieve All "Talk" Documents

In [None]:
from qdrant_client.models import Filter, IsEmptyCondition, PayloadField

# Fetch every point whose "talk" key is present and non-empty.
# ID 4 (talk "ADK") was removed by the delete-by-filter cell, so only
# IDs 3 and 5 are expected here.
# The result is bound to `talk_points` rather than `points`, so the list of
# PointStruct objects built for the upsert is not overwritten.
talk_points, _ = cloud_client.scroll(
    collection_name=COLLECTION_NAME,
    scroll_filter=Filter(
        must_not=[
            IsEmptyCondition(
                is_empty=PayloadField(key="talk")
            )
        ]
    ),
    with_payload=True
)

print(f"Total talks found: {len(talk_points)}")
for point in talk_points:
    print(f"- {point.payload['talk']}: {point.payload['text']}")

Total talks found: 2
- chonkie: hafedh talked about chonkie and qdrant.
- Qdrant: arbi talked about Qdrant.


In [None]:
!uv pip install -q "chonkie[qdrant]"

#### sample text

In [None]:
# Multi-sentence sample text that is passed to the chunker in the
# sentence-chunking cell.
sample_text = """
Qdrant is a high-performance vector database designed for advanced AI applications.
It allows users to store and search large collections of high-dimensional vectors.
Vector databases are essential for Retrieval-Augmented Generation (RAG).
Hafedh is hosting a hackathon in Tunis today to teach people about Qdrant.
Participants are learning about different storage modes like Cloud, Memory, and Local.
Chunking is the process of breaking large text into smaller pieces for better retrieval.
Chonkie is a lightweight and fast library for chunking text in Python.
Using the right chunking strategy improves the accuracy of semantic search.
"""


Text file 'hackathon_info.txt' created.


In [None]:
from chonkie import QdrantHandshake, TokenChunker
from google.colab import userdata

# Embedding model shared by every handshake variant
MODEL_NAME = "all-MiniLM-L6-v2"

# 1. CLOUD HANDSHAKE
# Credentials are read through Colab's `userdata` helper.
handshake_url = userdata.get('QDRANT_URL')
handshake_api_key = userdata.get('QDRANT_API_KEY')
cloud_handshake = QdrantHandshake(
    collection_name="cloud_handshake_demo",
    embedding_model=MODEL_NAME,
    url=handshake_url,
    api_key=handshake_api_key,
)

# 2. IN-MEMORY HANDSHAKE (alternative, left disabled)
# Use :memory: for ephemeral sessions
# memory_handshake = QdrantHandshake(
#     location=":memory:",
#     collection_name="memory_demo",
#     embedding_model=MODEL_NAME
# )

# 3. LOCAL PATH HANDSHAKE (alternative, left disabled)
# Persistent folder in your Colab files
# local_handshake = QdrantHandshake(
#     path="./qdrant_handshake_storage",
#     collection_name="local_demo",
#     embedding_model=MODEL_NAME
# )

#### Sentence Chunking & Handshake Upload

In [None]:
from chonkie import QdrantHandshake, SentenceChunker
# Configure the sentence chunker: bert-base-uncased tokenizer,
# chunk_size=100 and chunk_overlap=10.
chunker_options = {
    "tokenizer": 'bert-base-uncased',
    "chunk_size": 100,
    "chunk_overlap": 10,
}
chunker = SentenceChunker(**chunker_options)

# Split the sample text into chunks
chunks = chunker.chunk(sample_text)

# Hand the chunks to the handshake, which embeds and inserts them in one call
cloud_handshake.write(chunks)

print(f"Created {len(chunks)} semantic chunks and stored them in Qdrant!")

Created 2 semantic chunks and stored them in Qdrant!


#### Notes
- Vector Size: Ensure the length of the list in `vector=[...]` exactly matches the `size` defined in `VectorParams` (4 for the raw-vector collections in this notebook, 384 for the `all-MiniLM-L6-v2` embeddings).

- Payloads: These are like JSON objects. You can store text, numbers, or booleans here and use them to filter your search results later.

- Batching: If you have thousands of points, insert them in batches (e.g., 100 at a time) rather than one by one for better performance.