In [None]:
from qdrant_client import QdrantClient, models

# Run Qdrant in local in-memory mode; every cell below shares this client.
client = QdrantClient(location=":memory:")

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
# Scalar quantization setup
# Original vectors: 1536 dimensions, cosine distance, moved to disk.
scalar_vectors = models.VectorParams(
    size=1536,
    distance=models.Distance.COSINE,
    on_disk=True,
)

# INT8 scalar quantization:
#   quantile=0.99   -> exclude the extreme 1% of values
#   always_ram=True -> keep the quantized vectors in RAM
scalar_quantization = models.ScalarQuantization(
    scalar=models.ScalarQuantizationConfig(
        type=models.ScalarType.INT8,
        quantile=0.99,
        always_ram=True,
    )
)

client.create_collection(
    collection_name="scalar_collection",
    vectors_config=scalar_vectors,
    quantization_config=scalar_quantization,
)

True

In [None]:
# Binary quantization setup
# Original vectors: 1536 dimensions, cosine distance, stored on disk.
binary_vectors = models.VectorParams(
    size=1536,
    distance=models.Distance.COSINE,
    on_disk=True,
)

# One-bit binary encoding; the quantized vectors are kept in RAM.
binary_quantization = models.BinaryQuantization(
    binary=models.BinaryQuantizationConfig(
        encoding=models.BinaryQuantizationEncoding.ONE_BIT,
        always_ram=True,
    )
)

client.create_collection(
    collection_name="binary_collection",
    vectors_config=binary_vectors,
    quantization_config=binary_quantization,
)

True

In [None]:
# Product quantization setup
# Original vectors: 1024 dimensions, cosine distance, stored on disk.
pq_vectors = models.VectorParams(
    size=1024,
    distance=models.Distance.COSINE,
    on_disk=True,
)

# Compression ratio X32 is used here; X4, X8, X16 and X64 are the alternatives.
# always_ram=True keeps the quantized vectors in RAM.
pq_quantization = models.ProductQuantization(
    product=models.ProductQuantizationConfig(
        compression=models.CompressionRatio.X32,
        always_ram=True,
    )
)

client.create_collection(
    collection_name="pq_collection",
    vectors_config=pq_vectors,
    quantization_config=pq_quantization,
)

True

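| Method  | Accuracy | Speed     | Compression |
|---------|----------|-----------|-------------|
| Scalar  | 0.99     | up to 2x  | 4x          |
| Binary  | 0.95*    | up to 40x | 32x         |
| Product | 0.7      | 0.5x      | up to 64x   |

\* For compatible embedding models only.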

In [None]:
# Quantization-aware search with rescoring.
# The query targets "scalar_collection", which is created earlier in this
# notebook and whose vector size (1536) matches the query vector. The previous
# name, "quantized_collection", is never created by any cell here, so the
# query failed on a fresh Restart & Run All.
quantization_search = models.QuantizationSearchParams(
    ignore=False,      # Use quantization for the initial search
    rescore=True,      # Enable original vectors-based rescoring
    oversampling=3.0,  # Retrieve 3x candidates for rescoring
)

response = client.query_points(
    collection_name="scalar_collection",
    query=[0.12] * 1536,  # placeholder 1536-dim query vector
    limit=10,
    search_params=models.SearchParams(
        hnsw_ef=128,
        quantization=quantization_search,
    ),
    with_payload=True,
)

In [None]:
collection_name = "docs_search"

# Dense vector: 384 dimensions, cosine distance.
dense_params = models.VectorParams(size=384, distance=models.Distance.COSINE)

# ColBERT multivector: 128-dim vectors compared with MAX_SIM.
# hnsw_config m=0 because this vector is used for reranking only.
colbert_params = models.VectorParams(
    size=128,
    distance=models.Distance.COSINE,
    multivector_config=models.MultiVectorConfig(
        comparator=models.MultiVectorComparator.MAX_SIM
    ),
    hnsw_config=models.HnswConfigDiff(m=0),
)

# Sparse vector with default parameters.
sparse_params = models.SparseVectorParams()

client.create_collection(
    collection_name=collection_name,
    vectors_config={"dense": dense_params, "colbert": colbert_params},
    sparse_vectors_config={"sparse": sparse_params},
)