# Qdrant Tutorial

### Imports

In [1]:
from qdrant_client import QdrantClient

### Connect to database

In [2]:
# Build the client for the Qdrant instance addressed by host "localhost", port 6333.
client = QdrantClient(
    host="localhost",
    port=6333,
)

### Create collection

In [3]:
from qdrant_client.models import Distance, VectorParams

# Start from a clean slate: a collection left over from an earlier run is
# reported and then dropped before it is created again.
already_present = client.collection_exists(collection_name="my_collection")
if already_present:
    print("Collection already exists!")
    client.delete_collection(collection_name="my_collection")

# 768-dimensional vectors compared by cosine distance.
vector_settings = VectorParams(size=768, distance=Distance.COSINE)
client.create_collection(
    collection_name="my_collection",
    vectors_config=vector_settings,
)

Collection already exists!


True

### Generate embeddings

In [4]:
from sentence_transformers import SentenceTransformer

# Sentence-embedding model; later cells reuse it to embed the search queries.
embedding_fn = SentenceTransformer('sentence-transformers/paraphrase-albert-small-v2')

# Text strings to search from
docs = [
    "Artificial intelligence was founded as an academic discipline in 1956.",
    "Alan Turing was the first person to conduct substantial research in AI.",
    "Born in Maida Vale, London, Turing was raised in southern England.",
]

vectors = embedding_fn.encode(docs)  # One embedding per sentence

# The collection was created with size=768, so each embedding must have exactly
# that many dimensions. Check it here so that a swapped model is caught
# immediately with a clear message.
assert vectors[0].shape == (768,), (
    f"expected 768-dimensional embeddings, got shape {vectors[0].shape}"
)
print("Dim:", vectors[0].shape)

  from tqdm.autonotebook import tqdm, trange
2024-06-26 11:55:33.302216: E external/local_xla/xla/stream_executor/cuda/cuda_dnn.cc:9261] Unable to register cuDNN factory: Attempting to register factory for plugin cuDNN when one has already been registered
2024-06-26 11:55:33.302244: E external/local_xla/xla/stream_executor/cuda/cuda_fft.cc:607] Unable to register cuFFT factory: Attempting to register factory for plugin cuFFT when one has already been registered
2024-06-26 11:55:33.303352: E external/local_xla/xla/stream_executor/cuda/cuda_blas.cc:1515] Unable to register cuBLAS factory: Attempting to register factory for plugin cuBLAS when one has already been registered
2024-06-26 11:55:33.310257: I tensorflow/core/platform/cpu_feature_guard.cc:182] This TensorFlow binary is optimized to use available CPU instructions in performance-critical operations.
To enable the following instructions: AVX2 FMA, in other operations, rebuild TensorFlow with the appropriate compiler flags.


Dim: (768,)


### Insert embeddings

In [5]:
from qdrant_client.models import PointStruct

# Store every sentence with its embedding. Point ids are the positions 0, 1, 2
# and the payload carries the subject tag plus the original text.
res = client.upsert(
    collection_name="my_collection",
    points=[
        PointStruct(
            id=point_id,  # integer id
            vector=embedding.tolist(),  # converted to a plain Python list
            payload={"subject": "history", "text": text},
        )
        for point_id, (text, embedding) in enumerate(zip(docs, vectors))
    ],
)

In [6]:
# Show what the upsert call returned (operation id and completion status).
print(res)

operation_id=0 status=<UpdateStatus.COMPLETED: 'completed'>


`client.upsert` returns an `UpdateResult` object containing the operation id and its status.

### Perform search

In [7]:
# Embed the question with the same model as the documents. encode() is given a
# one-element list, so the result is a batch holding a single embedding.
query_text = "Who is Alan Turing?"
query_vectors = embedding_fn.encode([query_text])

In [8]:
# Similarity search for the embedded question; at most 5 points are returned.
question_vector = query_vectors[0]  # the only embedding in the batch
hits = client.search(
    collection_name="my_collection",
    query_vector=question_vector,
    limit=5,
)

In [9]:
# Display the scored points returned by the search above.
hits

[ScoredPoint(id=2, version=0, score=0.5859946, payload={'subject': 'history', 'text': 'Born in Maida Vale, London, Turing was raised in southern England.'}, vector=None, shard_key=None),
 ScoredPoint(id=1, version=0, score=0.5118257, payload={'subject': 'history', 'text': 'Alan Turing was the first person to conduct substantial research in AI.'}, vector=None, shard_key=None),
 ScoredPoint(id=0, version=0, score=0.12895359, payload={'subject': 'history', 'text': 'Artificial intelligence was founded as an academic discipline in 1956.'}, vector=None, shard_key=None)]

### Perform filtering

In [10]:
# Insert more docs in another subject.
docs = [
    "Machine learning has been used for drug design.",
    "Computational synthesis with AI algorithms predicts molecular properties.",
    "DDR1 is involved in cancers and fibrosis.",
]
vectors = embedding_fn.encode(docs)

res = client.upsert(
    collection_name="my_collection",
    points=[
        PointStruct(
            id=idx+3, # Id is an integer
            vector=vector.tolist(), # Vector is a strict Python list
            payload={"subject": "biology",
                     "text": docs[idx]} # Attributes are prompted inside a dictionary
        )
        for idx, vector in enumerate(vectors)
    ]
)

In [11]:
# Embed the free-text request used by the filtered search.
# NOTE(review): "realted" is a typo for "related". It is left untouched because
# changing the text changes the embedding, and with it the recorded scores.
filter_query_text = 'tell me AI realted information'
query_for_filtering = embedding_fn.encode([filter_query_text])

Perform filtering inside a vector search

In [12]:
from qdrant_client.models import Filter, FieldCondition, MatchValue

# Vector search restricted by a payload filter: only points whose "subject"
# equals "biology" are candidates, and the best two of them are returned.
hits = client.search(
    collection_name="my_collection",
    query_vector=query_for_filtering[0],
    query_filter=Filter(
        must=[  # These conditions are required for search results
            FieldCondition(
                key="subject",
                match=MatchValue(
                    value="biology",
                ),
            )
        ]
    ),
    limit=2  # Return the 2 closest points
)

In [59]:
# Display the scored points returned by the filtered search.
hits

[ScoredPoint(id=4, version=3, score=0.20548509, payload={'subject': 'biology', 'text': 'Computational synthesis with AI algorithms predicts molecular properties.'}, vector=None, shard_key=None),
 ScoredPoint(id=3, version=3, score=0.16327181, payload={'subject': 'biology', 'text': 'Machine learning has been used for drug design.'}, vector=None, shard_key=None)]

Perform filtering alone (no query vector), using `scroll`

In [60]:
# Payload-only lookup: no query vector, just the condition subject == 'history'.
history_only = Filter(
    must=[
        FieldCondition(key='subject', match=MatchValue(value='history')),
    ]
)
hits = client.scroll(
    collection_name='my_collection',
    scroll_filter=history_only,
    limit=3,
)

In [61]:
# Display what scroll returned: a pair of (list of records, second element), where
# the second element is None in the output recorded here.
hits

([Record(id=0, payload={'subject': 'history', 'text': 'Artificial intelligence was founded as an academic discipline in 1956.'}, vector=None, shard_key=None),
  Record(id=1, payload={'subject': 'history', 'text': 'Alan Turing was the first person to conduct substantial research in AI.'}, vector=None, shard_key=None),
  Record(id=2, payload={'subject': 'history', 'text': 'Born in Maida Vale, London, Turing was raised in southern England.'}, vector=None, shard_key=None)],
 None)

### Delete data

In [62]:
from qdrant_client.models import PointIdsList

# Remove two of the stored points, selected explicitly by id.
ids_to_remove = PointIdsList(points=[0, 2])
res = client.delete(
    collection_name="my_collection",
    points_selector=ids_to_remove,
)

In [63]:
# Display the result object returned by the delete call.
res

UpdateResult(operation_id=4, status=<UpdateStatus.COMPLETED: 'completed'>)

In [67]:
# List what is left after the deletion. One `should` condition is built per
# subject, so the filter covers both the 'history' and the 'biology' points.
client.scroll(
    collection_name='my_collection',
    scroll_filter=Filter(
        should=[
            FieldCondition(key='subject', match=MatchValue(value=subject))
            for subject in ('history', 'biology')
        ]
    ),
    limit=10,
)

([Record(id=1, payload={'subject': 'history', 'text': 'Alan Turing was the first person to conduct substantial research in AI.'}, vector=None, shard_key=None),
  Record(id=3, payload={'subject': 'biology', 'text': 'Machine learning has been used for drug design.'}, vector=None, shard_key=None),
  Record(id=4, payload={'subject': 'biology', 'text': 'Computational synthesis with AI algorithms predicts molecular properties.'}, vector=None, shard_key=None),
  Record(id=5, payload={'subject': 'biology', 'text': 'DDR1 is involved in cancers and fibrosis.'}, vector=None, shard_key=None)],
 None)

### Reconnect

In [68]:
# Close the current client and remove the name, so the following cell has to
# construct a brand-new client object.
client.close()
del client

In [69]:
# Build a new client with the same host and port as at the start of the notebook.
client = QdrantClient(
    host="localhost",
    port=6333,
)

In [70]:
# Same listing as before the reconnect, issued through the new client:
# one `should` condition per subject.
client.scroll(
    collection_name='my_collection',
    scroll_filter=Filter(
        should=[
            FieldCondition(key='subject', match=MatchValue(value=subject))
            for subject in ('history', 'biology')
        ]
    ),
    limit=10,
)

([Record(id=1, payload={'subject': 'history', 'text': 'Alan Turing was the first person to conduct substantial research in AI.'}, vector=None, shard_key=None),
  Record(id=3, payload={'subject': 'biology', 'text': 'Machine learning has been used for drug design.'}, vector=None, shard_key=None),
  Record(id=4, payload={'subject': 'biology', 'text': 'Computational synthesis with AI algorithms predicts molecular properties.'}, vector=None, shard_key=None),
  Record(id=5, payload={'subject': 'biology', 'text': 'DDR1 is involved in cancers and fibrosis.'}, vector=None, shard_key=None)],
 None)

### Drop collection

In [13]:
# Drop the whole collection used by this tutorial.
client.delete_collection(collection_name='my_collection')

True

In [17]:
from qdrant_client.http.exceptions import UnexpectedResponse

# Query the collection that was just dropped to show how a missing collection
# surfaces: the client raises UnexpectedResponse carrying the HTTP status code.
try:
    client.scroll(
        collection_name='my_collection',
        scroll_filter=Filter(
            should=[
                FieldCondition(
                    key='subject',
                    match=MatchValue(
                        value='history'
                    )
                ),
                FieldCondition(
                    key='subject',
                    match=MatchValue(
                        value='biology'
                    )
                )
            ]
        ),
        limit=10
    )
except UnexpectedResponse as e:
    # Only a 404 is the expected outcome. Any other status is a real failure
    # and must not be silently swallowed.
    if e.status_code != 404:
        raise
    print("Collection not found")

Collection not found
