In [1]:
# Enable INFO-level logging so the underlying API calls made by the frameworks
# used below show up in the cell outputs.
import logging

logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s : %(levelname)s : %(message)s',
)

In [2]:
from utilities import get_key

from qdrant_client import QdrantClient
from qdrant_client.http import models
import numpy as np
from faker import Faker

# Connect to Qdrant. The endpoint URL and API key are looked up through
# get_key() instead of being written into the notebook.
client = QdrantClient(url=get_key("QDRANT_URL"), api_key=get_key("QDRANT_KEY"))
client  # echo the client object as the cell output

<qdrant_client.qdrant_client.QdrantClient at 0x1cad031e150>

In [3]:
# Name of the Qdrant collection that the later cells create, fill and query.
dummy_data_collection = "dummy_collection_01"

In [4]:
# Faker instance used to generate fake user data for the payloads.
dummy_data_genrator = Faker()

# Preview one sample of each kind of value the generator is used for below.
(
    dummy_data_genrator.name(),
    dummy_data_genrator.address(),
    dummy_data_genrator.country(),
    dummy_data_genrator.color(),
)


('Denise Jones',
 '811 Michelle Burgs\nDunnberg, MN 44430',
 'Lithuania',
 '#858e07')

In [5]:
# Number of dummy points to generate. This is NOT the embedding dimension:
# the two values only happen to both be 100 in this demo, so they are kept
# as separate names to avoid mixing them up.
vector_data_size = 100
vector_dimension = 100  # length of each embedding vector

# Seeded generator so that "Restart Kernel -> Run All" reproduces the same
# random embeddings every time.
rng = np.random.default_rng(42)

# One random embedding per row, values drawn uniformly from [-1.0, 1.0).
vector_data = rng.uniform(low=-1.0, high=1.0, size=(vector_data_size, vector_dimension))

# Point ids 0 .. vector_data_size - 1, one per row of vector_data.
index = list(range(vector_data_size))

In [6]:
# Generate one dummy payload (fake user record) per point using Faker.
payload_data = [
    {
        "name": dummy_data_genrator.name(),
        "address": dummy_data_genrator.address(),
        "country": dummy_data_genrator.country(),
        "url": dummy_data_genrator.url(),
        "year": dummy_data_genrator.year(),
        "color": dummy_data_genrator.color(),
    }
    for _ in range(vector_data_size)
]

# Preview the first two payloads.
payload_data[:2]

[{'name': 'Michael Hall',
  'address': '4989 Jessica Loop Apt. 164\nJustinview, NM 87428',
  'country': 'United States Minor Outlying Islands',
  'url': 'https://gonzalez-keith.org/',
  'year': '1988',
  'color': '#d67d2a'},
 {'name': 'Jennifer Williams',
  'address': '287 Mills Stream\nRiosborough, IN 69695',
  'country': 'Vietnam',
  'url': 'http://harrington.net/',
  'year': '1990',
  'color': '#dd4975'}]

In [7]:
# Take the vector size from the data that will be stored, so the collection
# schema cannot drift from the embeddings if their dimension is changed.
# Computed outside the try block so a missing `vector_data` is not swallowed.
embedding_dimension = vector_data.shape[1]

try:
    # Create the collection in the vector database.
    client.create_collection(
        collection_name=dummy_data_collection,  # collection name
        vectors_config=models.VectorParams(
            size=embedding_dimension,
            distance=models.Distance.COSINE,  # cosine similarity for semantic search
        ),
    )
except Exception as ex:
    # Deliberately best-effort: the error is printed and the notebook goes on,
    # presumably so that re-running the cell against an existing collection
    # does not stop execution.
    # NOTE(review): this also hides auth/network failures - consider narrowing.
    print(ex)

2024-03-22 07:36:45,391 : INFO : HTTP Request: PUT https://b54f2f73-aa84-43cb-9e5b-48738d261af3.us-east4-0.gcp.cloud.qdrant.io:6333/collections/dummy_collection_01 "HTTP/1.1 200 OK"


In [8]:
# With the collection in place, upsert (add or update) the data into it.
# In this scenario the points are the randomly generated payloads paired with
# the randomly generated vectors/embeddings.
points_batch = models.Batch(
    ids=index,                     # point ids in the vector db
    vectors=vector_data.tolist(),  # the vectors/embeddings
    payloads=payload_data,         # data attached to each embedding
)

client.upsert(collection_name=dummy_data_collection, points=points_batch)

2024-03-22 07:39:51,739 : INFO : HTTP Request: PUT https://b54f2f73-aa84-43cb-9e5b-48738d261af3.us-east4-0.gcp.cloud.qdrant.io:6333/collections/dummy_collection_01/points?wait=true "HTTP/1.1 200 OK"


UpdateResult(operation_id=0, status=<UpdateStatus.COMPLETED: 'completed'>)

In [9]:
# Search for the points most similar to one random query vector.
# The query vector must have the same dimension as the stored vectors, so its
# length is taken from the embedding dimension of `vector_data` rather than
# from `vector_data_size`, which is the number of points.
# The data has no pattern (everything is random), so scores will be low.
serch_input_vector = np.random.uniform(low=-1.0, high=1.0, size=vector_data.shape[1])

client.search(
    collection_name=dummy_data_collection,
    query_vector=serch_input_vector,  # random vector
    limit=5,  # top 5 results
)

2024-03-22 07:41:32,451 : INFO : HTTP Request: POST https://b54f2f73-aa84-43cb-9e5b-48738d261af3.us-east4-0.gcp.cloud.qdrant.io:6333/collections/dummy_collection_01/points/search "HTTP/1.1 200 OK"


[ScoredPoint(id=30, version=0, score=0.25461084, payload={'address': '51147 Dustin Court Apt. 567\nSouth Carlos, AL 86378', 'color': '#a6e07d', 'country': 'Andorra', 'name': 'Holly Rangel', 'url': 'http://anderson.biz/', 'year': '1986'}, vector=None, shard_key=None),
 ScoredPoint(id=47, version=0, score=0.22383998, payload={'address': '6427 David Ridges\nAmyfort, CA 90067', 'color': '#5cf2ae', 'country': 'Trinidad and Tobago', 'name': 'Paul Kelley', 'url': 'https://www.harris.com/', 'year': '1984'}, vector=None, shard_key=None),
 ScoredPoint(id=81, version=0, score=0.18675603, payload={'address': '88154 Rogers Shoal\nPiercetown, VA 17009', 'color': '#eaccff', 'country': 'Bermuda', 'name': 'Cheryl Blackburn', 'url': 'https://mueller.com/', 'year': '1999'}, vector=None, shard_key=None),
 ScoredPoint(id=37, version=0, score=0.18208194, payload={'address': 'Unit 9573 Box 4380\nDPO AA 29378', 'color': '#3ce83a', 'country': 'Niger', 'name': 'Darrell Martinez', 'url': 'http://york.com/', 'yea

In [10]:
# Query with the first randomly generated vector, which is itself stored in
# the collection, so the top hit is expected to be a perfect match.
exact_match_vector = vector_data[0]

client.search(
    collection_name=dummy_data_collection,
    query_vector=exact_match_vector,
    limit=15,
)

2024-03-22 07:43:57,525 : INFO : HTTP Request: POST https://b54f2f73-aa84-43cb-9e5b-48738d261af3.us-east4-0.gcp.cloud.qdrant.io:6333/collections/dummy_collection_01/points/search "HTTP/1.1 200 OK"


[ScoredPoint(id=0, version=0, score=1.0, payload={'address': '4989 Jessica Loop Apt. 164\nJustinview, NM 87428', 'color': '#d67d2a', 'country': 'United States Minor Outlying Islands', 'name': 'Michael Hall', 'url': 'https://gonzalez-keith.org/', 'year': '1988'}, vector=None, shard_key=None),
 ScoredPoint(id=66, version=0, score=0.2723331, payload={'address': '07927 Porter Curve Apt. 217\nDominguezfurt, TX 51974', 'color': '#23d15d', 'country': 'Cape Verde', 'name': 'Seth Powers', 'url': 'https://estrada.com/', 'year': '1994'}, vector=None, shard_key=None),
 ScoredPoint(id=48, version=0, score=0.26127163, payload={'address': '42248 Williams Tunnel Apt. 849\nSouth Courtneyberg, PR 48708', 'color': '#d1f49c', 'country': 'Azerbaijan', 'name': 'Katherine Wells', 'url': 'http://morris.org/', 'year': '2016'}, vector=None, shard_key=None),
 ScoredPoint(id=16, version=0, score=0.24914049, payload={'address': '753 Clark Square Suite 332\nCrawfordshire, MD 79036', 'color': '#aefcaf', 'country': '

In [11]:
# Source data behind the exact-match (fully biased) result: the payload of
# point 0, its year, and the first four components of its vector.
first_payload = payload_data[0]
first_payload, first_payload["year"], vector_data[0][:4]

({'name': 'Michael Hall',
  'address': '4989 Jessica Loop Apt. 164\nJustinview, NM 87428',
  'country': 'United States Minor Outlying Islands',
  'url': 'https://gonzalez-keith.org/',
  'year': '1988',
  'color': '#d67d2a'},
 '1988',
 array([-0.43746901, -0.46790568, -0.82528065,  0.94929708]))

In [12]:
# Custom filters restrict a search by payload values, e.g. keeping the year
# fixed, or a genre when making video or book recommendations.
# Two filters are built here: one using the year of point 0 (to pair with the
# exact-match vector) and one using an arbitrary year.
def _year_filter(year):
    """Return a Filter with a single `must` condition matching the "year" payload key against `year`."""
    year_condition = models.FieldCondition(
        key="year",
        match=models.MatchValue(value=year),
    )
    return models.Filter(must=[year_condition])


custom_filter_100_percent = _year_filter(payload_data[0]["year"])
custom_filter_random = _year_filter("1980")

custom_filter_100_percent, custom_filter_random

(Filter(should=None, min_should=None, must=[FieldCondition(key='year', match=MatchValue(value='1988'), range=None, geo_bounding_box=None, geo_radius=None, geo_polygon=None, values_count=None)], must_not=None),
 Filter(should=None, min_should=None, must=[FieldCondition(key='year', match=MatchValue(value='1980'), range=None, geo_bounding_box=None, geo_radius=None, geo_polygon=None, values_count=None)], must_not=None))

In [14]:
# Same search as before, now narrowed by a custom query filter.
# Biased case: the exact-match vector combined with the filter on its own year.
biased_search_args = dict(
    collection_name=dummy_data_collection,
    query_vector=vector_data[0],  # 100% matching vector
    query_filter=custom_filter_100_percent,
    limit=15,
)

client.search(**biased_search_args)

2024-03-22 07:48:55,211 : INFO : HTTP Request: POST https://b54f2f73-aa84-43cb-9e5b-48738d261af3.us-east4-0.gcp.cloud.qdrant.io:6333/collections/dummy_collection_01/points/search "HTTP/1.1 200 OK"


[ScoredPoint(id=0, version=0, score=1.0, payload={'address': '4989 Jessica Loop Apt. 164\nJustinview, NM 87428', 'color': '#d67d2a', 'country': 'United States Minor Outlying Islands', 'name': 'Michael Hall', 'url': 'https://gonzalez-keith.org/', 'year': '1988'}, vector=None, shard_key=None),
 ScoredPoint(id=13, version=0, score=0.06378834, payload={'address': '409 Mark Mall\nLake Colton, CO 02725', 'color': '#2cd337', 'country': 'Suriname', 'name': 'Christy Wilson', 'url': 'http://www.mata-arellano.com/', 'year': '1988'}, vector=None, shard_key=None),
 ScoredPoint(id=77, version=0, score=-0.13962737, payload={'address': '998 Cruz Port Apt. 751\nEast Philipchester, MD 33192', 'color': '#536ef4', 'country': 'Eritrea', 'name': 'Samuel Watson', 'url': 'http://www.gonzalez.biz/', 'year': '1988'}, vector=None, shard_key=None)]

In [13]:
# Search with the random vector and the arbitrary-year filter.
# This may or may not return results: the data set is small and the filter
# requires an exact year match.
random_search_args = dict(
    collection_name=dummy_data_collection,
    query_vector=serch_input_vector,  # random vector
    query_filter=custom_filter_random,
    limit=15,
)

client.search(**random_search_args)

2024-03-22 07:47:29,963 : INFO : HTTP Request: POST https://b54f2f73-aa84-43cb-9e5b-48738d261af3.us-east4-0.gcp.cloud.qdrant.io:6333/collections/dummy_collection_01/points/search "HTTP/1.1 200 OK"


[]

In [15]:
# Recommendation engine
# Recommendations can be built directly on top of a vector database collection.
positive_point_ids = [1]       # ids of points with the pattern to look for
negative_point_ids = [75, 40]  # ids of points to steer away from

# A custom filter (e.g. query_filter=custom_filter_random) can also be passed
# to the call below to restrict the recommendations.
client.recommend(
    collection_name=dummy_data_collection,
    positive=positive_point_ids,
    negative=negative_point_ids,
    limit=5,
)


2024-03-22 07:51:18,401 : INFO : HTTP Request: POST https://b54f2f73-aa84-43cb-9e5b-48738d261af3.us-east4-0.gcp.cloud.qdrant.io:6333/collections/dummy_collection_01/points/recommend "HTTP/1.1 200 OK"


[ScoredPoint(id=95, version=0, score=0.17628264, payload={'address': '8743 Jackson Mission Suite 930\nPetersonfurt, AR 93347', 'color': '#8cf291', 'country': 'Sweden', 'name': 'Timothy Callahan', 'url': 'http://kennedy.org/', 'year': '1996'}, vector=None, shard_key=None),
 ScoredPoint(id=84, version=0, score=0.17602436, payload={'address': '682 Thompson Pike\nNorth Jamestown, IL 72640', 'color': '#ddf776', 'country': 'Oman', 'name': 'Timothy Lin', 'url': 'http://www.walsh-dunn.com/', 'year': '2016'}, vector=None, shard_key=None),
 ScoredPoint(id=79, version=0, score=0.1753848, payload={'address': '367 Shaffer Point Apt. 598\nJenniferville, SC 04769', 'color': '#6f38a0', 'country': 'Grenada', 'name': 'Cynthia Davis', 'url': 'http://gregory.com/', 'year': '2000'}, vector=None, shard_key=None),
 ScoredPoint(id=78, version=0, score=0.17087176, payload={'address': '67004 Randy Mount Suite 927\nWilliamland, AS 95892', 'color': '#ba4232', 'country': 'Liberia', 'name': 'George Owen', 'url': 'h