What are points?
- The central entity that Qdrant operates on.
- A point is a record consisting of a vector and an optional payload.
- You can search among the points grouped in one collection based on vector similarity.

https://qdrant.tech/documentation/concepts/points/#upload-points 


First, run the Qdrant database: `docker run -p 6333:6333 -v ${pwd}/qdrant:/qdrant/storage qdrant/qdrant` (`${pwd}` is PowerShell syntax; in bash use `$(pwd)` instead)

## Simple test → adding points to a collection
- No validation: raises an error if the collection already exists.
- Otherwise works OK: creates the collection and uploads the points.

In [None]:
from qdrant_client import QdrantClient
from qdrant_client.http import models

COLLECTION_NAME = "test_collection"

# Client for the Qdrant instance listening on localhost:6333.
q_client = QdrantClient('localhost', port='6333')

# No existence check on purpose: the collection is created unconditionally,
# so this cell is only meant to be run against a fresh database.
q_client.create_collection(
    collection_name=COLLECTION_NAME,
    vectors_config=models.VectorParams(size=3, distance=models.Distance.COSINE, on_disk=True),
)

# A Batch is column-oriented: ids, payloads and vectors are parallel lists,
# so every list needs exactly one entry per point (three points here).
q_client.upsert(
    collection_name=COLLECTION_NAME,
    points=models.Batch(
        ids=[1, 2, 3],
        payloads=[
            {"color": "red"},
            {"color": "green"},
            {"color": "blue"},
        ],
        vectors=[
            [0.9, 0.1, 0.1],
            [0.1, 0.9, 0.1],
            [0.1, 0.1, 0.9],
        ],
    ),
)

## Similar example, but loading the vectors from an AI embedding model


In [None]:
from qdrant_client import QdrantClient
from qdrant_client.http import models
from langchain.embeddings.openai import OpenAIEmbeddings

COLLECTION_NAME = "embedding_from_ai_collection"
STRING_TO_EMBEDD = "red is dead"

# Client for the Qdrant instance listening on localhost:6333.
q_client = QdrantClient('localhost', port='6333')

# The collection stores 1536-dimensional vectors compared by cosine distance.
ai_vectors_config = models.VectorParams(
    size=1536,
    distance=models.Distance.COSINE,
    on_disk=True,
)
q_client.create_collection(
    collection_name=COLLECTION_NAME,
    vectors_config=ai_vectors_config,
)

# Embed the sample sentence and print what the embedding model returned.
embedding = OpenAIEmbeddings().embed_query(STRING_TO_EMBEDD)
print(f"embedding for word: {STRING_TO_EMBEDD} is type type: {type(embedding)} and values: {embedding}")

# Upload one point: id 1, the sentence as payload, the embedding as vector.
single_point_batch = models.Batch(
    ids=[1],
    payloads=[{"color": STRING_TO_EMBEDD}],
    vectors=[embedding],
)
q_client.upsert(collection_name=COLLECTION_NAME, points=single_point_batch)

## fully working example with loading from disk & validations

In [None]:
from qdrant_client import QdrantClient
from qdrant_client.http import models
import codecs
from langchain.docstore.document import Document
from uuid import uuid4
from langchain.embeddings.openai import OpenAIEmbeddings
from qdrant_client.http import models
import time


# Name of the Qdrant collection used by this cell; it is also stored as the
# "source" field in the metadata of every generated document.
COLLECTION_NAME = "first_collection"


def create_langchain_documents(file_path: str) -> list[Document]:
    """
    Generate a list of langchain documents from a text file.

    The file is read as UTF-8 and split into chunks wherever an empty line
    separates two paragraphs. Each non-blank chunk becomes one Document whose
    metadata holds the collection name ("source"), the chunk text ("content")
    and a freshly generated UUID ("uuid").

    Args:
        file_path: Path of the text file to load.

    Returns:
        One Document per non-blank chunk, in file order.
    """
    # step 1 - load the file and split it into chunks.
    # The built-in open() translates "\r\n" line endings to "\n" in text mode,
    # so files saved with Windows line endings are split on empty lines too
    # (codecs.open reads the bytes untranslated and would miss them).
    with open(file_path, encoding="utf8") as f:
        raw_document = f.read()

    # Drop empty / whitespace-only chunks (e.g. produced by trailing or
    # repeated empty lines): they carry no content worth embedding, and the
    # embeddings endpoint does not accept an empty input string.
    chunks = [chunk for chunk in raw_document.split("\n\n") if chunk.strip()]

    # Debugging tip: slice the list (e.g. chunks[:1]) to process a single chunk.

    # step 2 - convert the string chunks to langchain documents & tag them
    return [
        Document(
            page_content=chunk,
            metadata={
                "source": COLLECTION_NAME,
                "content": chunk,
                "uuid": uuid4(),
            },
        )
        for chunk in chunks
    ]


def create_points(langchain_documents: list[Document]) -> list[models.PointStruct]:
    """
    Convert a list of langchain documents into Qdrant points ready for upload.

    Every document is embedded on its own with ``embed_query`` (one request
    per document). The point id is the document's position in the input list
    (0, 1, 2, ...), the payload is the document metadata and the vector is
    the embedding of its page content.

    Args:
        langchain_documents: Documents to convert.

    Returns:
        One PointStruct per document, in input order.
    """
    if not langchain_documents:
        # Nothing to embed, so there is no need to build an embeddings client.
        return []

    # Build the embeddings client once instead of once per document.
    embedder = OpenAIEmbeddings()
    return [
        models.PointStruct(
            id=position,
            payload=l_document.metadata,
            vector=embedder.embed_query(l_document.page_content),
        )
        for position, l_document in enumerate(langchain_documents)
    ]

def manage_qadrant_collenction():
    """
    Ensure the collection exists and populate it when it holds no points.

    Connects to Qdrant on localhost:6333, creates the collection named by
    COLLECTION_NAME if it is not listed yet, and - only when the collection
    reports zero points - loads the example text file, embeds it and uploads
    the resulting points. Progress and the wall-clock time of the embedding
    and upload steps are printed.
    """
    client = QdrantClient("localhost", port="6333")

    # Is a collection with our name already listed?
    result = client.get_collections()
    print(f"searching for collection {COLLECTION_NAME} in {result.collections}")
    already_present = any(
        entry.name == COLLECTION_NAME for entry in result.collections
    )

    # Create it on first use.
    if not already_present:
        print(f"creating collection {COLLECTION_NAME}")
        vector_params = models.VectorParams(
            size=1536, distance=models.Distance.COSINE, on_disk=True
        )
        client.create_collection(
            collection_name=COLLECTION_NAME,
            vectors_config=vector_params,
        )

    print(f"getting created collection {COLLECTION_NAME}")
    collection_info = client.get_collection(collection_name=COLLECTION_NAME)

    print(f"adding documents to collection {COLLECTION_NAME}")
    # Only an existing collection without any points gets filled.
    if not collection_info or collection_info.points_count != 0:
        return

    documents = create_langchain_documents("various/vector-store-example.txt")

    # Time the embedding / point-building step.
    embed_started = time.time()
    points = create_points(documents)
    execution_time = time.time() - embed_started
    print(f"The function create_points executed in {execution_time} seconds")

    # Time the upload of the points to the collection.
    upload_started = time.time()
    client.upsert(collection_name=COLLECTION_NAME, points=points)
    execution_time = time.time() - upload_started
    print(f"The function upsert in {execution_time} seconds")


# Run this cell's workflow: make sure the collection exists and fill it if it has no points.
manage_qadrant_collenction()



## Improved approach - with batches
1. Embed all documents at once, not one by one
2. Load into Qdrant in batches

The function executed 5 times faster,
mostly because of embedding everything at once → but that is not attainable for large documents → we will have to find the maximum input the model can receive and portion the text so that it is maxed out

The 2nd improvement, i.e. loading into Qdrant in batches, is a bit faster → but on a much larger scale it would probably show a much greater profit

In [None]:
from qdrant_client import QdrantClient
from qdrant_client.http import models
import codecs
from langchain.docstore.document import Document
from uuid import uuid4
from langchain.embeddings.openai import OpenAIEmbeddings
from qdrant_client.http import models
import time


# Name of the Qdrant collection used by this cell; it is also stored as the
# "source" field in the metadata of every generated document.
COLLECTION_NAME = "first_collection"


def create_langchain_documents(file_path: str) -> list[Document]:
    """
    Generate a list of langchain documents from a single text file.

    The file is read as UTF-8 and split into chunks wherever an empty line
    separates two paragraphs. Each non-blank chunk becomes one Document whose
    metadata holds the collection name ("source"), the chunk text ("content")
    and a freshly generated UUID ("uuid").

    Args:
        file_path: Path of the text file to load.

    Returns:
        One Document per non-blank chunk, in file order.
    """
    # The built-in open() translates "\r\n" line endings to "\n" in text mode,
    # so files saved with Windows line endings are split on empty lines too
    # (codecs.open reads the bytes untranslated and would miss them).
    with open(file_path, encoding="utf8") as f:
        raw_document = f.read()

    # Drop empty / whitespace-only chunks (e.g. produced by trailing or
    # repeated empty lines): they carry no content worth embedding, and the
    # embeddings endpoint does not accept an empty input string.
    chunks = [chunk for chunk in raw_document.split("\n\n") if chunk.strip()]

    return [
        Document(
            page_content=chunk,
            metadata={
                "source": COLLECTION_NAME,
                "content": chunk,
                "uuid": uuid4(),
            },
        )
        for chunk in chunks
    ]

def create_batch_points(documents: "list[Document]") -> "models.Batch":
    """
    Convert a list of langchain documents into one Qdrant batch ready for upload.

    The page contents of all documents are embedded in a single
    ``embed_documents`` call. The returned batch is column-oriented: its
    ids, payloads and vectors are parallel lists with one entry per document.

    Args:
        documents: Langchain documents; each must carry a "uuid" entry in
            its metadata, which becomes the point id.

    Returns:
        A single ``models.Batch`` (not a list of points) holding all documents.
    """
    page_content_list = [doc.page_content for doc in documents]
    vector_list = OpenAIEmbeddings().embed_documents(page_content_list)

    # The metadata UUID is passed to Qdrant in its string form as the point id.
    ids = [str(doc.metadata["uuid"]) for doc in documents]
    payloads = [doc.metadata for doc in documents]

    return models.Batch(
        ids=ids,
        payloads=payloads,
        vectors=vector_list,
    )


def manage_qadrant_collenction():
    """
    Ensure the collection exists and populate it in one batch when it is empty.

    Connects to Qdrant on localhost:6333, creates the collection named by
    COLLECTION_NAME if it is not listed yet, and - only when the collection
    reports zero points - loads the example text file, embeds all chunks via
    create_batch_points and uploads the resulting batch. Progress and the
    wall-clock time of the embedding and upload steps are printed.
    """
    client = QdrantClient("localhost", port="6333")

    # Is a collection with our name already listed?
    result = client.get_collections()
    print(f"searching for collection {COLLECTION_NAME} in {result.collections}")
    already_present = any(
        entry.name == COLLECTION_NAME for entry in result.collections
    )

    # Create it on first use.
    if not already_present:
        print(f"creating collection {COLLECTION_NAME}")
        vector_params = models.VectorParams(
            size=1536, distance=models.Distance.COSINE, on_disk=True
        )
        client.create_collection(
            collection_name=COLLECTION_NAME,
            vectors_config=vector_params,
        )

    print(f"getting created collection {COLLECTION_NAME}")
    collection_info = client.get_collection(collection_name=COLLECTION_NAME)

    print(f"adding documents to collection {COLLECTION_NAME}")
    # Only an existing collection without any points gets filled.
    if not collection_info or collection_info.points_count != 0:
        return

    documents = create_langchain_documents("various/vector-store-example.txt")

    # Time the conversion of the documents into a batch with embeddings.
    embed_started = time.time()
    points = create_batch_points(documents)
    execution_time = time.time() - embed_started
    print(f"The function create_batch_points executed in {execution_time} seconds")

    # Time the upload of the batch to the collection.
    upload_started = time.time()
    client.upsert(collection_name=COLLECTION_NAME, points=points)
    execution_time = time.time() - upload_started
    print(f"The function upsert with batches executed in {execution_time} seconds")


# Run this cell's workflow: make sure the collection exists and fill it in one batch if it has no points.
manage_qadrant_collenction()


