In [1]:
import weaviate
import torch
import os
import copy

class VectorManager:
    """
    Small CRUD helper around a Weaviate client.

    Documents are addressed by a caller-supplied ``id_no`` text property;
    the helper translates it to the Weaviate object uuid internally.
    Outcomes of most operations are reported with ``print`` instead of
    being raised, so the methods can be called repeatedly from a notebook.
    """

    def __init__(self) -> None:
        """
        Set up the connection

        The server address is read from the WEAVIATE_HOST and
        WEAVIATE_C_PORT environment variables.

        INPUT: None
        ------------------------------------

        RETURNS: None
        ------------------------------------
        """
        self._client = weaviate.Client(f"http://{os.environ.get('WEAVIATE_HOST')}:{os.environ.get('WEAVIATE_C_PORT')}")

    def _matching_uuids(self, collection_name: str, id_no: str) -> list:
        """
        Return the uuids of every object whose ``id_no`` equals the given one

        INPUT:
        ------------------------------------
        collection_name:    Name of collection
        id_no:              id of document

        RETURNS: list of uuid strings (empty when nothing matches)
        ------------------------------------
        """
        where_filter = {
            'operator': 'Equal',
            'valueText': id_no,
            'path': ['id_no']
        }
        response = (
            self._client.query
            .get(collection_name, ["id_no", "_additional {id}"])
            .with_where(where_filter)
            .do()
        )
        hits = response['data']['Get'][collection_name]
        return [hit['_additional']['id'] for hit in hits]

    def _id2uuid(self, collection_name: str, id_no: str) -> str:
        """
        Translate an ``id_no`` to the uuid of the first matching object

        RETURNS: uuid string
        RAISES:  IndexError when no object carries this id_no
        ------------------------------------
        """
        uuids = self._matching_uuids(collection_name, id_no)
        if not uuids:
            # Same exception type as indexing an empty list, but with a message
            # that tells the caller what was actually missing.
            raise IndexError(f"No document with id_no {id_no!r} in collection {collection_name!r}")
        return uuids[0]

    def _exists(self, collection_name: str, id_no: str) -> bool:
        """
        Tell whether at least one object carries this ``id_no``

        A warning is printed when more than one object matches.

        RETURNS: bool
        ------------------------------------
        """
        match_count = len(self._matching_uuids(collection_name, id_no))
        if match_count > 1:
            print("There exist duplicated files")
        return match_count >= 1

    def delete_collection(self, collection_name: str) -> None:
        """
        Delete a whole collection (class) from the schema

        INPUT:
        ------------------------------------
        collection_name:    Name of collection
                            example shape:  'Faces'

        RETURNS: None
        ------------------------------------
        """
        try:
            self._client.schema.delete_class(collection_name)
            print('Successfully deleted')
        except Exception as e:
            # The error text is matched as a substring; a '400' in it is
            # treated as "class not found".
            if '400' in str(e):
                print('Collection does not exist so nothing to delete')
            else:
                print(f"Unknown error with error message -> {e}")

    def delete_document(self, collection_name: str, id_no: str) -> None:
        """
        Delete the document with the given ``id_no`` from a collection

        INPUT:
        ------------------------------------
        collection_name:    Name of collection
                            example shape:  'Faces'
        id_no:              id of document
                            example: "72671"

        RETURNS: None
        ------------------------------------
        """
        # Check if the id exist
        if not self._exists(collection_name, id_no):
            print("This id does not exist so no deletion is done")
            return

        uuid = self._id2uuid(collection_name, id_no)

        try:
            self._client.data_object.delete(uuid=uuid, class_name=collection_name)
            print('Successfully deleted')
        except Exception as e:
            print(f"Unknown error with error message -> {e}")

    def create_collection(self, collection_name: str, schema: dict) -> None:
        """
        create a collection of documents

        INPUT: 
        ------------------------------------
        collection_name:    Name of collection
                            example shape:  'Faces'
        schema:             Schema for each document
                            example: {
                                "properties":{
                                    "id_no": ["text"],
                            }}

        RETURNS: None
        ------------------------------------
        """
        # Ensure that there is a id_no for the schema (a schema without a
        # 'properties' key is treated the same as one without id_no)
        declared = schema.get('properties') or {}
        if not declared.get('id_no'):
            print('Lack of id_no as an attribute in property')
            return

        # Proper formatting for creating the schema
        document_schema = {
            'class': collection_name,
            'vectorizer': 'none',
            'properties': [
                {'name': name, 'dataType': data_type}
                for name, data_type in declared.items()
            ]
        }

        # Try to create schema. If exists, gracefully exit.
        try:
            self._client.schema.create_class(document_schema)
            print("Collection Successfully created")
        except Exception as e:
            if '422' in str(e):
                print("Collection has been created")
            else:
                print(f"Unknown error with error message -> {e}")

    def create_document(self, collection_name: str, properties: dict, embedding: torch.Tensor) -> None:
        """
        create a document in a specified collecton

        INPUT: 
        ------------------------------------
        collection_name:    Name of collection
                            example shape:  'Faces'
        properties:         Schema for the document
                            example: {
                                "id_no": "72671"
                            }
        embedding:          embedding for the document
                            example: torch.Tensor([[
                                1, 2, ..., 512
                            ]])

        RETURNS: None
        ------------------------------------
        """
        # Check if the id_no attribute exist
        if not properties.get('id_no'):
            print('Lack of id_no as an attribute in property')
            return

        # Check if the id exist. The id must come from the properties being
        # inserted, never from a variable that happens to exist globally.
        if self._exists(collection_name, properties['id_no']):
            print("This id already existed please use update instead")
            return

        # Create document
        self._client.data_object.create(
            properties,
            collection_name,
            vector=embedding
        )

    def read_document(self, collection_name: str, id_no: str) -> dict:
        """
        Find a document in a specified collecton

        INPUT: 
        ------------------------------------
        collection_name:    Name of collection
                            example shape:  'Faces'
        id_no:              id of document
                            example: "72671"

        RETURNS: dict as returned by the client's get_by_id (vector included),
                 or an empty dict when no document carries this id_no
        ------------------------------------
        """
        if not self._exists(collection_name, id_no):
            print("This id does not exist so nothing to read")
            return {}

        uuid = self._id2uuid(collection_name, id_no)
        return self._client.data_object.get_by_id(uuid=uuid, class_name=collection_name, with_vector=True)

    def update_document(self, collection_name: str, document: dict) -> None:
        """
        Update a document in a specified collecton

        Any key other than 'id_no' and 'vector' is sent as a property to
        update. 'vector' is optional: without it only the properties are
        updated.

        INPUT: 
        ------------------------------------
        collection_name:    Name of collection
                            example shape:  'Faces'
        document:           dictionary format of the update document
                            {
                                'id_no': '4848272',
                                'vector': torch.Tensor([[
                                    1, 2, ..., 512
                                ]]),
                                'name': 'Trump'
                            }

        RETURNS: None
        ------------------------------------
        """
        if not document.get('id_no'):
            print('Lack of id_no result in ambiguous document to update')
            return
        if not self._exists(collection_name, document['id_no']):
            print('Attempt to update a non-existent document. No update is done')
            return

        # Split the payload without touching the caller's dict.
        new_vector = document.get('vector')
        new_properties = {
            key: value
            for key, value in document.items()
            if key not in ('id_no', 'vector')
        }
        if new_vector is None and not new_properties:
            print('Nothing to update besides id_no. No update is done')
            return

        uuid = self._id2uuid(collection_name, document['id_no'])
        try:
            self._client.data_object.update(
                data_object=new_properties,
                class_name=collection_name,
                uuid=uuid,
                vector=new_vector
            )
        except Exception as e:
            if '400' in str(e):
                print("Unknown field(s) in document")
            else:
                print(f"Unknown error with error message -> {e}")
                    

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Connect to the Weaviate server named by WEAVIATE_HOST / WEAVIATE_C_PORT.
client = VectorManager()

In [3]:
# Schema for the demo collection: property name -> Weaviate dataType list.
# `id_no` is required by VectorManager.create_collection.
input_schema = {
    "properties":{
        "id_no": ["text"],
        "name": ["text"]
}}
collection_name = 'Faces'

In [4]:
# Drop the collection if it is present; a missing collection is only reported.
client.delete_collection(collection_name)

Collection does not exist so nothing to delete


In [5]:
# Repeat the delete to show it is safe to call when the collection is absent.
client.delete_collection(collection_name)

Collection does not exist so nothing to delete


In [6]:
# Create the 'Faces' collection from the schema defined above.
client.create_collection(collection_name, input_schema)

Collection Successfully created


In [7]:
# Creating the same collection again is reported with a message, not raised.
client.create_collection(collection_name, input_schema)

Collection has been created


In [8]:
import torch
from tqdm import tqdm

In [9]:
# Insert ten demo documents, each with a random (1, 512) embedding.
for id_no in tqdm(range(10)):
    face_emb = torch.rand(1, 512)
    data_obj = {
        "id_no": str(id_no),  # the schema declares id_no as text
        "name": "anonymous"
    }
    client.create_document(collection_name = collection_name, properties = data_obj, embedding = face_emb)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 165.88it/s]


In [10]:
# Fetch the document stored under id_no "1", vector included.
client.read_document(collection_name, "1")

{'class': 'Faces',
 'creationTimeUnix': 1670925983229,
 'id': 'bc199e3e-078f-465b-8258-480ba4cfc3b1',
 'lastUpdateTimeUnix': 1670925983229,
 'properties': {'id_no': '1', 'name': 'anonymous'},
 'vector': [0.0827803,
  0.10453594,
  0.30214924,
  0.35045874,
  0.8916652,
  0.16060615,
  0.009492099,
  0.83223736,
  0.040422797,
  0.30519634,
  0.28400612,
  0.17946625,
  0.53876823,
  0.38034856,
  0.07579094,
  0.32081044,
  0.5282044,
  0.21188939,
  0.85807073,
  0.9921384,
  0.11055088,
  0.12863696,
  0.61413723,
  0.6863716,
  0.05624026,
  0.42584467,
  0.67568845,
  0.22743142,
  0.031163454,
  0.7051295,
  0.7955521,
  0.6735202,
  0.78970635,
  0.61450386,
  0.29970425,
  0.53691244,
  0.7135871,
  0.7558219,
  0.5507147,
  0.7376311,
  0.05571592,
  0.8045178,
  0.74441344,
  0.7830338,
  0.30678535,
  0.45928478,
  0.020294249,
  0.20239413,
  0.2287013,
  0.06906253,
  0.5393349,
  0.22749043,
  0.1471138,
  0.5967759,
  0.20969576,
  0.38589084,
  0.9758699,
  0.62691945,
 

In [11]:
# Delete the document whose id_no is '1'.
client.delete_document(collection_name, '1')

Successfully deleted


In [12]:
# A second delete of the same id finds nothing and only prints a message.
client.delete_document(collection_name, '1')

This id does not exist so no deletion is done


In [13]:
# Read document "2" before updating it, for comparison with the read below.
client.read_document(collection_name, "2")

{'class': 'Faces',
 'creationTimeUnix': 1670925983233,
 'id': 'f272ac26-0a14-47d3-976b-3ed630ba8c01',
 'lastUpdateTimeUnix': 1670925983233,
 'properties': {'id_no': '2', 'name': 'anonymous'},
 'vector': [0.9019266,
  0.008109748,
  0.2948833,
  0.4529515,
  0.34288156,
  0.96535075,
  0.94037616,
  0.92208487,
  0.26822394,
  0.2647578,
  0.22906196,
  0.25754386,
  0.8691051,
  0.31695056,
  0.06940949,
  0.048167646,
  0.9811652,
  0.85468554,
  0.78408337,
  0.42684376,
  0.8013735,
  0.504638,
  0.7013482,
  0.8027439,
  0.9478896,
  0.5406642,
  0.20001042,
  0.30232996,
  0.51657486,
  0.4932425,
  0.63309395,
  0.7902623,
  0.077542126,
  0.31953686,
  0.71998316,
  0.5309155,
  0.8839184,
  0.985636,
  0.00057035685,
  0.72845036,
  0.42914736,
  0.6192596,
  0.7110736,
  0.38919032,
  0.68931824,
  0.20046896,
  0.35306448,
  0.4544857,
  0.2557454,
  0.18579865,
  0.28003603,
  0.88549787,
  0.95331717,
  0.15690643,
  0.8119782,
  0.9746704,
  0.3872879,
  0.8548252,
  0.066

In [14]:
# Update payload for id_no '2': a fresh random vector and a new `name`.
update = {
    'id_no': '2',
    'vector': torch.rand(1, 512),
    'name': 'Trump'
}

In [15]:
# Apply the update to the existing document.
client.update_document(collection_name, update)

In [16]:
# Read document "2" back to confirm that `name` and the vector changed.
client.read_document(collection_name, "2")

{'class': 'Faces',
 'creationTimeUnix': 1670925983233,
 'id': 'f272ac26-0a14-47d3-976b-3ed630ba8c01',
 'lastUpdateTimeUnix': 1670925986238,
 'properties': {'id_no': '2', 'name': 'Trump'},
 'vector': [0.066660345,
  0.3099112,
  0.08470839,
  0.6147607,
  0.5579704,
  0.75606334,
  0.49928093,
  0.55334455,
  0.03626722,
  0.42976588,
  0.5369635,
  0.8763802,
  0.6598307,
  0.24930555,
  0.93971616,
  0.6078153,
  0.6810063,
  0.93331224,
  0.7752558,
  0.84285843,
  0.79448456,
  0.060895443,
  0.7349062,
  0.058766603,
  0.5625347,
  0.2882952,
  0.6798298,
  0.39611423,
  0.606769,
  0.7669083,
  0.9932972,
  0.6764469,
  0.9537864,
  0.88370484,
  0.94944835,
  0.7362681,
  0.19894016,
  0.23153651,
  0.0805459,
  0.89806634,
  0.36336792,
  0.9078771,
  0.9176139,
  0.091757536,
  0.60638463,
  0.6432173,
  0.45356423,
  0.92874175,
  0.14443082,
  0.94745165,
  0.59428024,
  0.734819,
  0.58684444,
  0.92289454,
  0.8030031,
  0.40325224,
  0.617369,
  0.042309523,
  0.48813117,


In [17]:
# Payload with a property ('age') that the collection schema does not declare.
update = {
    'id_no': '2',
    'vector': torch.rand(1, 512),
    'age': '10'
}

In [18]:
# The rejected update is reported with a printed message rather than an exception.
client.update_document(collection_name, update)

Unknown field(s) in document


In [19]:
# Payload targeting an id_no that was never inserted (only '0'..'9' were created).
update = {
    'id_no': '4848272',
    'vector': torch.rand(1, 512),
}

In [20]:
# Updating a non-existent id_no is refused with a message; nothing is written.
client.update_document(collection_name, update)

Attempt to update a non-existent document. No update is done
