In [33]:
import weaviate
import torch
import os
import copy

class VectorManager:
    """
    Small CRUD wrapper around a Weaviate client.

    Every document is addressed by a user-supplied text property called
    ``id_no``; the helpers below translate that property into the Weaviate
    object uuid before reading, updating or deleting.
    """

    def __init__(self) -> None:
        """
        Set up the connection

        The Weaviate URL is built from the WEAVIATE_HOST and WEAVIATE_C_PORT
        environment variables.

        INPUT: None
        ------------------------------------

        RETURNS: None
        ------------------------------------
        """
        host = os.environ.get('WEAVIATE_HOST')
        port = os.environ.get('WEAVIATE_C_PORT')
        self._client = weaviate.Client(f"http://{host}:{port}")

    def _find(self, collection_name: str, id_no: str) -> list:
        """
        Query a collection for every object whose ``id_no`` equals the given id

        INPUT:
        ------------------------------------
        collection_name:    Name of collection
        id_no:              id of document

        RETURNS:
        ------------------------------------
        List of matches as returned by the query (each entry carries
        ``id_no`` and ``_additional.id``). Empty when nothing matches.
        """
        where_filter = {
            'operator': 'Equal',
            'valueText': id_no,
            'path': ['id_no']
        }
        response = (
            self._client.query
            .get(collection_name, ["id_no", "_additional {id}"])
            .with_where(where_filter)
            .do()
        )
        return response['data']['Get'][collection_name]

    def _id2uuid(self, collection_name: str, id_no: str) -> str:
        """
        Translate an ``id_no`` into the uuid of the first matching object

        INPUT:
        ------------------------------------
        collection_name:    Name of collection
        id_no:              id of document

        RETURNS:
        ------------------------------------
        uuid of the first object whose ``id_no`` matches

        RAISES:
        ------------------------------------
        IndexError when no object in the collection has this ``id_no``
        """
        matches = self._find(collection_name, id_no)
        if not matches:
            # Same exception type as indexing an empty result list, but with a
            # message that says what was actually missing.
            raise IndexError(
                f"No document with id_no {id_no!r} in collection {collection_name!r}"
            )
        return matches[0]['_additional']['id']

    def _exists(self, collection_name: str, id_no: str) -> bool:
        """
        Check whether at least one object with this ``id_no`` is stored

        INPUT:
        ------------------------------------
        collection_name:    Name of collection
        id_no:              id of document

        RETURNS:
        ------------------------------------
        True when one or more objects match (a warning is printed when there
        is more than one), False otherwise
        """
        matches = self._find(collection_name, id_no)
        if len(matches) > 1:
            print("There exist duplicated files")
        return len(matches) > 0

    def delete_collection(self, collection_name: str) -> None:
        """
        Delete a whole collection

        INPUT:
        ------------------------------------
        collection_name:    Name of collection
                            example shape:  'Faces'

        RETURNS: None
        ------------------------------------
        """
        try:
            self._client.schema.delete_class(collection_name)
            print('Successfully deleted')
        except Exception as e:
            # The error text is inspected for the status code; a 400 is
            # reported as "collection does not exist".
            if '400' in str(e):
                print('Collection does not exist so nothing to delete')
            else:
                print(f"Unknown error with error message -> {e}")

    def delete_document(self, collection_name: str, id_no: str) -> None:
        """
        Delete a document from a specified collection

        INPUT:
        ------------------------------------
        collection_name:    Name of collection
                            example shape:  'Faces'
        id_no:              id of document
                            example: "72671"

        RETURNS: None
        ------------------------------------
        """
        # Check if the id exists in this collection
        if not self._exists(collection_name, id_no):
            print("This id does not exist so no deletion is done")
            return

        uuid = self._id2uuid(collection_name, id_no)

        try:
            self._client.data_object.delete(uuid=uuid, class_name=collection_name)
            print('Successfully deleted')
        except Exception as e:
            print(f"Unknown error with error message -> {e}")

    def create_collection(self, collection_name: str, schema: dict) -> None:
        """
        Create a collection of documents

        INPUT:
        ------------------------------------
        collection_name:    Name of collection
                            example shape:  'Faces'
        schema:             Schema for each document
                            example: {
                                "properties":{
                                    "id_no": ["text"],
                            }}

        RETURNS: None
        ------------------------------------
        """
        # Ensure that there is an id_no for the schema. A schema without a
        # 'properties' entry is treated the same as one without id_no.
        property_spec = schema.get('properties') or {}
        if not property_spec.get('id_no'):
            print('Lack of id_no as an attribute in property')
            return

        # Extract the document information into a list
        properties = [
            {'name': key, 'dataType': val}
            for key, val in property_spec.items()
        ]

        # Proper formatting for creating the schema
        document_schema = {
            'class': collection_name,
            'vectorizer': 'none',
            'properties': properties
        }

        # Try to create schema. If exists, gracefully exit.
        try:
            self._client.schema.create_class(document_schema)
            print("Collection Successfully created")
        except Exception as e:
            if '422' in str(e):
                print("Collection has been created")
            else:
                print(f"Unknown error with error message -> {e}")

    def create_document(self, collection_name: str, properties: dict, embedding: torch.Tensor) -> None:
        """
        Create a document in a specified collection

        INPUT:
        ------------------------------------
        collection_name:    Name of collection
                            example shape:  'Faces'
        properties:         Properties of the document; must contain id_no
                            example: {
                                "id_no": "72671"
                            }
        embedding:          embedding for the document
                            example: torch.Tensor([[
                                1, 2, ..., 512
                            ]])

        RETURNS: None
        ------------------------------------
        """
        # Check if the id_no attribute exist
        if not properties.get('id_no'):
            print('Lack of id_no as an attribute in property')
            return

        # Check if the id exist. The id comes from the document being created,
        # not from any variable outside this method.
        if self._exists(collection_name, properties['id_no']):
            print("This id already existed please use update instead")
            return

        # Create document
        self._client.data_object.create(
            properties,
            collection_name,
            vector=embedding
        )

    def read_document(self, collection_name: str, id_no: str) -> dict:
        """
        Find a document in a specified collection

        INPUT:
        ------------------------------------
        collection_name:    Name of collection
                            example shape:  'Faces'
        id_no:              id of document
                            example: "72671"

        RETURNS:
        ------------------------------------
        The stored object, including its vector, as returned by the client

        RAISES:
        ------------------------------------
        IndexError when no document with this id_no exists
        """
        uuid = self._id2uuid(collection_name, id_no)
        return self._client.data_object.get_by_id(uuid=uuid, class_name=collection_name, with_vector=True)

    def update_document(self, collection_name: str, document: dict) -> None:
        """
        Update a document in a specified collection

        The ``id_no`` entry selects the document and is never modified. An
        optional ``vector`` entry replaces the stored vector. Every other
        entry is sent as a property update. Either part may be omitted.

        INPUT:
        ------------------------------------
        collection_name:    Name of collection
                            example shape:  'Faces'
        document:           dictionary format of the update document
                            {
                                'id_no': '4848272',
                                'vector': torch.Tensor([[
                                    1, 2, ..., 512
                                ]]),
                                'name': 'Trump'
                            }

        RETURNS: None
        ------------------------------------
        """
        if not document.get('id_no'):
            print('Lack of id_no result in ambiguous document to update')
            return
        if not self._exists(collection_name, document['id_no']):
            print('Attempt to update a non-existent document. No update is done')
            return

        # Everything except the lookup key and the vector is a property change.
        new_properties = {
            key: val for key, val in document.items()
            if key not in ('id_no', 'vector')
        }
        has_vector = document.get('vector') is not None

        if not new_properties and not has_vector:
            print('No fields to update were provided. No update is done')
            return

        uuid = self._id2uuid(collection_name, document['id_no'])
        update_kwargs = {
            'data_object': new_properties,
            'class_name': collection_name,
            'uuid': uuid,
        }
        # Only send a vector when one was supplied, so property-only updates
        # leave the stored vector alone.
        if has_vector:
            update_kwargs['vector'] = document['vector']

        try:
            self._client.data_object.update(**update_kwargs)
        except Exception as e:
            if '400' in str(e):
                print("Unknown field(s) in document")
            else:
                print(f"Unknown error with error message -> {e}")
                    

In [34]:
# Build the manager used by every cell below. Its constructor forms the
# Weaviate URL from the WEAVIATE_HOST and WEAVIATE_C_PORT environment variables.
client = VectorManager()

In [35]:
# Demo configuration: the collection to work in and its schema, which
# declares two text properties. `id_no` is the lookup key for documents.
collection_name = 'Faces'
input_schema = dict(
    properties={
        "id_no": ["text"],
        "name": ["text"],
    }
)

In [36]:
# Start from a clean slate by dropping the collection.
client.delete_collection(collection_name=collection_name)

Successfully deleted


In [37]:
# Deleting a second time exercises the "collection does not exist" branch.
client.delete_collection(collection_name=collection_name)

Collection does not exist so nothing to delete


In [38]:
# Create the collection from the schema defined above.
client.create_collection(collection_name=collection_name, schema=input_schema)

Collection Successfully created


In [39]:
# Creating it again exercises the "already created" branch.
client.create_collection(collection_name=collection_name, schema=input_schema)

Collection has been created


In [40]:
import torch
from tqdm import tqdm

In [41]:
# Seed the collection with ten documents named "anonymous", each with a
# random 1 x 512 embedding. The target is the notebook's `collection_name`
# rather than a repeated string literal, so the demo cells stay in sync.
for id_no in tqdm(range(10)):
    # torch.rand(1, 512) always has length 1, so no emptiness check is needed.
    face_emb = torch.rand(1, 512)
    data_obj = {
        "id_no": str(id_no),
        "name": "anonymous"
    }
    client.create_document(collection_name=collection_name, properties=data_obj, embedding=face_emb)

100%|█████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 228.69it/s]


In [42]:
# Fetch document "1" together with its stored vector.
client.read_document(collection_name=collection_name, id_no="1")

{'class': 'Faces',
 'creationTimeUnix': 1670925676792,
 'id': '748662a1-e4a4-4524-ae3a-ed536f9e0526',
 'lastUpdateTimeUnix': 1670925676792,
 'properties': {'id_no': '1', 'name': 'anonymous'},
 'vector': [0.8044259,
  0.49524188,
  0.1515665,
  0.10523105,
  0.18335748,
  0.8079511,
  0.94284445,
  0.36948317,
  0.55055916,
  0.8291821,
  0.26106906,
  0.28583246,
  0.088599384,
  0.6109948,
  0.8847174,
  0.73890394,
  0.43412143,
  0.17250818,
  0.9554995,
  0.79768944,
  0.07268375,
  0.016517282,
  0.56172806,
  0.95506793,
  0.8319671,
  0.3689282,
  0.43398964,
  0.06033069,
  0.8751132,
  0.7611622,
  0.0673638,
  0.024264216,
  0.73555976,
  0.54204375,
  0.031056345,
  0.67186916,
  0.886871,
  0.25352752,
  0.26881027,
  0.98482907,
  0.8779922,
  0.64283484,
  0.90749353,
  0.3335951,
  0.67971337,
  0.9269084,
  0.14807045,
  0.24886239,
  0.20035619,
  0.39370054,
  0.17250407,
  0.5389046,
  0.7343466,
  0.26919985,
  0.13435602,
  0.22132987,
  0.33493793,
  0.23451304,
 

In [43]:
# Remove document '1' from the collection.
client.delete_document(collection_name=collection_name, id_no='1')

Successfully deleted


In [44]:
# Deleting the same id again exercises the "id does not exist" branch.
client.delete_document(collection_name=collection_name, id_no='1')

This id does not exist so no deletion is done


In [45]:
# Fetch document "2" before it is updated, for comparison with the read below.
client.read_document(collection_name=collection_name, id_no="2")

{'class': 'Faces',
 'creationTimeUnix': 1670925676797,
 'id': '305c5445-7275-4f08-b24c-5a7f5b4c589c',
 'lastUpdateTimeUnix': 1670925676797,
 'properties': {'id_no': '2', 'name': 'anonymous'},
 'vector': [0.50805855,
  0.54673564,
  0.8816101,
  0.7722875,
  0.7011872,
  0.5371612,
  0.44528598,
  0.16748738,
  0.40269518,
  0.2392484,
  0.92755634,
  0.14210105,
  0.7827839,
  0.15045977,
  0.70955133,
  0.43022704,
  0.2930116,
  0.6433553,
  0.40717906,
  0.624145,
  0.89150375,
  0.9869479,
  0.5825663,
  0.14877313,
  0.3227681,
  0.9537381,
  0.90660346,
  0.9552838,
  0.12334442,
  0.55419576,
  0.97314006,
  0.65582055,
  0.2476784,
  0.8792085,
  0.11847454,
  0.48522943,
  0.86401486,
  0.74376476,
  0.93074584,
  0.8078762,
  0.45288998,
  0.5442636,
  0.2557156,
  0.33655298,
  0.7688804,
  0.73219854,
  0.68764687,
  0.98365647,
  0.7939535,
  0.9970766,
  0.8618724,
  0.92280996,
  0.3982951,
  0.98769474,
  0.43060535,
  0.5556468,
  0.31543458,
  0.36502618,
  0.5357743,

In [46]:
# Update payload for document '2': a fresh random vector plus a new `name`.
update = dict(
    id_no='2',
    vector=torch.rand(1, 512),
    name='Trump',
)

In [47]:
# Apply the update to document '2'.
client.update_document(collection_name=collection_name, document=update)

In [48]:
# Read document "2" back to confirm that the name and vector changed.
client.read_document(collection_name=collection_name, id_no="2")

{'class': 'Faces',
 'creationTimeUnix': 1670925676797,
 'id': '305c5445-7275-4f08-b24c-5a7f5b4c589c',
 'lastUpdateTimeUnix': 1670925681333,
 'properties': {'id_no': '2', 'name': 'Trump'},
 'vector': [0.12383145,
  0.7795392,
  0.6398161,
  0.97631955,
  0.10757172,
  0.48015195,
  0.36791223,
  0.31275052,
  0.1751945,
  0.18525487,
  0.3274213,
  0.49739444,
  0.8933472,
  0.8197198,
  0.49501938,
  0.115997314,
  0.9715985,
  0.792939,
  0.7366389,
  0.12840235,
  0.25458586,
  0.795513,
  0.9481173,
  0.9587112,
  0.8777334,
  0.011775374,
  0.9495914,
  0.5796895,
  0.69216335,
  0.66718125,
  0.9226051,
  0.1402139,
  0.96255404,
  0.8347544,
  0.11963022,
  0.1372025,
  0.25215566,
  0.58030576,
  0.62903184,
  0.14475113,
  0.86024016,
  0.09956002,
  0.670078,
  0.029451549,
  0.2462591,
  0.047089636,
  0.25573468,
  0.18994802,
  0.9196747,
  0.6839662,
  0.9244311,
  0.564568,
  0.3642782,
  0.57468665,
  0.94557303,
  0.3405717,
  0.055693448,
  0.15112269,
  0.7028136,
  0

In [49]:
# Update payload carrying `age`, a property the schema above does not declare.
update = dict(
    id_no='2',
    vector=torch.rand(1, 512),
    age='10',
)

In [50]:
# Exercises the "unknown field" branch of update_document.
client.update_document(collection_name=collection_name, document=update)

Unknown field(s) in document


In [53]:
# Update payload whose id_no was never inserted by the seeding loop.
update = dict(
    id_no='4848272',
    vector=torch.rand(1, 512),
)

In [54]:
# Exercises the "non-existent document" branch of update_document.
client.update_document(collection_name=collection_name, document=update)

Attempt to update a non-existent document. No update is done
