In [562]:
import weaviate
import torch
import os
import copy
import pprint
import json
import base64

class VectorManager:
    """
    Small CRUD helper around a Weaviate client.

    Documents are addressed by a user-supplied ``id_no`` text property; the
    Weaviate uuid is looked up internally. Failures are reported with
    ``print`` rather than raised, matching the notebook-style usage.
    """

    # Python-style type names accepted in a user schema -> Weaviate dataType list.
    TYPE_MAP = {
        "int":["int"],
        "float":["number"],
        "double":["number"],
        "str": ["text"],
        "bool": ["boolean"],
        "datetime": ["date"],
        "list[int]":["int[]"],
        "list[str]":["text[]"],
        "list[float]": ["number[]"],
        "list[double]": ["number[]"],
    }
    
    def __init__(self) -> None:
        """
        Set up the connection

        The server URL is built from the WEAVIATE_HOST and WEAVIATE_C_PORT
        environment variables.

        INPUT: None
        ------------------------------------

        RETURNS: None
        ------------------------------------
        """
        self._client = weaviate.Client(f"http://{os.environ.get('WEAVIATE_HOST')}:{os.environ.get('WEAVIATE_C_PORT')}")
        
    def _traverse_map(self, map_dict):
        """
        Translate a {property name: type name} mapping into Weaviate properties

        INPUT: 
        ------------------------------------
        map_dict:           Mapping of property name to a TYPE_MAP key
                            example: {"id_no": "str", "age": "int"}

        RETURNS: list of {"name", "dataType"} dicts, or None if any value is
                 not a supported type name (e.g. a nested dict)
        ------------------------------------
        """
        properties = []
        for name, type_name in map_dict.items():
            # isinstance first: nested dicts/lists are unhashable and cannot
            # be used for a dictionary lookup at all.
            if not isinstance(type_name, str) or type_name not in self.TYPE_MAP:
                print(f'Invalid date type {type_name}')
                return None
            properties.append({
                "name": name,
                # copy so callers never share the class-level list
                "dataType": list(self.TYPE_MAP[type_name])
            })
        return properties

    def _query_by_id(self, collection_name: str, id_no: str) -> list:
        """
        Return every object in collection_name whose id_no equals id_no

        Each hit carries "id_no" and "_additional" -> "id" (the uuid).
        """
        where_filter = {
            'operator': 'Equal',
            'valueText': id_no,
            'path': ['id_no']
        }
        response = (
            self._client.query
            .get(collection_name, ["id_no", "_additional {id}"])
            .with_where(where_filter)
            .do()
        )
        return response['data']['Get'][collection_name]

    def _lookup_uuid(self, collection_name: str, id_no: str):
        """
        Resolve id_no to a uuid with a single query

        Returns None when nothing matches; warns (but still returns the first
        match) when several objects share the same id_no.
        """
        hits = self._query_by_id(collection_name, id_no)
        if not hits:
            return None
        if len(hits) > 1:
            print("There exist duplicated files")
        return hits[0]['_additional']['id']
        
    def _id2uuid(self, collection_name: str, id_no: str) -> str:
        """
        Return the uuid of the first object with the given id_no

        Prints a notice and returns None when no object matches.
        """
        hits = self._query_by_id(collection_name, id_no)
        if len(hits) < 1:
            print(f'id: {id_no} is not found')
            return None
        return hits[0]['_additional']['id']
    
    def _exists(self, collection_name: str, id_no: str) -> bool:
        """
        Tell whether at least one object with the given id_no exists

        Prints a warning when more than one object matches.
        """
        hits = self._query_by_id(collection_name, id_no)
        if len(hits) > 1:
            print("There exist duplicated files")
        return len(hits) >= 1

    def delete_collection(self, collection_name: str) -> None:
        """
        Delete a collection (Weaviate class)

        INPUT: 
        ------------------------------------
        collection_name:    Name of collection
                            example shape:  'Faces'

        RETURNS: None
        ------------------------------------
        """
        try:
            self._client.schema.delete_class(collection_name)
            print('Successfully deleted')
        except Exception as e:
            # The error text containing '400' is treated as "class not found".
            if '400' in str(e):
                print('Collection does not exist so nothing to delete')
            else:
                print(f"Unknown error with error message -> {e}")
            
    def delete_document(self, collection_name: str, id_no: str) -> None:
        """
        Delete the document with the given id_no from a collection

        INPUT: 
        ------------------------------------
        collection_name:    Name of collection
                            example shape:  'Faces'
        id_no:              id of document
                            example: "72671"

        RETURNS: None
        ------------------------------------
        """
        # One lookup both checks existence and yields the uuid.
        uuid = self._lookup_uuid(collection_name, id_no)
        if uuid is None:
            print("This id does not exist so no deletion is done")
            return
        
        try:
            self._client.data_object.delete(uuid=uuid, class_name=collection_name)
            print('Successfully deleted')
        except Exception as e:
            print(f"Unknown error with error message -> {e}")

    def create_collection(self, collection_name: str, schema: dict) -> None:
        """
        create a collection of documents

        INPUT: 
        ------------------------------------
        collection_name:    Name of collection
                            example shape:  'Faces'
        schema:             Schema for each document, mapping property name
                            to a TYPE_MAP type name; must contain "id_no"
                            example: {
                                "id_no": "str",
                            }

        RETURNS: None
        ------------------------------------
        """
        # Ensure that there is a id_no for the schema
        if not schema.get('id_no'):
            print('Lack of id_no as an attribute in property')
            return
        
        # Extract the document information into a list
        properties = self._traverse_map(schema)
        if properties is None:
            return None

        # Proper formatting for creating the schema; vectors are supplied by
        # the caller, so no server-side vectorizer is configured.
        document_schema = {
            'class': collection_name,
            'vectorizer': 'none',
            'properties': properties
        }

        # Try to create schema. If exists, gracefully exit.
        try:
            self._client.schema.create_class(document_schema)
            print("Collection Successfully created")
        except Exception as e:
            if '422' in str(e):
                print("Collection has been created")
            else:
                print(f"Unknown error with error message -> {e}")


    def create_document(self, collection_name: str, properties: dict, embedding: torch.Tensor) -> None:
        """
        create a document in a specified collecton

        INPUT: 
        ------------------------------------
        collection_name:    Name of collection
                            example shape:  'Faces'
        properties:         Property values for the document; must contain
                            "id_no"
                            example: {
                                "id_no": "72671"
                            }
        embedding:          embedding for the document
                            example: torch.Tensor([[
                                1, 2, ..., 512
                            ]])

        RETURNS: None
        ------------------------------------
        """
        # Check if the id_no attribute exist
        if not properties.get('id_no'):
            print('Lack of id_no as an attribute in property')
            return

        # Check if the id exist. The id must come from `properties`; relying
        # on a global named `id_no` only works by accident in a notebook.
        id_exists = self._exists(collection_name, properties['id_no'])
        
        if id_exists:
            print("This id already existed please use update instead")
            return

        # Create document
        try:
            self._client.data_object.create(
              properties,
              collection_name,
              vector = embedding
            )
        except Exception as e:
            print(f'Error in creating. Please read error message -> {e}')

    def read_document(self, collection_name: str, id_no: str) -> dict:
        """
        Find a document in a specified collecton

        INPUT: 
        ------------------------------------
        collection_name:    Name of collection
                            example shape:  'Faces'
        id_no:              id of document
                            example: "72671"

        RETURNS: the object as returned by the client (including its vector),
                 or None when no document has this id_no
        ------------------------------------
        """
        uuid = self._lookup_uuid(collection_name, id_no)
        if uuid is None:
            print('Attempt to read a non-existent document. No reading is done')
            return
        
        return self._client.data_object.get_by_id(uuid = uuid, class_name = collection_name, with_vector = True)
    

    def update_document(self, collection_name: str, document: dict) -> None:
        """
        Update a document in a specified collecton

        INPUT: 
        ------------------------------------
        collection_name:    Name of collection
                            example shape:  'Faces'
        document:           dictionary format of the update document; "id_no"
                            selects the document, the optional "vector"
                            replaces its embedding, every other key is a
                            property to update
                            {
                                'id_no': '4848272',
                                'vector': torch.Tensor([[
                                    1, 2, ..., 512
                                ]]),
                                'name': 'Trump'
                            }

        RETURNS: None
        ------------------------------------
        """
        if not document.get('id_no'):
            print('Lack of id_no result in ambiguous document to update')
            return
        uuid = self._lookup_uuid(collection_name, document['id_no'])
        if uuid is None:
            print('Attempt to update a non-existent document. No update is done')
            return
        if len(document.keys()) == 1:
            print(f'Only {document.keys()} is found which has nothing to update')
            return

        # Build a fresh dict so the caller's document is never modified.
        fields = {
            key: value
            for key, value in document.items()
            if key not in ('id_no', 'vector')
        }
        update_kwargs = {
            'data_object': fields,
            'class_name': collection_name,
            'uuid': uuid,
        }
        # Only forward a vector when the caller supplied one.
        if 'vector' in document:
            update_kwargs['vector'] = document['vector']

        # Every variant (vector only, fields only, both) is guarded the same way.
        try:
            self._client.data_object.update(**update_kwargs)
        except Exception as e:
            # "Unknown field(s)" is only a plausible diagnosis when
            # property fields were actually sent.
            if fields and '400' in str(e):
                print("Unknown field(s) in document")
            else:
                print(f"Unknown error with error message -> {e}")
                    

In [563]:
# `client` is a VectorManager (not a raw weaviate.Client); its constructor
# creates a weaviate.Client for http://$WEAVIATE_HOST:$WEAVIATE_C_PORT, both
# read from the environment.
client = VectorManager()

In [564]:
# First attempt at a schema: "education" maps to a nested dict, which is not
# one of the type names in VectorManager.TYPE_MAP.
user_schema = dict(
    id_no="str",
    age="int",
    education=dict(
        primary=dict(school="str"),
        secondary="str",
        tertiary="str",
    ),
)
collection_name = 'Faces'

In [565]:
# Delete the collection named by `collection_name`; the recorded output
# reports a successful deletion.
client.delete_collection(collection_name)

Successfully deleted


In [566]:
# Same call again: the collection is already gone, and delete_collection
# reports that there is nothing to delete instead of raising.
client.delete_collection(collection_name)

Collection does not exist so nothing to delete


In [567]:
# `user_schema` still holds the nested "education" mapping here, so the type
# translation fails and no collection is created (see the recorded output).
client.create_collection(collection_name, user_schema)

Invalid date type {'primary': {'school': 'str'}, 'secondary': 'str', 'tertiary': 'str'}


In [568]:
# Flat schema: every value is a type name from VectorManager.TYPE_MAP.
user_schema = dict(id_no="str", age="int")

In [569]:
# With the flat schema the collection is created (see the recorded output).
client.create_collection(collection_name, user_schema)

Collection Successfully created


In [570]:
import torch
from tqdm import tqdm

In [571]:
# "age" is sent as the string "1" although the schema declares it as "int";
# the recorded output shows Weaviate rejecting the object with a 422.
face_emb = torch.rand(1, 512)
data_obj = dict(id_no="1", age="1")
client.create_document(
    collection_name='Faces',
    properties=data_obj,
    embedding=face_emb,
)

Error in creating. Please read error message -> Creating object! Unexpected status code: 422, with response body: {'error': [{'message': "invalid object: invalid integer property 'age' on class 'Faces': requires an integer, the given value is '1'"}]}


In [572]:
# Insert ten documents: id_no is the loop index as text, age is the loop
# index as an int, and each gets a freshly drawn random (1, 512) embedding.
for id_no in tqdm(range(10)):
    face_emb = torch.rand(1, 512)
    if len(face_emb) == 0:
        # nothing to store for an empty embedding
        continue
    data_obj = {"id_no": str(id_no), "age": id_no}
    client.create_document(
        collection_name='Faces',
        properties=data_obj,
        embedding=face_emb,
    )

100%|████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████████| 10/10 [00:00<00:00, 163.75it/s]


In [573]:
# Only ids "0".."9" were inserted above, so id "100" is not found:
# read_document prints a notice and returns None.
client.read_document(collection_name, "100")

Attempt to read a non-existent document. No reading is done


In [574]:
# Fetch the document with id_no "1"; read_document asks the client to include
# the stored vector in the result.
client.read_document(collection_name, "1")

{'class': 'Faces',
 'creationTimeUnix': 1670991107445,
 'id': '49be1a9d-7092-4e94-9ffa-68ec26cd3823',
 'lastUpdateTimeUnix': 1670991107445,
 'properties': {'age': 1, 'id_no': '1'},
 'vector': [0.29349566,
  0.33007097,
  0.05488366,
  0.98687977,
  0.4633636,
  0.66804916,
  0.15277505,
  0.9065038,
  0.11002028,
  0.45777327,
  0.62906504,
  0.95289725,
  0.02853012,
  0.46024966,
  0.30453932,
  0.19736958,
  0.79205364,
  0.37141836,
  0.2917769,
  0.11980069,
  0.66433305,
  0.65926254,
  0.50061053,
  0.26440203,
  0.5132738,
  0.3529334,
  0.19071782,
  0.3403195,
  0.6080931,
  0.20658839,
  0.16256529,
  0.834568,
  0.98473996,
  0.6452587,
  0.546962,
  0.43030852,
  0.9810637,
  0.084504664,
  0.4040258,
  0.5447229,
  0.6788327,
  0.69345367,
  0.58470684,
  0.64399856,
  0.7814555,
  0.06991166,
  0.4941376,
  0.5699665,
  0.32137883,
  0.11123693,
  0.116424024,
  0.95664746,
  0.5383089,
  0.28757888,
  0.34454775,
  0.49000412,
  0.9635229,
  0.5715403,
  0.22042078,
  0

In [575]:
# Delete the document whose id_no is '1'.
client.delete_document(collection_name, '1')

Successfully deleted


In [576]:
# Same call again: the id no longer exists, so nothing is deleted and a
# notice is printed instead.
client.delete_document(collection_name, '1')

This id does not exist so no deletion is done


In [577]:
# Show document "2" as stored, before any update is applied to it.
client.read_document(collection_name, "2")

{'class': 'Faces',
 'creationTimeUnix': 1670991107451,
 'id': '9d55cb01-fcc3-40c2-a968-dd96a022fd9b',
 'lastUpdateTimeUnix': 1670991107451,
 'properties': {'age': 2, 'id_no': '2'},
 'vector': [0.18031937,
  0.16548133,
  0.1206848,
  0.22532094,
  0.55881214,
  0.9631694,
  0.02091813,
  0.40613872,
  0.56598544,
  0.9711465,
  0.7740909,
  0.4374069,
  0.902713,
  0.71866286,
  0.9621902,
  0.44910645,
  0.90537155,
  0.833003,
  0.52923477,
  0.88798684,
  0.5047532,
  0.26033628,
  0.03995323,
  0.86682206,
  0.39955014,
  0.08947402,
  0.08880353,
  0.4505359,
  0.6089334,
  0.24964297,
  0.97269875,
  0.6496481,
  0.29843497,
  0.27283704,
  0.3661989,
  0.25011057,
  0.8728687,
  0.48689806,
  0.7024007,
  0.9125298,
  0.84639484,
  0.4776631,
  0.4603675,
  0.77279204,
  0.26601148,
  0.54847425,
  0.16843182,
  0.46156436,
  0.79791445,
  0.16447788,
  0.6057176,
  0.4675814,
  0.93094236,
  0.9745226,
  0.16155827,
  0.75090146,
  0.8178439,
  0.33384323,
  0.25515646,
  0.863

In [578]:
# Update payload for document '2': a new random vector plus a new "age".
update = dict(
    id_no='2',
    vector=torch.rand(1, 512),
    age=1,
)

In [579]:
# Apply the payload: both the vector and the "age" property are sent.
client.update_document(collection_name, update)

In [580]:
# Read document "2" back; the recorded output shows age 1 and a different
# vector than before the update.
client.read_document(collection_name, "2")

{'class': 'Faces',
 'creationTimeUnix': 1670991107451,
 'id': '9d55cb01-fcc3-40c2-a968-dd96a022fd9b',
 'lastUpdateTimeUnix': 1670991109499,
 'properties': {'age': 1, 'id_no': '2'},
 'vector': [0.75144804,
  0.70438665,
  0.68881327,
  0.09228647,
  0.6093661,
  0.85971487,
  0.9612069,
  0.30578893,
  0.76397353,
  0.7298373,
  0.28015536,
  0.24559295,
  0.21606648,
  0.70721084,
  0.83606815,
  0.27796596,
  0.22905672,
  0.12034172,
  0.26684535,
  0.30414057,
  0.07869148,
  0.46311563,
  0.3821656,
  0.8522912,
  0.5625841,
  0.21592438,
  0.017917275,
  0.40956426,
  0.49943882,
  0.87246543,
  0.8072591,
  0.6595145,
  0.9175482,
  0.74008346,
  0.663583,
  0.8277258,
  0.09586936,
  0.7338712,
  0.9689895,
  0.23500371,
  0.07708663,
  0.30546653,
  0.80394936,
  0.01987648,
  0.05305195,
  0.8384113,
  0.54823136,
  0.84653914,
  0.40859008,
  0.30560035,
  0.56767654,
  0.11030853,
  0.44325304,
  0.5320392,
  0.9652636,
  0.86452264,
  0.5660206,
  0.55634147,
  0.25072312,


In [581]:
# Update payload with a "name" property, which the collection schema
# (id_no, age) does not define.
update = dict(
    id_no='2',
    vector=torch.rand(1, 512),
    name='trump',
)

In [582]:
# "name" is not part of the schema, so the update fails and is reported as an
# unknown field (see the recorded output).
client.update_document(collection_name, update)

Unknown field(s) in document


In [583]:
# Update payload addressed to an id_no that was never inserted.
update = dict(
    id_no='4848272',
    vector=torch.rand(1, 512),
)

In [584]:
# No document has this id_no, so update_document prints a notice and makes
# no change.
client.update_document(collection_name, update)

Attempt to update a non-existent document. No update is done


In [587]:
# Update payload that carries only the id_no, i.e. nothing to change.
update = dict(id_no='2')

In [588]:
# The payload has a single key (id_no), so update_document reports that there
# is nothing to update.
client.update_document(collection_name, update)

Only dict_keys(['id_no']) is found which has nothing to update


In [589]:
# Update payload that changes a property only; no "vector" key is present.
update = dict(id_no='2', age=5)

In [590]:
# Property-only update: no vector is passed to the client for this call.
client.update_document(collection_name, update)

In [591]:
# Read document "2" back; the recorded output shows age 5 with the same
# leading vector values as the previous read.
client.read_document(collection_name, "2")

{'class': 'Faces',
 'creationTimeUnix': 1670991107451,
 'id': '9d55cb01-fcc3-40c2-a968-dd96a022fd9b',
 'lastUpdateTimeUnix': 1670991170064,
 'properties': {'age': 5, 'id_no': '2'},
 'vector': [0.75144804,
  0.70438665,
  0.68881327,
  0.09228647,
  0.6093661,
  0.85971487,
  0.9612069,
  0.30578893,
  0.76397353,
  0.7298373,
  0.28015536,
  0.24559295,
  0.21606648,
  0.70721084,
  0.83606815,
  0.27796596,
  0.22905672,
  0.12034172,
  0.26684535,
  0.30414057,
  0.07869148,
  0.46311563,
  0.3821656,
  0.8522912,
  0.5625841,
  0.21592438,
  0.017917275,
  0.40956426,
  0.49943882,
  0.87246543,
  0.8072591,
  0.6595145,
  0.9175482,
  0.74008346,
  0.663583,
  0.8277258,
  0.09586936,
  0.7338712,
  0.9689895,
  0.23500371,
  0.07708663,
  0.30546653,
  0.80394936,
  0.01987648,
  0.05305195,
  0.8384113,
  0.54823136,
  0.84653914,
  0.40859008,
  0.30560035,
  0.56767654,
  0.11030853,
  0.44325304,
  0.5320392,
  0.9652636,
  0.86452264,
  0.5660206,
  0.55634147,
  0.25072312,
