# Initialisation

In [1]:
from utils.WeaviateManager import VectorManager

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
# Shared VectorManager handle; every cell below issues its requests through `client`.
client = VectorManager()

# Create Collection

In [3]:
# Test 1.1 Attempt to create a collection with a nested schema -> error
# "education" maps to a nested dict rather than a type-name string; the
# recorded response is 'Unknown data type in the field'.

user_schema = {
    "id_no":"str",
    "age":"int",
    "education":{
        "primary":{
            "school":"str"
        },
        "secondary":"str",
        "tertiary":"str"
    }
}
collection_name = 'Faces'

client.create_collection(collection_name, user_schema)

{'response': 'Unknown data type in the field'}

In [4]:
# Test 1.2 Attempt to create a collection with an invalid data type -> error
# "Unknown Object" is not one of the type names used elsewhere in this
# notebook ("str", "int"); the recorded response is again
# 'Unknown data type in the field'.

user_schema = {
    "id_no":"str",
    "age":"int",
    "education": "Unknown Object"
}
collection_name = 'Faces'

client.create_collection(collection_name, user_schema)

{'response': 'Unknown data type in the field'}

In [5]:
# Test 1.3 Attempt to create a collection with valid data types
# Any existing 'Faces' collection is deleted first, then the collection is
# created; both calls print 200 in the recorded run.

user_schema = {
    "id_no":"str",
    "age":"int",
}
collection_name = 'Faces'
print(client.delete_collection(collection_name)['response'])
print(client.create_collection(collection_name, user_schema)['response'])

# Test 1.4 Attempt to create a collection with the same collection name -> error
# The second create is rejected with a 422 because 'Faces' is already in use.
user_schema = {
    "id_no":"str",
    "age":"int",
}
collection_name = 'Faces'

print(client.create_collection(collection_name, user_schema)['response'])

200
200
Unknown error with error message -> Create class! Unexpected status code: 422, with response body: {'error': [{'message': "Name 'Faces' already used as a name for an Object class"}]}


# Create Document

In [6]:
import torch

In [7]:
# Test 2.1 Attempt to create a document that does not abide by the schema -> error
# "age" is passed as the string "1" while the schema declares it as "int";
# the recorded response is a 422 'invalid integer property'.
data_obj = {
    "id_no": "1",
    "age": "1"
}
face_emb = torch.rand(1, 5)
client.create_document(collection_name = 'Faces', properties = data_obj, embedding = face_emb)['response']

'Creating object! Unexpected status code: 422, with response body: {\'error\': [{\'message\': "invalid object: invalid integer property \'age\' on class \'Faces\': requires an integer, the given value is \'1\'"}]}'

In [8]:
# Test 2.2 Attempt to create documents that match the original schema
# Ten documents are created with id_no "0".."9" and age 0..9, each with a
# random 1x5 embedding.
for id_no in range(10):
    face_emb = torch.rand(1, 5)
    # Guard before building the payload so that a missing embedding can never
    # submit a stale `data_obj` left over from an earlier iteration or cell.
    if len(face_emb) == 0:
        continue
    data_obj = {
        "id_no": "{}".format(id_no),
        "age": id_no
    }
    print(client.create_document(collection_name = 'Faces', properties = data_obj, embedding = face_emb)['response'])

200
200
200
200
200
200
200
200
200
200


In [9]:
# Test 2.3 Attempt to create a document with a new data field
# "new" is not part of the schema the collection was created with and "age"
# is omitted; the recorded response is still 200.
data_obj = {
    "id_no": "11",
    "new": "2"
}
# Fixed (non-random) embedding: the nearest-neighbour tests in the Read
# section query with exactly these values.
# torch.tensor is the documented factory for building a tensor from existing
# data; torch.Tensor(...) is the legacy class constructor.
face_emb = torch.tensor([0.5766745, 0.9341823, 0.7021697, 0.54776406, 0.013553977])
client.create_document(collection_name = 'Faces', properties = data_obj, embedding = face_emb)

{'response': '200'}

In [10]:
# Test 2.4 Attempt to create a document with the same id_no -> error
# id_no "11" was already created in Test 2.3; the recorded response is
# 'This id already existed please use update instead'.
data_obj = {
    "id_no": "11",
    "new": "2"
}
face_emb = torch.rand(1, 5)
client.create_document(collection_name = 'Faces', properties = data_obj, embedding = face_emb)

{'response': 'This id already existed please use update instead'}

In [11]:
# Test 2.5 Attempt to create a document with a different embedding length -> error
# The embedding here is 1x10 while every earlier document used 5 components;
# the recorded response reports a vector-length mismatch.
data_obj = {
    "id_no": "12",
    "age": 12
}
face_emb = torch.rand(1, 10)
client.create_document(collection_name = 'Faces', properties = data_obj, embedding = face_emb)

{'response': 'Mistmatch vector length, creation failed'}

# Read Document

In [12]:
# 3.1 Attempt to read a non-existent document id number -> error
# No document with id_no "100" is created anywhere above.

client.read_document(collection_name, "100")

{'response': 'Attempt to read a non-existent document. No reading is done'}

In [13]:
# 3.2 Attempt to read from a non-existent collection -> error
# The recorded response is the same 'non-existent document' message as for a
# missing id; it does not mention that the collection itself is missing.

client.read_document('Non-existence', "1")

{'response': 'Attempt to read a non-existent document. No reading is done'}

In [14]:
# 3.3 Attempt to read one of the documents created in 2.2

client.read_document(collection_name, "1")

{'response': {'class': 'Faces',
  'creationTimeUnix': 1671087077726,
  'id': '258953ae-4aad-43dc-aa9d-5f74bf38dfc4',
  'lastUpdateTimeUnix': 1671087077726,
  'properties': {'age': 1, 'id_no': '1'},
  'vector': [0.14229017, 0.43621045, 0.3271194, 0.9458164, 0.36649644],
  'vectorWeights': None}}

In [15]:
# 3.3b Attempt to read the document created in 2.3
# The stored properties are 'id_no' and 'new'; no 'age' was supplied.

client.read_document(collection_name, "11")

{'response': {'class': 'Faces',
  'creationTimeUnix': 1671087077765,
  'id': '156443f7-df34-479e-8274-59833a1655ef',
  'lastUpdateTimeUnix': 1671087077765,
  'properties': {'id_no': '11', 'new': '2'},
  'vector': [0.5766745, 0.9341823, 0.7021697, 0.54776406, 0.013553977],
  'vectorWeights': None}}

In [16]:
# 3.4 Attempt to read the document nearest to the embedding
# The query uses the same values as the embedding stored for id_no "11" in
# Test 2.3. No result count is passed; the recorded response holds one hit.

# torch.tensor is the documented factory for building a tensor from existing
# data; torch.Tensor(...) is the legacy class constructor.
embedding = torch.tensor([0.5766745, 0.9341823, 0.7021697, 0.54776406, 0.013553977])

client.get_top_k(collection_name, embedding)

{'response': [{'response': {'class': 'Faces',
    'creationTimeUnix': 1671087077765,
    'id': '156443f7-df34-479e-8274-59833a1655ef',
    'lastUpdateTimeUnix': 1671087077765,
    'properties': {'id_no': '11', 'new': '2'},
    'vector': [0.5766745, 0.9341823, 0.7021697, 0.54776406, 0.013553977],
    'vectorWeights': None},
   'certainty': 0.9999999403953552}]}

In [17]:
# 3.5 Attempt to read the top-k documents nearest to the embedding
# Same query vector as 3.4; the third argument is presumably k (here 3) --
# confirm against VectorManager.get_top_k.

# torch.tensor is the documented factory for building a tensor from existing
# data; torch.Tensor(...) is the legacy class constructor.
embedding = torch.tensor([0.5766745, 0.9341823, 0.7021697, 0.54776406, 0.013553977])

client.get_top_k(collection_name, embedding, 3)

{'response': [{'response': {'class': 'Faces',
    'creationTimeUnix': 1671087077765,
    'id': '156443f7-df34-479e-8274-59833a1655ef',
    'lastUpdateTimeUnix': 1671087077765,
    'properties': {'id_no': '11', 'new': '2'},
    'vector': [0.5766745, 0.9341823, 0.7021697, 0.54776406, 0.013553977],
    'vectorWeights': None},
   'certainty': 0.9999999403953552},
  {'response': {'class': 'Faces',
    'creationTimeUnix': 1671087077751,
    'id': 'e999d2c4-22cb-420b-b114-52ecaf97f7e5',
    'lastUpdateTimeUnix': 1671087077751,
    'properties': {'age': 8, 'id_no': '8'},
    'vector': [0.3250035, 0.38524753, 0.38620043, 0.4955626, 0.06325245],
    'vectorWeights': None},
   'certainty': 0.97750523686409},
  {'response': {'class': 'Faces',
    'creationTimeUnix': 1671087077754,
    'id': 'fba70226-d09a-4403-ad9d-d8cb00058a2a',
    'lastUpdateTimeUnix': 1671087077754,
    'properties': {'age': 9, 'id_no': '9'},
    'vector': [0.44360954, 0.9544769, 0.73395604, 0.38870186, 0.5922022],
    'vector

In [19]:
# 3.6 Attempt to read the top-k documents nearest to a wrong-length embedding -> error
# The query has 4 components while the stored vectors have 5; the recorded
# response reports "vector lengths don't match: 5 vs 4".

# torch.tensor is the documented factory for building a tensor from existing
# data; torch.Tensor(...) is the legacy class constructor.
embedding = torch.tensor([0.5766745, 0.9341823, 0.7021697, 0.54776406])

client.get_top_k(collection_name, embedding, 10)

{'response': [{'locations': [{'column': 6, 'line': 1}],
   'message': "explorer: get class: vector search: object vector search at index faces: shard faces_RGNS7TtEBkmW: vector search: knn search: distance between entrypoint and query node: vector lengths don't match: 5 vs 4",
   'path': ['Get', 'Faces']}]}

# Update

In [20]:
# 4.1 Attempt to update a non-existent id -> error
# NOTE(review): the payload carries only 'id_no', so the recorded response is
# the "insufficient to update" message -- identical to test 4.4 -- and the
# non-existent-id path is not actually exercised. Adding a field to update
# (e.g. 'age') would be needed to reach it.

update = {
    'id_no': '12',
}

client.update_document(collection_name, update)

{'response': "Only dict_keys(['id_no']) is found which insufficient to update"}

In [21]:
# 4.2 Attempt to update with a different vector dimension -> error
# The replacement vector is 1x3 while the stored vector has 5 components.
# NOTE(review): the return value of update_document is discarded, so the
# error itself is never displayed. In the recorded output the stored vector
# is unchanged, yet lastUpdateTimeUnix still advances.

update = {
    'id_no': '2',
    'vector': torch.rand(1, 3),
}

print(client.read_document(collection_name, "2"))
client.update_document(collection_name, update)
print(client.read_document(collection_name, "2"))

{'response': {'class': 'Faces', 'creationTimeUnix': 1671087077729, 'id': 'caca7778-f909-44b1-8ac7-18463c3f7221', 'lastUpdateTimeUnix': 1671087077729, 'properties': {'age': 2, 'id_no': '2'}, 'vector': [0.23623401, 0.016892731, 0.4515667, 0.45330882, 0.34699374], 'vectorWeights': None}}
{'response': {'class': 'Faces', 'creationTimeUnix': 1671087077729, 'id': 'caca7778-f909-44b1-8ac7-18463c3f7221', 'lastUpdateTimeUnix': 1671087145732, 'properties': {'age': 2, 'id_no': '2'}, 'vector': [0.23623401, 0.016892731, 0.4515667, 0.45330882, 0.34699374], 'vectorWeights': None}}


In [22]:
# 4.3 Attempt to update with a new (unknown) field -> error
# NOTE(review): the return value of update_document is discarded, so the
# error itself is never displayed. In the recorded output the document is
# identical before and after, including the 5-component vector supplied here.

update = {
    'id_no': '2',
    'vector': torch.rand(1, 5),
    'non-known': 1
}
print(client.read_document(collection_name, "2"))
client.update_document(collection_name, update)
print(client.read_document(collection_name, "2"))

{'response': {'class': 'Faces', 'creationTimeUnix': 1671087077729, 'id': 'caca7778-f909-44b1-8ac7-18463c3f7221', 'lastUpdateTimeUnix': 1671087145732, 'properties': {'age': 2, 'id_no': '2'}, 'vector': [0.23623401, 0.016892731, 0.4515667, 0.45330882, 0.34699374], 'vectorWeights': None}}
{'response': {'class': 'Faces', 'creationTimeUnix': 1671087077729, 'id': 'caca7778-f909-44b1-8ac7-18463c3f7221', 'lastUpdateTimeUnix': 1671087145732, 'properties': {'age': 2, 'id_no': '2'}, 'vector': [0.23623401, 0.016892731, 0.4515667, 0.45330882, 0.34699374], 'vectorWeights': None}}


In [23]:
# 4.4 Attempt to update with only the id -> error
# The payload names an existing document but contains nothing to change.

update = {
    'id_no': '2',
}

client.update_document(collection_name, update)

{'response': "Only dict_keys(['id_no']) is found which insufficient to update"}

In [24]:
# 4.5 Attempt to update age with the wrong data type -> error
# 'age' is given as the string '2' while the schema declares it as "int";
# the recorded response is a 422 'invalid integer property'.

update = {
    'id_no': '2',
    'age': '2'
}

client.update_document(collection_name, update)

{'response': 'Update of the object not successful! Unexpected status code: 422, with response body: {\'error\': [{\'message\': "msg:bad request code:400 err:invalid integer property \'age\' on class \'Faces\': requires an integer, the given value is \'2\'"}]}'}

In [25]:
# 4.6 Attempt to update age with the right data type
# The document is read before and after the update so the change in 'age'
# (2 -> 100 in the recorded output) is visible.

update = {
    'id_no': '2',
    'age': 100
}
print(client.read_document(collection_name, "2"))
client.update_document(collection_name, update)
print(client.read_document(collection_name, "2"))

{'response': {'class': 'Faces', 'creationTimeUnix': 1671087077729, 'id': 'caca7778-f909-44b1-8ac7-18463c3f7221', 'lastUpdateTimeUnix': 1671087145732, 'properties': {'age': 2, 'id_no': '2'}, 'vector': [0.23623401, 0.016892731, 0.4515667, 0.45330882, 0.34699374], 'vectorWeights': None}}
{'response': {'class': 'Faces', 'creationTimeUnix': 1671087077729, 'id': 'caca7778-f909-44b1-8ac7-18463c3f7221', 'lastUpdateTimeUnix': 1671087150734, 'properties': {'age': 100, 'id_no': '2'}, 'vector': [0.23623401, 0.016892731, 0.4515667, 0.45330882, 0.34699374], 'vectorWeights': None}}


In [26]:
# 4.7 Attempt to update the vector only
# Unlike 4.2 and 4.3, the vector is converted with .numpy() before being
# passed; in the recorded output the stored vector is replaced.

update = {
    'id_no': '2',
    'vector': torch.rand(1, 5).numpy(),
}
print(client.read_document(collection_name, "2"))
client.update_document(collection_name, update)
print(client.read_document(collection_name, "2"))

{'response': {'class': 'Faces', 'creationTimeUnix': 1671087077729, 'id': 'caca7778-f909-44b1-8ac7-18463c3f7221', 'lastUpdateTimeUnix': 1671087150734, 'properties': {'age': 100, 'id_no': '2'}, 'vector': [0.23623401, 0.016892731, 0.4515667, 0.45330882, 0.34699374], 'vectorWeights': None}}
{'response': {'class': 'Faces', 'creationTimeUnix': 1671087077729, 'id': 'caca7778-f909-44b1-8ac7-18463c3f7221', 'lastUpdateTimeUnix': 1671087152168, 'properties': {'age': 100, 'id_no': '2'}, 'vector': [0.5929948, 0.53583974, 0.8198866, 0.24992085, 0.6878032], 'vectorWeights': None}}


# Delete

In [27]:
# 5.1 Delete a document
# The document is read back afterwards; the 'non-existent document' response
# confirms it is gone.

client.delete_document(collection_name, "2")
client.read_document(collection_name, "2")

{'response': 'Attempt to read a non-existent document. No reading is done'}

In [28]:
# 5.2 Delete a non-existent document -> error
# id_no "2" was already removed in 5.1.

client.delete_document(collection_name, "2")

{'response': 'id: 2 is not found'}

In [29]:
# 5.3 Delete the collection
# A document that existed before the delete is read back afterwards; the
# 'non-existent document' response is what the recorded run shows.

client.delete_collection(collection_name)
client.read_document(collection_name, "1")

{'response': 'Attempt to read a non-existent document. No reading is done'}

In [30]:
# 5.4 Delete a non-existent collection -> error
# The collection was already removed in 5.3; the recorded response is a 400
# "could not find class 'Faces'".

client.delete_collection(collection_name)

{'response': 'Delete class from schema! Unexpected status code: 400, with response body: {\'error\': [{\'message\': "could not find class \'Faces\'"}]}'}