In [None]:
from random import random, randint
from math import floor, log
import networkx as nx
import numpy as np
import matplotlib as mtplt
from matplotlib import pyplot as plt
from VectorDbs.utils import *

vec_num = 40  # Number of vectors (nodes)
dim = 2  # Number of coordinates per vector
m_nearest_neighbor = 2  # Handed to construct_HNSW as its second argument
# Fixed seed so that Restart & Run All regenerates the same point cloud every time.
SEED = 42
rng = np.random.default_rng(SEED)
# (vec_num, dim) array of samples drawn uniformly from [0, 1).
vec_pos = rng.uniform(size=(vec_num, dim))

In [None]:
# Query: a one-node graph holding the query vector, so it can be drawn like the data graphs.
query_vec = [0.5, 0.5]

# Each entry is a (node name, attribute dict) pair; 'pos' carries the node coordinates.
nodes = [("Q", {"pos": query_vec})]

G_query = nx.Graph()
G_query.add_nodes_from(nodes)

print("nodes = ", nodes, flush=True)

# Mapping node -> 'pos' attribute, in the form nx.draw takes as its position argument.
pos_query = nx.get_node_attributes(G_query, 'pos')

# Brute Force

In [None]:
# nearest_neigbor returns a pair of graphs, unpacked here as G_lin and G_best.
G_lin, G_best = nearest_neigbor(vec_pos, query_vec)

pos_lin = nx.get_node_attributes(G_lin, 'pos')
pos_best = nx.get_node_attributes(G_best, 'pos')

fig, axs = plt.subplots()

# The three graphs are painted onto the same axes in this order, so later entries
# end up on top: all points (pale), the query (dark red), the best match (gold).
draw_layers = [
    (G_lin, pos_lin,
     dict(node_size=150, node_color=[[0.8,0.8,1]], width=0.0, font_size=7)),
    (G_query, pos_query,
     dict(node_size=200, node_color=[[0.5,0,0]], font_color='white', width=0.5,
          font_size=7, font_weight='bold')),
    (G_best, pos_best,
     dict(node_size=200, node_color=[[0.85,0.7,0.2]], width=0.5, font_size=7,
          font_weight='bold')),
]
for layer_graph, layer_pos, layer_style in draw_layers:
    nx.draw(layer_graph, layer_pos, with_labels=True, ax=axs, **layer_style)

In [None]:
# HNSW Construction
GraphArray = construct_HNSW(vec_pos, m_nearest_neighbor)

top_layer = len(GraphArray) - 1

# Layer 0 is drawn as a faint backdrop behind every higher layer. Its positions are the
# same on every iteration, so look them up once (only needed when a layer above 0 exists).
if top_layer > 0:
    pos_layer_0 = nx.get_node_attributes(GraphArray[0], 'pos')

# One figure per layer, walking from the highest layer index down to layer 0.
for layer_i in range(top_layer, -1, -1):
    fig, axs = plt.subplots()
    print("layer_i = ", layer_i)
    if layer_i > 0:
        nx.draw(GraphArray[0], pos_layer_0, with_labels=True, node_size=120, node_color=[[0.9,0.9,1]], width=0.0,
                font_size=6, font_color=(0.65,0.65,0.65), ax=axs)

    # The current layer, then the query node and the brute-force result on top of it.
    pos_layer_i = nx.get_node_attributes(GraphArray[layer_i], 'pos')
    nx.draw(GraphArray[layer_i], pos_layer_i, with_labels=True, node_size=150, node_color=[[0.7,0.97,1]], width=0.5,
            font_size=7, ax=axs)
    nx.draw(G_query, pos_query, with_labels=True, node_size=200, node_color=[[0.8,0,0]], width=0.5, font_size=7,
            font_weight='bold', ax=axs)
    nx.draw(G_best, pos_best, with_labels=True, node_size=200, node_color=[[0.85,0.7,0.2]], width=0.5, font_size=7,
            font_weight='bold', ax=axs)
    plt.show()

In [None]:
# HNSW Search
# search_HNSW returns two sequences that are indexed per layer below: one graph of the
# search path and one graph of the entry point for each layer.
SearchPathGraphArray, EntryGraphArray = search_HNSW(GraphArray, G_query)

top_layer = len(GraphArray) - 1

# Layer 0 is drawn as a faint backdrop behind every higher layer. Its positions are the
# same on every iteration, so look them up once (only needed when a layer above 0 exists).
if top_layer > 0:
    pos_layer_0 = nx.get_node_attributes(GraphArray[0], 'pos')

# One figure per layer, walking from the highest layer index down to layer 0.
for layer_i in range(top_layer, -1, -1):
    fig, axs = plt.subplots()
    print("layer_i = ", layer_i)
    G_path_layer = SearchPathGraphArray[layer_i]
    pos_path = nx.get_node_attributes(G_path_layer, 'pos')
    G_entry = EntryGraphArray[layer_i]
    pos_entry = nx.get_node_attributes(G_entry, 'pos')

    if layer_i > 0:
        nx.draw(GraphArray[0], pos_layer_0, with_labels=True, node_size=120, node_color=[[0.9,0.9,1]], width=0.0,
                font_size=6, font_color=(0.65,0.65,0.65), ax=axs)

    # Painted back to front: layer graph, search path, query, brute-force result, entry node.
    pos_layer_i = nx.get_node_attributes(GraphArray[layer_i], 'pos')
    nx.draw(GraphArray[layer_i], pos_layer_i, with_labels=True, node_size=100, node_color=[[0.7,0.7,1]], width=0.5,
            font_size=6, ax=axs)
    nx.draw(G_path_layer, pos_path, with_labels=True, node_size=110, node_color=[[0.8,1,0.8]], width=0.5, font_size=6,
            ax=axs)
    nx.draw(G_query, pos_query, with_labels=True, node_size=80, node_color=[[0.8,0,0]], width=0.5, font_size=7,
            ax=axs)
    nx.draw(G_best, pos_best, with_labels=True, node_size=70, node_color=[[0.85,0.7,0.2]], width=0.5, font_size=7,
            ax=axs)
    nx.draw(G_entry, pos_entry, with_labels=True, node_size=80, node_color=[[0.1,0.9,0.1]], width=0.5, font_size=7,
            ax=axs)
    plt.show()

In [None]:
# Pure Vector Search with a vector database
import json
import weaviate
from weaviate import EmbeddedOptions

# Client configured with default EmbeddedOptions.
embedded_options = EmbeddedOptions()
client = weaviate.Client(embedded_options=embedded_options)
# Author's note: can't continue past this point on Windows because weaviate doesn't support it.
client.is_ready()

In [None]:
# Resetting the schema. CAUTION: This will delete your collections
# if client.schema.exists("MyCollection"):
#     client.schema.delete_class("MyCollection")

# Class definition: vectors are supplied by the caller ("vectorizer": "none") and
# compared with cosine distance.
schema = {
    "class": "MyCollection",
    "vectorizer": "none",
    "vectorIndexConfig": {
        "distance": "cosine",
    },
}

# Creating a class that already exists is rejected by the server, which made this cell
# fail when re-run. Only create the class when it is missing; existing data is left alone.
if client.schema.exists(schema["class"]):
    print("Class 'MyCollection' already exists; keeping the existing schema.")
else:
    client.schema.create_class(schema)
    print("Succesfully create the schema.")

In [None]:
# Import the Data
# Each record is written compactly as (title, foo, vector) and expanded into the
# dict layout used by the rest of the notebook: keys "title", "foo" and "vector".
data = [
    {"title": title, "foo": foo, "vector": vector}
    for title, foo, vector in (
        ("First Object", 99, [0.1, 0.1, 0.1, 0.1, 0.1, 0.1]),
        ("Second Object", 77, [0.2, 0.3, 0.4, 0.5, 0.6, 0.7]),
        ("Third Object", 55, [0.3, 0.1, -0.1, -0.3, -0.5, -0.7]),
        ("Fourth Object", 33, [0.4, 0.41, 0.42, 0.43, 0.44, 0.45]),
        ("Fifth Object", 11, [0.5, 0.5, 0, 0, 0, 0]),
    )
]

In [None]:
client.batch.configure(batch_size=10)

# Batch import all objects, even though it's just 5 objects. It's a good habit
with client.batch as batch:
    for item in data:
        # Everything except the embedding is stored as an object property.
        properties = {
            "title": item["title"],
            "foo": item["foo"],
        }
        # Add through the context-manager handle rather than reaching back to client.batch.
        batch.add_data_object(
            class_name="MyCollection",
            data_object=properties,
            vector=item["vector"],  # Vector embeddings go here
        )

In [None]:
# Fetch the titles of the 2 objects returned for a near-vector search.
near_vector = {"vector": [-0.012, 0.021, -0.23, -0.42, 0.5, 0.5]}

query = client.query.get("MyCollection", ["title"])
query = query.with_near_vector(near_vector)
query = query.with_limit(2)
response = query.do()

# The matching objects are nested under data -> Get -> <class name> in the response.
result = response["data"]["Get"]["MyCollection"]
print(json.dumps(result, indent=2))

In [None]:
# Same near-vector search, additionally requesting the distance, stored vector and id
# of each hit via the _additional fields.
response = (
    client.query
    .get("MyCollection", ["title"])
    .with_near_vector({
        "vector": [-0.012, 0.021, -0.23, -0.42, 0.5, 0.5]
    })
    .with_limit(2) # limit the output to only 2
    # One property name per list element (previously "vector, id" was a single string
    # that only worked because GraphQL treats commas as whitespace).
    .with_additional(["distance", "vector", "id"])
    .do()
)

# The matching objects are nested under data -> Get -> <class name> in the response.
result = response["data"]["Get"]["MyCollection"]
print(json.dumps(result, indent=2))


In [None]:
# Near-vector search restricted to objects whose "foo" property is greater than 44.
response = (
    client.query
    .get("MyCollection", ["title", "foo"])
    .with_near_vector({
        "vector": [-0.012, 0.021, -0.23, -0.42, 0.5, 0.5]
    })
    # Output the distance of the query vector to the objects in the database, plus each
    # object's id. One property name per list element (previously the single string
    # "distance, id", which only worked because GraphQL treats commas as whitespace).
    .with_additional(["distance", "id"])
    .with_where({
        "path": ["foo"],
        "operator": "GreaterThan",
        "valueNumber": 44
    })
    .with_limit(2) # limit the output to only 2
    .do()
)

# The matching objects are nested under data -> Get -> <class name> in the response.
result = response["data"]["Get"]["MyCollection"]
print(json.dumps(result, indent=2))

In [None]:
# nearObject Example
# Anchor the search on the first hit currently held in `result`; that entry must carry
# an id under '_additional'.
anchor_id = result[0]['_additional']['id']

response = (
    client.query
    .get("MyCollection", ["title"])
    .with_near_object({"id": anchor_id})
    .with_limit(3)
    # "id" is requested as well because `result` is overwritten below: without it,
    # re-running this cell would fail with KeyError on the anchor lookup above.
    .with_additional(["distance", "id"])
    .do()
)
result = response["data"]["Get"]["MyCollection"]
print(json.dumps(result, indent=2))