In [23]:
# from utils import *
import random
import Graph as G
import numpy as np


# Get vertices from DB and insert them into the graph
def Initialize_Random_Graph(DBPath='dataset/DB1K.csv',R=5):
    """Load vectors from a CSV file into a graph and add random out-edges.

    Every non-blank line of the file is parsed as comma-separated floats.
    The first field (cast to int) becomes the vertex key and the remaining
    fields become the vertex value.  For each vertex, R random draws of
    another vertex are made and an edge is added to each drawn vertex.

    Args:
        DBPath: path of the CSV file to read.
        R: number of random neighbour draws per vertex.  The same neighbour
            may be drawn more than once.

    Returns:
        The populated G.Graph.  A graph with fewer than two vertices is
        returned without any edges (there is no other vertex to link to).
    """
    DBGraph=G.Graph()
    # Only the parsing needs the file; it is closed before edges are built.
    with open(DBPath, 'r') as f:
        for line in f:
            line = line.strip()
            if not line:
                # Blank lines (e.g. a trailing empty line) carry no vector.
                continue
            row = np.array(line.split(','),dtype=float)
            DBGraph.add_vertex(G.Vertex(int(row[0]),row[1:]))

    # Choose R random neighbors for each vertex
    size= len(DBGraph.verticies)
    if size < 2:
        # Return the graph itself rather than None so callers always get a
        # Graph; with a single vertex the self-avoiding draw below would
        # never terminate.
        return DBGraph
    for vertex in DBGraph:
        for _ in range(R):
            # NOTE(review): the random lookup assumes vertex keys are exactly
            # 0..size-1 -- confirm against the dataset and Graph.get_vertex.
            neighbor= DBGraph.get_vertex(int(random.random()*size))
            while(neighbor==vertex):
                # Redraw until the neighbour is not the vertex itself.
                neighbor= DBGraph.get_vertex(int(random.random()*size))
            DBGraph.add_edge((vertex.key,vertex.value),(neighbor.key,neighbor.value))
    return DBGraph


#gets euclidean distance between 2 vectors
def get_distance(v1,v2):
    """Return np.linalg.norm of v1 - v2 (the Euclidean distance for vectors)."""
    difference = v1 - v2
    return np.linalg.norm(difference)


#gets medoid of a graph
def get_medoid(DBGraph):
    """Find the vertex whose summed distance to all other vertices is smallest.

    Args:
        DBGraph: iterable of vertices, each exposing a `.value` vector.  It is
            iterated once per vertex, so the cost is quadratic in its size.

    Returns:
        (medoid, min_distance): the winning vertex and its total distance.
        For an empty graph this is (None, float('inf')).
    """
    # Start from +inf rather than a large magic number so that any finite
    # total distance, however large, can be selected.
    min_distance=float('inf')
    medoid=None
    for vertex in DBGraph:
        # Sum of distances from this vertex to every vertex not equal to it.
        current_total_distance=sum(
            get_distance(vertex.value,other.value)
            for other in DBGraph
            if not (vertex==other)
        )

        # Strict comparison: on ties the first vertex encountered wins.
        if(current_total_distance<min_distance):
            min_distance=current_total_distance
            medoid=vertex
    return medoid,min_distance



#gets arg min distance from any vertex in Anyset to Query and min dist
def get_min_dist (Anyset,Query):
    """Return the vertex of Anyset closest to Query, with its distance.

    Args:
        Anyset: iterable of vertices, each exposing a `.value` vector.
        Query: vector compared against each vertex value via get_distance.

    Returns:
        (min_vertex, min_dist).  For an empty Anyset this is
        (None, float('inf')).
    """
    # Start from +inf rather than a large magic number so that any finite
    # distance, however large, can be selected.
    min_dist=float('inf')
    min_vertex=None
    for vertex in Anyset:
        dist=get_distance(vertex.value,Query)
        # Strict comparison: on ties the first vertex encountered wins.
        if(dist<min_dist):
            min_dist,min_vertex=dist,vertex
    return min_vertex,min_dist


# Initially, start is the medoid.
# start is a vertex, Query is a vector.
# k is the number of closest points to return, L is the maximum size of the search list.
def Greedy_Search(start,Query,k,L):
    """Greedy walk through the graph from `start` towards `Query`.

    At each step the unvisited candidate closest to Query is expanded: its
    neighbours join the candidate list, and the list is trimmed to the L
    closest points whenever it grows beyond L.  The walk stops once every
    remaining candidate has been visited.

    Args:
        start: vertex the walk begins at.
        Query: vector being searched for.
        k: number of closest candidates to return.
        L: maximum size of the candidate list.

    Returns:
        (closest, Visited): a set holding the k candidates nearest to Query,
        and the set of every vertex that was expanded.
    """
    def distance_to_query(vertex):
        return get_distance(vertex.value,Query)

    candidates={start}
    Visited=set()
    #TODO: make the visited and the possible frontier set of indices instead of vertices to save ram.
    frontier=candidates
    while frontier:
        nearest,_=get_min_dist(frontier,Query)
        candidates=candidates.union(nearest.neighbors)
        Visited.add(nearest)
        if len(candidates)>L:
            # keep only the L candidates closest to the query
            candidates=set(sorted(candidates,key=distance_to_query)[:L])

        frontier=candidates-Visited

    # keep only the k candidates closest to the query
    top_k=sorted(candidates,key=distance_to_query)[:k]
    return set(top_k),Visited

# Robust pruning 
def Robust_Prune(point,candidate_set,alpha,R):
    """Rebuild point.neighbors from a candidate pool, keeping diverse close points.

    The pool is candidate_set plus the current neighbours of `point`, with
    `point` itself excluded.  The closest remaining candidate is repeatedly
    added as a neighbour, and every candidate c satisfying
    alpha * d(chosen, c) <= d(c, point) is then dropped from the pool.  The
    loop ends when the pool is empty or `point` has R neighbours.

    Args:
        point: vertex whose `.neighbors` set is replaced in place.
        candidate_set: set of candidate vertices; it is not mutated.
        alpha: scale factor applied to the chosen-to-candidate distance.
        R: number of neighbours at which selection stops.

    Returns:
        None.  The result is stored in point.neighbors.
    """
    # union() and difference() each return a new set, so the result must be
    # re-assigned.  Discarding the difference would leave `point` in the pool,
    # where it is always the closest candidate (distance 0) and would be
    # selected as its own neighbour.
    candidate_set=candidate_set.union(point.neighbors).difference({point})
    point.neighbors=set()
    while candidate_set:
        p_star,_= get_min_dist(candidate_set,point.value)
        point.neighbors.add(p_star)
        if(len(point.neighbors)==R):
            break
        # Keep only candidates that p_star does not cover.  p_star itself is
        # always dropped here (its distance to itself is 0), which is what
        # guarantees that the loop makes progress.
        candidate_set={
            candidatePoint
            for candidatePoint in candidate_set
            if not (alpha * get_distance(p_star.value,candidatePoint.value)<=get_distance(candidatePoint.value,point.value))
        }


def Build_Index(datasetPath, R, L):
    """Build the graph index for a dataset file.

    The graph is created by Initialize_Random_Graph, its medoid is computed,
    and iterationOverGraph is run over it twice: once with alpha=1 and once
    with alpha=2.

    Args:
        datasetPath: path handed to Initialize_Random_Graph.
        R: neighbour count handed to graph creation and to each pass.
        L: search list size handed to each pass.

    Returns:
        (index, medoid): the refined graph and its medoid vertex.
    """
    graph=Initialize_Random_Graph(datasetPath,R)
    # the graph is read in correctly at this point
    entry_point,_=get_medoid(graph)

    # two passes over the graph: alpha=1 first, then alpha=2
    for alpha in (1,2):
        iterationOverGraph(graph,entry_point,R,L,alpha)

    return graph,entry_point
    
def iterationOverGraph(Graph,medoid,R,L,alpha):
    """Run one refinement pass over every vertex of the graph, in random order.

    For each vertex, a greedy search from `medoid` towards the vertex's own
    value collects the visited set, which Robust_Prune then uses to rebuild
    the vertex's neighbours.  The vertex is then added as a neighbour of each
    of its new neighbours, and any of those that ends up with more than R
    neighbours is pruned as well.

    Args:
        Graph: graph exposing get_vertices() and get_vertex(key).
        medoid: vertex every greedy search starts from.
        R: neighbour limit passed to Robust_Prune.
        L: search list size passed to Greedy_Search.
        alpha: pruning factor passed to Robust_Prune.
    """
    # random permutation + sequential graph update
    visit_order = list(Graph.get_vertices())
    random.shuffle(visit_order)
    for key in visit_order:
        current = Graph.get_vertex(key)

        _, visited = Greedy_Search(medoid, current.value, k=1, L=L)
        Robust_Prune(current, visited, alpha, R=R)

        for out_neighbor in current.get_neighbors():
            # CHECK: the backward edge is always added before the size test;
            # it is still unconfirmed whether it should be added in all cases.
            out_neighbor.add_neighbor(current)
            if len(out_neighbor.get_neighbors()) <= R:
                continue
            Robust_Prune(out_neighbor, out_neighbor.get_neighbors(), alpha, R=R)
    

In [29]:
# Index and search parameters used by this cell:
#   R - neighbour count passed to Build_Index
#   k - number of closest points requested from Greedy_Search
#   L - search list size passed to Build_Index and Greedy_Search
R=5
k=10
L=15

# CSV dataset the index is built from
path='dataset/DB10K.csv'

# DBGraph=Initialize_Random_Graph(path,R)
# # print(DBGraph)
# medoid,min_distance=get_medoid(DBGraph)

# Build the graph index; medoid is the entry point for every search below
index,medoid=Build_Index(path, R, L)

# Random query of shape (1, 70), values in [0, 1)
# NOTE(review): 70 presumably matches the dataset's vector dimension -- confirm
random_vec=np.random.rand(1,70)
# Ler is the set of k closest points found; the visited set is discarded
Ler,_=Greedy_Search(medoid,random_vec,k,L)
# def get_max_dist(Ler,Query):
#     max_dist=0
#     max_vertex=None
#     for vertex in Ler:
#         # print("vertex",vertex)
#         # print("Query",Query[:3])
#         dist=get_distance(vertex.value,Query)
#         if(dist>max_dist):
#             max_dist=dist
#             max_vertex=vertex
#     return max_vertex,max_dist
# def get_min_dist (Anyset,Query):
#     min_dist=10000000000000000000
#     min_vertex=None
#     for vertex in Anyset:
#         # print("vertex",vertex)
#         # print("Query",Query[:3])
#         dist=get_distance(vertex.value,Query)
#         if(dist<min_dist):
#             min_dist=dist
#             min_vertex=vertex
#     return min_vertex,min_dist

# mm=get_min_dist(index,random_vec)
# print(mm)
#how to choose query? how to choose k,L?

KeyboardInterrupt: 

In [27]:
# Show the query vector, then every returned point with its distance to the query.
# Relies on random_vec and Ler having been produced by the search cell.
print('The vector is:')
print(random_vec)
print('------------------------')
print('len of Ler is:',len(Ler))
for lol in Ler:
    # vector stored on the returned vertex
    print(lol.value)
    # its distance to the query vector
    print(get_distance(lol.value,random_vec))
    print('------------------------')

The vector is:
[[0.34284685 0.61345826 0.56520196 0.26185731 0.41247723 0.46176766
  0.1438098  0.57236434 0.78980605 0.12647628 0.3513893  0.08497301
  0.92180417 0.54813765 0.94537368 0.13277892 0.083365   0.99465197
  0.44294356 0.05252577 0.5740854  0.31713588 0.48172388 0.96237276
  0.96697401 0.5421257  0.26841494 0.37521851 0.31373366 0.1691582
  0.50542544 0.87601951 0.47641725 0.3665871  0.46014819 0.8920753
  0.68477186 0.64502706 0.86036474 0.70339308 0.64521135 0.58645342
  0.53974365 0.17843152 0.38169287 0.29472313 0.72344823 0.32939751
  0.60630288 0.73172608 0.17319438 0.72985526 0.23830515 0.09100695
  0.26701468 0.3318234  0.74757218 0.69168251 0.00200957 0.0464958
  0.63914038 0.52147972 0.90342694 0.17439585 0.57874142 0.21475757
  0.87792499 0.36408532 0.50508989 0.17784584]]
------------------------
len of Ler is: 10
[0.30936 0.34488 0.74494 0.7653  0.42406 0.55318 0.16336 0.48436 0.77142
 0.68578 0.42843 0.038   0.4909  0.94052 0.70254 0.49991 0.55169 0.60887
 0.

In [None]:
from memory_profiler import memory_usage

def run_queries():
    """Placeholder for the query workload measured by memory_usage_run_queries.

    The body has not been written yet.  Raising here keeps the cell
    syntactically valid and makes an accidental call fail loudly instead of
    silently measuring nothing.

    NOTE(review): memory_usage_run_queries returns a module-level `results`,
    so the real implementation presumably has to assign it -- confirm.

    Raises:
        NotImplementedError: always, until the workload is implemented.
    """
    raise NotImplementedError("run_queries has not been implemented yet")


def memory_usage_run_queries(args):
    """Run run_queries under memory_profiler and report the memory it added.

    Args:
        args: positional arguments forwarded to run_queries through the
            `proc` tuple given to memory_usage.

    Returns:
        (results, increase): the module-level `results` object, and the
        largest memory sample taken during the run minus the largest sample
        taken before it.
    """
    global results
    # Baseline taken before the run, used to calculate the RAM usage
    baseline = max(memory_usage())
    samples = memory_usage(proc=(run_queries, args, {}), interval = 1e-3)
    return results, max(samples) - baseline