In [1]:
from collections import OrderedDict, defaultdict
from typing import Callable, Tuple, Dict, List

import numpy as np
from tqdm.auto import tqdm

In [2]:
# pointA: (1, D); documents: (N, D)
def distance(pointA: np.ndarray, documents: np.ndarray) -> np.ndarray:
    return np.linalg.norm(pointA - documents, axis=1).reshape(-1, 1)

In [3]:
pointA = np.random.rand(1, 3)
documents = np.random.rand(10, 3)

In [4]:
dist = distance(pointA, documents)

In [5]:
dist.shape

(10, 1)

In [6]:
dist

array([[0.92746416],
       [0.81935806],
       [0.74958029],
       [0.68186471],
       [1.1651381 ],
       [0.73913866],
       [1.10971363],
       [0.83560655],
       [1.08783558],
       [0.77737634]])

In [7]:
np.argsort(dist, axis=0)

array([[3],
       [5],
       [2],
       [9],
       [1],
       [7],
       [0],
       [8],
       [6],
       [4]], dtype=int64)

In [8]:
dist[np.argsort(dist, axis=0)]

array([[[0.68186471]],

       [[0.73913866]],

       [[0.74958029]],

       [[0.77737634]],

       [[0.81935806]],

       [[0.83560655]],

       [[0.92746416]],

       [[1.08783558]],

       [[1.10971363]],

       [[1.1651381 ]]])

In [9]:
np.argsort(dist, axis=0)[-5:]

array([[7],
       [0],
       [8],
       [6],
       [4]], dtype=int64)

In [10]:
a = np.argsort(dist, axis=0).reshape(-1,)[:5]

In [11]:
np.random.choice(a, size=2, replace=False)

array([3, 2], dtype=int64)

In [12]:
{0: 1}

{0: 1}

In [13]:
a = [0, 1]
a.extend([2])
a

[0, 1, 2]

In [14]:
print([0, 1].extend([2]))

None


In [15]:
def create_sw_graph(
        data: np.ndarray,
        num_candidates_for_choice_long: int = 10,
        num_edges_long: int = 5,
        num_candidates_for_choice_short: int = 10,
        num_edges_short: int = 5,
        use_sampling: bool = False,
        sampling_share: float = 0.05,
        dist_f: Callable = distance
    ) -> Dict[int, List[int]]:
    # допишите ваш код здесь
    graph = {}
    for i, point in enumerate(data):
        candidates_for_point = []
        
        distances = dist_f(point, data)
        sorted_distances = np.argsort(distances, axis=0).reshape(-1,)
    
        further_points = sorted_distances[-num_candidates_for_choice_long:]
        further_points = np.random.choice(further_points, size=num_edges_long, replace=False)
        candidates_for_point.extend(list(further_points))
        
        closer_points = sorted_distances[1:num_candidates_for_choice_short+1]
        closer_points = np.random.choice(closer_points, size=num_edges_short, replace=False)
        candidates_for_point.extend(list(closer_points))
        graph[i] = candidates_for_point
    return graph

In [16]:
documents = np.random.rand(100, 5)

In [17]:
documents[0].reshape(1, -1).shape

(1, 5)

In [18]:
graph = create_sw_graph(documents)

In [19]:
graph[0]

[74, 10, 76, 37, 5, 91, 70, 22, 40, 92]

In [20]:
np.linalg.norm(documents[0] - documents[95]), np.linalg.norm(documents[0] - documents[80]),

(0.8315993874278309, 0.8653358891683167)

In [56]:
def nsw(query_point: np.ndarray,
        all_documents: np.ndarray, 
        graph_edges: Dict[int, List[int]],
        search_k: int = 10,
        num_start_points: int = 5,
        dist_f: Callable = distance) -> np.ndarray:
    # my code below 
    start_points = np.random.choice(all_documents.shape[0], size=num_start_points, replace=False)
    
    def search_candidates(query, graph, next_elem, visited, first_min_elem, second_min_elem):
        visited.append(next_elem)
        documents = all_documents[graph[start_point]]
        distances = dist_f(query, documents)
        sorted_distances = np.argsort(distances, axis=0).reshape(-1,)
        first_closest_candidate_ind = sorted_distances[0]
        second_closest_candidate_ind = sorted_distances[1]
        first_closest_candidate_dist = distances[first_closest_candidate_ind]
        second_closest_candidate_dist = distances[second_closest_candidate_ind]
        
        if first_min_elem['dist'] > first_closest_candidate_dist:
            first_min_elem['ind'] = first_closest_candidate_ind
            first_min_elem['dist'] = first_closest_candidate_dist
            next_elem = first_closest_candidate_ind
        
        if second_min_elem['dist'] > second_closest_candidate_dist and first_min_elem['ind'] != second_closest_candidate_ind:
            second_min_elem['ind'] = second_closest_candidate_ind
            second_min_elem['dist'] = second_closest_candidate_dist
            next_elem = second_closest_candidate_ind
        elif second_min_elem['dist'] > first_closest_candidate_dist and first_min_elem['ind'] != first_closest_candidate_ind:
            second_min_elem['ind'] = first_closest_candidate_ind
            second_min_elem['dist'] = first_closest_candidate_dist
            next_elem = first_closest_candidate_ind
            
        if next_elem in visited:
            print(first_min_elem)
            print(second_min_elem)
            return (first_min_elem, second_min_elem)
        search_candidates(query, graph, next_elem, visited, first_min_elem, second_min_elem)
        
    
    closest_candidates = []
    distances = []
    for start_point in start_points:
        first_elem = {'ind': -1, 'dist': float('inf')}
        second_elem = {'ind': -1, 'dist': float('inf')}
        visited = []
        first_min_elem, second_min_elem = search_candidates(query_point, graph, start_point, visited, first_elem, second_elem)
        closest_candidates.extend[[first_min_elem['ind'], second_min_elem['ind']]]
        distances.extend[[first_min_elem['dist'], second_min_elem['dist']]]  
    
    top_closest_points = np.argsort(np.array(distances), axis=0)[:search_k]
    closest_candidates = np.array(closest_candidates)[top_closest_points]
    return all_documents[closest_candidates]
            
        
        

In [57]:
query = np.array([1., 1., 1., 1., 1.])

In [58]:
query.shape

(5,)

In [59]:
candidates = nsw(query, documents, graph)

{'ind': 3, 'dist': array([1.07867288])}
{'ind': 5, 'dist': array([1.11483531])}


TypeError: cannot unpack non-iterable NoneType object