# Setup
- Change `MODEL` to use different models: `tiny`, `tiny_longer`, `small`, `small_pretrained`

In [1]:
import sys
import os
from dotenv import load_dotenv
from random import randint
import numpy as np
import timeit

# Load variables from a .env file (python-dotenv) before any os.getenv() lookup below.
load_dotenv()

# Extend the import path with CODE_ROOT_PATH before importing from `src`.
_code_root = os.getenv('CODE_ROOT_PATH')
sys.path.append(_code_root)

from src.models.lsh_nn import LSH

# Name of the model whose artifacts are used; it selects the subfolder below.
MODEL = 'tiny'

# All artifacts live under the TRAINED_MODELS directory: the query-index file
# sits directly in it, everything else in the MODEL subfolder.
_trained_models_dir = os.getenv('TRAINED_MODELS')
_model_dir = os.path.join(_trained_models_dir, MODEL)

NN_QUERY_INDICES = os.path.join(_trained_models_dir, 'nn_query_indices.npy')
FEATURES_PATH = os.path.join(_model_dir, 'features.npy')
NN_EASY_CANDIDATES = os.path.join(_model_dir, 'nn_easy_candidates.npy')
NN_MEDIUM_CANDIDATES = os.path.join(_model_dir, 'nn_medium_candidates.npy')
NN_HARD_CANDIDATES = os.path.join(_model_dir, 'nn_hard_candidates.npy')

# Generate LSH Index Data Structure

In [2]:
# Build the LSH index from the feature file at FEATURES_PATH; the query cells
# further down call `lsh.knn(...)` on this object.
lsh = LSH(features_path=FEATURES_PATH)

## Generate Index: Runtime Experiment

In [3]:
# Construct the index `times` times and print the mean seconds per construction.
times = 1000
result = timeit.Timer(
    stmt='LSH(features_path=FEATURES_PATH, print_details=False)',
    globals=globals(),
).timeit(number=times)
print(result / times)

# Query the LSH Data Structure

Patches are classified as follows. Note that this classification is still *partly subjective*.

Difficulty | #Patches | Explanation | Indices
--- | --- | --- | ---
**Easy** | 10 | Single or very simple concept (regular pattern) | 2, 7, 11, 18, 20, 21, 23, 28, 61, 75
**Medium** | 20 | Multiple or more complex concepts (multiple patterns) | 1, 6, 10, 17, 37, 42, 93, 153, 154, 175, 324, 549, 734, 757, 813, 1109, 1168, 1198, 1290, 1489
**Hard** | 10 | Concept only takes up a small portion of the pixels (small patterns) | 14, 25, 44, 57, 217, 420, 685, 817, 1011, 1355

In [4]:
# Hand-picked query patches, grouped by difficulty. The order matters: the
# candidate-search cell slices the loaded queries positionally as
# rows 0-9 (easy), 10-29 (medium) and 30-39 (hard).
example_indices = [
    2, 7, 11, 18, 20, 21, 23, 28, 61, 75,  # Easy
    1, 6, 10, 17, 37, 42, 93, 153, 154, 175, 324, 549, 734, 757, 813, 1109, 1168, 1198, 1290, 1489,  # Medium
    14, 25, 44, 57, 217, 420, 685, 817, 1011, 1355,  # Hard
]
# Guard the 10 + 20 + 10 layout that the positional slicing depends on.
assert len(example_indices) == 40, "expected 10 easy + 20 medium + 10 hard query indices"

# Write to the same constant the query cell loads from, instead of rebuilding
# the path by hand, so the writer and the reader cannot drift apart.
np.save(NN_QUERY_INDICES, example_indices)

In [5]:
# Feature vectors of the hand-picked query patches, in the order they were saved.
features = np.load(FEATURES_PATH).astype(np.float32)
queries = features[np.load(NN_QUERY_INDICES)]


def _knn_candidates(query_rows, k):
    """Return the stacked `lsh.knn` results for the given rows of `queries`."""
    return np.array([lsh.knn(queries[row], k=k) for row in query_rows])


# The query patch is itself part of the dataset and is returned as its own
# first "neighbor", so each k below is the wanted neighbor count plus one.

# "Easy" patches (rows 0-9): 100 nearest neighbors.
nn_easy_candidates = _knn_candidates(range(0, 10), k=101)
np.save(NN_EASY_CANDIDATES, nn_easy_candidates)

# "Medium" patches (rows 10-29): 50 nearest neighbors.
nn_medium_candidates = _knn_candidates(range(10, 30), k=51)
np.save(NN_MEDIUM_CANDIDATES, nn_medium_candidates)

# "Hard" patches (rows 30-39): 20 nearest neighbors.
nn_hard_candidates = _knn_candidates(range(30, 40), k=21)
np.save(NN_HARD_CANDIDATES, nn_hard_candidates)

## Query Index: Runtime Experiment

In [6]:
features = np.load(FEATURES_PATH).astype(np.float32)


def query(i):
    """Run one k=100 nearest-neighbor lookup for the feature vector at row `i`."""
    lsh.knn(features[i], 100)


# Number of full passes over the dataset to time.
repeats = 5
result = timeit.timeit('[query(n) for n in range(len(features))]', globals=globals(), number=repeats)
# Every pass issues len(features) queries, so dividing by repeats * len(features)
# gives the mean seconds per query. The previous divisor `5*400.000` evaluated
# to 2000.0 (400.000 is the float 400.0), regardless of the dataset size.
print(result / (repeats * len(features)))