# NMSLearn
> NMSLib objects available through an interface just like sklearn's `NearestNeighbors` API

## Installation

Just run:

```
pip install git+https://github.com/AlanGanem/NMSLearn
```

or clone the repository and run `setup.py`.

Available only for Python 3.8 or lower.

# Available Wrappers

- [X] `NMSLibSklearnWrapper` - Base class for building classes for different metrics
- [X] `FastJaccardNN` - Class to perform Jaccard-dissimilarity-based nearest neighbors search
- [X] `FastL2NN` - Class to perform L2 (Euclidean) distance-based nearest neighbors search
- [X] `FastKLDivNN` - Class to perform Kullback-Leibler-divergence-based nearest neighbors search

## Usage example

In [1]:
#hide
import sys
sys.path.append('..')

In [2]:
#import L2 nearest neighbor class
from nmslearn.neighbors import FastL2NN
#numpy for data generation
import numpy as np

In [3]:
#make random data to create index
X = np.random.randn(100_000, 10)
#instantiate nearest neighbor object with default parameters
l2nn = FastL2NN(verbose = True)
#fit the index
l2nn.fit(X)

FastL2NN(verbose=True)

In [4]:
#use fitted index to query new data
query_matrix = np.random.randn(3, 10)

distances, indexes = l2nn.kneighbors(query_matrix, n_neighbors = 30, n_jobs = 8, return_distance = True)

distances, indexes

kNN time total=0.008003 (sec), per query=0.002668 (sec), per query adjusted for thread number=0.021342 (sec)


([array([1.3245634, 1.6304972, 2.3469138, 2.5226061, 2.571098 , 2.5826378,
         2.58591  , 2.7503452, 2.9602616, 3.051869 , 3.0761793, 3.0987031,
         3.1178   , 3.126969 , 3.1348734, 3.2131133, 3.2408729, 3.2818627,
         3.3321965, 3.339151 , 3.3681705, 3.3815045, 3.3855784, 3.4641187,
         3.5331888, 3.5802543, 3.6158307, 3.6223383, 3.666287 , 3.6673372],
        dtype=float32),
  array([0.61047035, 0.6418276 , 0.7520662 , 0.8606815 , 0.8694241 ,
         0.89354324, 0.9396638 , 1.0212313 , 1.046349  , 1.0705577 ,
         1.0905973 , 1.1210229 , 1.1351492 , 1.214601  , 1.2423488 ,
         1.2616279 , 1.2741274 , 1.4380698 , 1.4898877 , 1.6442922 ,
         1.6522729 , 1.6648743 , 1.6743813 , 1.6807532 , 1.6844735 ,
         1.7031903 , 1.7120422 , 1.7446207 , 1.7527819 , 1.7674448 ],
        dtype=float32),
  array([3.6314204, 4.0911403, 4.2446575, 4.4768524, 4.939749 , 5.056326 ,
         5.184124 , 5.2949767, 5.351741 , 5.3570704, 5.376051 , 5.4323053,
         5.

In [5]:
import joblib
#serialize object with joblib or any other serializer
joblib.dump(l2nn,'l2nn.sav')

#deserialize
l2nn = joblib.load('l2nn.sav')

In [6]:
#appends to index with partial_fit method
l2nn.partial_fit(X)

FastL2NN(verbose=True)

In [7]:
#query again; the index now also contains the rows appended by partial_fit

distances, indexes = l2nn.kneighbors(query_matrix, n_neighbors = 30, n_jobs = 8, return_distance = True)

distances, indexes

kNN time total=0.027713 (sec), per query=0.009238 (sec), per query adjusted for thread number=0.073902 (sec)


([array([1.3245634, 1.3245634, 1.6304972, 1.6304972, 2.3469138, 2.3469138,
         2.5226061, 2.5226061, 2.571098 , 2.571098 , 2.5826378, 2.5826378,
         2.58591  , 2.58591  , 2.7503452, 2.7503452, 2.9602616, 2.9602616,
         3.051869 , 3.051869 , 3.0761793, 3.0761793, 3.0987031, 3.0987031,
         3.1178   , 3.1178   , 3.126969 , 3.126969 , 3.1348734, 3.1348734],
        dtype=float32),
  array([0.61047035, 0.61047035, 0.6418276 , 0.6418276 , 0.7520662 ,
         0.7520662 , 0.8606815 , 0.8606815 , 0.8694241 , 0.8694241 ,
         0.89354324, 0.89354324, 0.9396638 , 0.9396638 , 1.0212313 ,
         1.0212313 , 1.046349  , 1.046349  , 1.0705577 , 1.0705577 ,
         1.0905973 , 1.0905973 , 1.1210229 , 1.1210229 , 1.1351492 ,
         1.1351492 , 1.214601  , 1.214601  , 1.2423488 , 1.2423488 ],
        dtype=float32),
  array([3.6314204, 3.6314204, 4.0911403, 4.0911403, 4.2446575, 4.2446575,
         4.4768524, 4.4768524, 4.939749 , 4.939749 , 5.056326 , 5.056326 ,
         5.