# huggingface

> Vector database backend built on Hugging Face `datasets` indexes

In [None]:
#| default_exp backends.hf

In [None]:
#| hide
from nbdev.showdoc import *
%load_ext autoreload
%autoreload 2

In [None]:
#| export
from emb_opt.imports import *
from emb_opt.core import QueryResult, VectorDatabase, dataset_from_query_results

  from .autonotebook import tqdm as notebook_tqdm


In [None]:
#| export

class HFDatabase(VectorDatabase):
    """Vector database that answers queries from an indexed `Dataset`.

    Searches are run against the index registered on `dataset` under
    `index_name`. The same name is used as the row key holding each
    item's embedding, so the index must be named after its column.
    """

    def __init__(self, 
                 dataset: Dataset,
                 index_name: str,
                 k: int, 
                ):
        """Store the dataset and search settings.

        Args:
            dataset: dataset that already has an index named `index_name`.
            index_name: name of the index to search; also the key popped
                from every returned row to obtain its embedding.
            k: number of neighbours requested per query vector.
        """
        self.dataset = dataset
        self.index_name = index_name
        self.k = k
        
    def query(self, query_vectors: np.ndarray) -> Dataset:
        """Search the index with a batch of query vectors.

        Args:
            query_vectors: batch of query vectors, one query per row,
                passed unchanged to the index's `search_batch`.

        Returns:
            The output of `dataset_from_query_results` for one
            `QueryResult` per (query, neighbour) hit, in query order.
        """
        index = self.dataset.get_index(self.index_name)
        res = index.search_batch(query_vectors, k=self.k)

        results = []
        # Iterate row-wise instead of relying on `.shape` / 2-D indexing so
        # that backends returning (possibly ragged) nested lists work too.
        per_query = zip(res.total_indices, res.total_scores)
        for query_idx, (row_indices, row_distances) in enumerate(per_query):
            for db_idx, distance in zip(row_indices, row_distances):
                # FAISS pads with -1 when fewer than k neighbours exist.
                # A negative index would otherwise wrap around and return
                # the dataset's last row as a bogus hit.
                if db_idx < 0:
                    continue

                data_dict = self.dataset[int(db_idx)]
                # The embedding is reported separately from the remaining
                # row data, so remove it from the row dict.
                embedding = data_dict.pop(self.index_name)

                result = QueryResult(query_idx, db_idx, embedding, distance, data_dict)
                results.append(result)

        return dataset_from_query_results(results)

In [None]:
# Build a small FAISS-indexed dataset of random embeddings.
vectors = np.random.randn(64, 256)
dataset = Dataset.from_list([{'embedding': vec} for vec in vectors])
dataset.add_faiss_index('embedding')

# Wrap it so every query asks for 5 neighbours.
hf_db = HFDatabase(dataset=dataset, index_name='embedding', k=5)

# Run two random query vectors through the database.
query_vecs = np.random.randn(2, 256)
query_dataset = hf_db.query(query_vectors=query_vecs)

100%|███████████████████████████████████████████| 1/1 [00:00<00:00, 2542.00it/s]


In [None]:
#| hide
# Export this notebook's `#| export` cells to the library module.
import nbdev
nbdev.nbdev_export()