In [6]:
import faiss
from sentence_transformers import SentenceTransformer
import pickle
import torch

In [7]:
# Instantiate the sentence-embedding model by name; this object is later passed
# to search_nearest_neighbors, which calls model.encode on the query text.
model = SentenceTransformer('multi-qa-MiniLM-L6-cos-v1')

## Nearest-neighbor search with an L2 (Euclidean) index

In [9]:
import faiss
import numpy as np

def search_nearest_neighbors(query, model, index, doc_list, k=5):
    """Return the documents closest to ``query`` according to ``index``.

    Args:
        query: Text to search for.
        model: Encoder exposing ``encode(list_of_str)`` that returns one
            embedding per input string.
        index: Search index exposing ``search(matrix, k)`` that returns a
            ``(distances, indices)`` pair with one row per query (e.g. a FAISS
            index).
        doc_list: Sequence of documents, positionally aligned with the vectors
            stored in ``index``.
        k: Maximum number of neighbours to return.

    Returns:
        dict with two keys:
            'sentences': list of the matching entries of ``doc_list``.
            'distances': NumPy array of the corresponding distances.
        Both have the same length, which may be smaller than ``k`` when the
        index cannot supply ``k`` results.
    """
    # FAISS requires a C-contiguous float32 matrix of shape (n_queries, dim).
    query_embedding = np.asarray(model.encode([query]), dtype=np.float32)
    if query_embedding.ndim == 1:
        query_embedding = query_embedding[np.newaxis, :]
    query_embedding = np.ascontiguousarray(query_embedding)

    # Search the index for the k nearest neighbours of the single query row.
    distances, indices = index.search(query_embedding, k)
    row_distances = np.asarray(distances)[0]
    row_indices = np.asarray(indices)[0]

    # FAISS fills unused result slots with id -1 when fewer than k vectors are
    # available. Indexing doc_list with -1 would silently return the *last*
    # document, so those padding slots are dropped here.
    found = row_indices >= 0

    return {
        'sentences': [doc_list[int(idx)] for idx in row_indices[found]],
        'distances': row_distances[found],
    }

# SECURITY: pickle.load can execute arbitrary code embedded in the file.
# Only load embeddings.pkl when it comes from a trusted source.
with open("embeddings.pkl", "rb") as fIn:
    stored_data = pickle.load(fIn)

# The pickle is expected to be a dict holding the sentences and their embeddings.
stored_sentences = stored_data["sentences"]
stored_embeddings = stored_data["embeddings"]

if isinstance(stored_embeddings, torch.Tensor):
    # detach() first so a tensor that still tracks gradients can be converted;
    # cpu() moves it off any accelerator before the NumPy conversion.
    stored_embeddings = stored_embeddings.detach().cpu().numpy()

# FAISS only accepts C-contiguous float32 matrices; coerce once here so a
# float64 (or list-of-vectors) payload does not fail later at index.add().
stored_embeddings = np.ascontiguousarray(stored_embeddings, dtype=np.float32)
    
# Usage example
# Assumes 'model' is the same encoder that produced 'stored_embeddings' from
# 'stored_sentences' — TODO confirm against whatever wrote embeddings.pkl.
# The index width is taken from the second axis of the embedding matrix.
dimension = stored_embeddings.shape[1]
index = faiss.IndexFlatL2(dimension)
# NOTE(review): FAISS is documented to require float32 input — confirm the dtype here.
index.add(stored_embeddings)  # Must be a NumPy array here (torch.Tensor inputs are converted above)

# Run one example query against the index and list what comes back.
query = "My sister's leg broke"
nearest_neighbors = search_nearest_neighbors(
    query,
    model,
    index,
    stored_sentences,
    k=5,
)

print("Nearest neighbors and their distances:")
# Pair each returned sentence with its distance, in result order.
neighbor_pairs = zip(
    nearest_neighbors['sentences'],
    nearest_neighbors['distances'],
)
for sentence, distance in neighbor_pairs:
    print(f"Sentence: {sentence}, Distance: {distance:.4f}")


Nearest neighbors and their distances:
Sentence:   The evolution of Earth-Moon system is described by the dark matter field
fluid model proposed in the Meeting of Division of Particle and Field 2004,
American Physical Society. The current behavior of the Earth-Moon system agrees
with this model very well and the general pattern of the evolution of the
Moon-Earth system described by this model agrees with geological and fossil
evidence. The closest distance of the Moon to Earth was about 259000 km at 4.5
billion years ago, which is far beyond the Roche's limit. The result suggests
that the tidal friction may not be the primary cause for the evolution of the
Earth-Moon system. The average dark matter field fluid constant derived from
Earth-Moon system data is 4.39 x 10^(-22) s^(-1)m^(-1). This model predicts
that the Mars's rotation is also slowing with the angular acceleration rate
about -4.38 x 10^(-22) rad s^(-2).
, Distance: 4.1173
Sentence:   We describe a new algorithm, the $(k,\el