In [1]:
from sentence_transformers import SentenceTransformer, CrossEncoder
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

  from .autonotebook import tqdm as notebook_tqdm


In [3]:
# Load both models once so the cells below can reuse them.
# The bi-encoder is used later to embed the query and each document separately.
bi_encoder = SentenceTransformer("all-MiniLM-L6-v2")

# The cross-encoder is used later to score each (query, document) pair directly.
cross_encoder = CrossEncoder("cross-encoder/ms-marco-MiniLM-L-6-v2")

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development
Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`


In [None]:
# Small "Paris" corpus: two sentences mention Paris, one is about another capital.
# NOTE(review): this cell has no execution count in the saved notebook, and the
# next cell reassigns `documents`, so the comparison cell never sees this list.
documents = [
    "Paris is the capital of France.",
    "The man worked in Paris, Texas.",
    "Berlin is the capital of Germany.",
]

In [28]:
# The question both models will be asked to match against `documents`.
query = "I want to learn python language"

# Two candidates chosen to separate keyword overlap from meaning.
documents = [
    # Doc 0: contains the word "python", but with the wrong meaning (the snake).
    "The python is a large non-venomous snake found in Africa.",
    # Doc 1: never says "Python", but has the CORRECT meaning (programming).
    "This coding tutorial covers variables, loops, and functions.",
]

In [29]:


# Compare which entry of `documents` a bi-encoder and a cross-encoder rank
# highest for `query`, and report whether the two models agree.
print(f"Query: {query}")

# --- Bi-encoder -------------------------------------------------------------
# The query and the documents are embedded separately, then compared with
# cosine similarity.
print("Running Bi-Encoder..")
doc_vectors = bi_encoder.encode(documents)
# The query is wrapped in a list so that it is encoded as a batch of one.
query_vectors = bi_encoder.encode([query])

# Only one query was encoded, so row 0 holds its similarity to every document.
bi_scores = cosine_similarity(query_vectors, doc_vectors)[0]
bi_winner_index = np.argmax(bi_scores)

print(f"Bi-Encoder picked doc {bi_winner_index}:'{documents[bi_winner_index]}'")
print(f"Score: {bi_scores[bi_winner_index]:.4f}")

# --- Cross-encoder ----------------------------------------------------------
# Each (query, document) pair is scored jointly by the model.
print("\nRunning Cross-Encoder")

pairs = [[query, doc] for doc in documents]
cross_scores = cross_encoder.predict(pairs)
cross_winner_index = np.argmax(cross_scores)

print(f"Cross-Encoder picked Doc {cross_winner_index}: '{documents[cross_winner_index]}'")
# NOTE(review): this score is not a cosine similarity (the recorded run printed
# -5.5549), so compare the two models by which document they rank first, not
# by the magnitude of their scores.
print(f"Score: {cross_scores[cross_winner_index]:.4f}")

# --- Verdict ----------------------------------------------------------------
# Nothing here knows which document is actually the right answer, so a
# disagreement is reported neutrally instead of declaring that the
# cross-encoder "corrected" the bi-encoder (it may be the one that is wrong).
if bi_winner_index == cross_winner_index:
    print("\nBoth Agreed")
else:
    print(
        f"\nDisagreement! The Bi-Encoder picked doc {bi_winner_index} "
        f"but the Cross-Encoder picked doc {cross_winner_index}"
    )



Query: I want to learn python language
Running Bi-Encoder..
Bi-Encoder picked doc 0:'The python is a large non-venomous snake found in Africa.'
Score: 0.3844

Running Cross-Encoder
Cross-Encoder picked Doc 0: 'The python is a large non-venomous snake found in Africa.'
Score: -5.5549

Both Agreed
