In [3]:
from sentence_transformers import SentenceTransformer

# Checkpoint identifier handed to SentenceTransformer; kept in a named constant
# so the embedding model can be swapped in one place.
EMBEDDING_MODEL_NAME = "sentence-transformers/paraphrase-xlm-r-multilingual-v1"
model = SentenceTransformer(EMBEDDING_MODEL_NAME)

  from .autonotebook import tqdm as notebook_tqdm


In [4]:
# Toy corpus that is embedded and ranked against the query further down.
# NOTE(review): several strings contain misspellings ("Understsnding",
# "Semacntic", "This a list which containing"). They are inputs to the
# embedding model, so correcting them would change the similarity scores
# recorded in the outputs below; re-run the whole notebook if they are edited.
documents = [
    "This a list which containing sample documents.",
    "Keywords are important for keyword-based search.",
    "Document analysis involves extracting keywords.",
    "Keyword-based search relies on sparse embeddings.",
    "Understsnding document structure aids in keyword extraction.",
    "Efficient keyword extraction enhances search accuracy.",
    "Semacntic similarity improves document retrieval performance.",
    "Machine learning techniques can optimize keyword extraction methods."
]
# Sanity check: show how many documents are in the corpus.
len(documents)

8

In [5]:
# Free-text search query; it is embedded with the same model as the documents
# and compared against them by cosine similarity.
query = "Natural language processing techniques enhances keyword extraction efficiency."

In [6]:
# Encode the corpus and the query with the same model so their vectors are
# directly comparable.
# NOTE(review): presumably encode() returns one row per document for a list
# input and a single 1-D vector for a string input — confirm against the
# sentence-transformers version in use.
docs_embeddings = model.encode(documents)
query_embedding = model.encode(query)

In [8]:
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np
# cosine_similarity works on 2-D inputs, so wrap the single query vector into
# a one-row matrix before comparing it with every document embedding.
query_matrix = np.array([query_embedding])
similarities = cosine_similarity(query_matrix, docs_embeddings)
similarities

array([[0.18442771, 0.45484942, 0.57262063, 0.44097388, 0.61043894,
        0.7594256 , 0.5526097 , 0.79599875]], dtype=float32)

In [10]:
# argsort yields positions in ascending score order; reversing it puts the
# most similar document first.
ascending_positions = np.argsort(similarities[0])
index = ascending_positions[::-1]
index

array([7, 5, 4, 2, 6, 1, 3, 0])

In [11]:
# Pair each document with its similarity to the query, best match first.
query_scores = similarities[0]
ranked_docs = [
    (documents[doc_pos], query_scores[doc_pos])
    for doc_pos in index
]
ranked_docs

[('Machine learning techniques can optimize keyword extraction methods.',
  np.float32(0.79599875)),
 ('Efficient keyword extraction enhances search accuracy.',
  np.float32(0.7594256)),
 ('Understsnding document structure aids in keyword extraction.',
  np.float32(0.61043894)),
 ('Document analysis involves extracting keywords.', np.float32(0.57262063)),
 ('Semacntic similarity improves document retrieval performance.',
  np.float32(0.5526097)),
 ('Keywords are important for keyword-based search.', np.float32(0.45484942)),
 ('Keyword-based search relies on sparse embeddings.', np.float32(0.44097388)),
 ('This a list which containing sample documents.', np.float32(0.18442771))]

In [14]:
# Keep only the text of the four best-ranked (document, score) pairs; these
# are the candidates handed to the reranker.
top_4_docs = [text for text, _score in ranked_docs[:4]]
top_4_docs

['Machine learning techniques can optimize keyword extraction methods.',
 'Efficient keyword extraction enhances search accuracy.',
 'Understsnding document structure aids in keyword extraction.',
 'Document analysis involves extracting keywords.']

In [2]:
import os

import cohere

# Read the Cohere API key from the environment instead of embedding it in the
# notebook, so the secret never lands in version control or shared copies.
# A missing CO_API_KEY raises KeyError here rather than failing later with an
# authentication error.
# NOTE(review): the key that was previously hardcoded in this cell should be
# treated as leaked and rotated.
co = cohere.ClientV2(api_key=os.environ["CO_API_KEY"])

In [15]:
# Rerank the candidates from the embedding search with Cohere's rerank-v3.5
# model. The `query` variable is reused (rather than repeating the text as a
# literal) so the retrieval and rerank stages always score the same query.
response = co.rerank(
    model="rerank-v3.5",
    query=query,
    documents=top_4_docs,
    return_documents=True,  # include each document's text in the results
)

In [16]:
# Display the object returned by co.rerank for inspection.
response



In [17]:
# Text of the first entry in the rerank results.
response.results[0].document.text

'Machine learning techniques can optimize keyword extraction methods.'

In [18]:
# Relevance score the reranker assigned to the first entry.
response.results[0].relevance_score

0.72418857

In [None]:
# Print every reranked document with its relevance score. Iterating over the
# results directly (instead of a fixed range(4)) avoids an IndexError when the
# API returns fewer entries than expected.
for result in response.results:
    print(f"Document: {result.document.text}, Relevance Score: {result.relevance_score}")

Document: Machine learning techniques can optimize keyword extraction methods., Relevance Score: 0.72418857
Document: Efficient keyword extraction enhances search accuracy., Relevance Score: 0.55391616
Document: Understsnding document structure aids in keyword extraction., Relevance Score: 0.42577294
Document: Document analysis involves extracting keywords., Relevance Score: 0.35260347
