In [1]:
# Step 1: Load required libraries
import pickle  # deserializes the saved embeddings file
import torch  # torch.topk ranks the similarity scores
from sentence_transformers import SentenceTransformer, util  # encoder model + cos_sim helper
import warnings
# Silence every warning for the rest of the session so the cell output stays clean.
# NOTE(review): this is a blanket filter, so it also hides deprecation and
# runtime warnings raised by the libraries imported above.
warnings.filterwarnings('ignore')

# Step 2: Load saved embeddings + model
# The pickle is read as a mapping; the three keys below are the only ones used here.
with open("embeddings.pkl", "rb") as handle:
    data = pickle.load(handle)

english_sentences, french_sentences, french_embeddings = (
    data[key]
    for key in ("english_sentences", "french_sentences", "french_embeddings")
)

model = SentenceTransformer(
    "paraphrase-multilingual-MiniLM-L12-v2",
)

# Step 3: Encode English sentences
# Only the English side is encoded here; the French embeddings come from the pickle.
# The result is requested as a tensor so it can be fed to the similarity step.
english_embeddings = model.encode(
    english_sentences,
    convert_to_tensor=True,
)

# Step 4: Evaluate Top-1, Top-3, Top-5
# English sentence i scores a hit at rank k when French index i is among the
# k French embeddings most similar to it.
# NOTE(review): this presumes english_sentences[i] and french_embeddings[i]
# are a translation pair -- confirm against whatever wrote embeddings.pkl.
top_1, top_3, top_5 = 0, 0, 0
total = len(english_embeddings)

# torch.topk raises when k exceeds the number of candidates, so clamp k to
# the size of the French set instead of hard-coding 5.
max_k = min(5, len(french_embeddings))

if total and max_k:
    # One similarity matrix (one row per English query, one column per French
    # candidate) replaces a cos_sim call per query, which redid the same work
    # on french_embeddings every iteration.
    # NOTE(review): the full matrix is held in memory at once; fine for a few
    # hundred sentences, revisit for very large evaluation sets.
    similarity = util.cos_sim(english_embeddings, french_embeddings)
    ranked = torch.topk(similarity, k=max_k, dim=1).indices.tolist()

    for i, top_indices in enumerate(ranked):
        if i == top_indices[0]: top_1 += 1
        if i in top_indices[:3]: top_3 += 1
        if i in top_indices: top_5 += 1


def _accuracy_pct(hits):
    """Return hits as a percentage of total, or 0.0 when nothing was evaluated."""
    return hits / total * 100 if total else 0.0


print(f"Top-1 Accuracy: {_accuracy_pct(top_1):.2f}% ({top_1}/{total})")
print(f"Top-3 Accuracy: {_accuracy_pct(top_3):.2f}% ({top_3}/{total})")
print(f"Top-5 Accuracy: {_accuracy_pct(top_5):.2f}% ({top_5}/{total})")


Top-1 Accuracy: 56.50% (113/200)
Top-3 Accuracy: 86.00% (172/200)
Top-5 Accuracy: 93.50% (187/200)
