In [2]:
from huggingface_hub import login
import random
import torch, gc
from sentence_transformers import SentenceTransformer
from sentence_transformers.evaluation import InformationRetrievalEvaluator
from datasets import load_dataset
from kaggle_secrets import UserSecretsClient
# Read the Hugging Face access token stored under the Kaggle secret label
# "hugging" and use it to authenticate this session with the Hub.
user_secrets = UserSecretsClient()
hugging = user_secrets.get_secret("hugging")
login(token=hugging)

In [3]:
# Embedding models to benchmark, identified by their Hugging Face Hub repo ids.
model_ids = [
    "Dqdung205/medical_vietnamese_embedding",
    "BAAI/bge-m3",
    "dangvantuan/vietnamese-document-embedding",
    "Alibaba-NLP/gte-multilingual-base",
    "AITeamVN/Vietnamese_Embedding",
    "strongpear/M3-retriever-MEDICAL",
    "hiieu/halong_embedding",
    "bkai-foundation-models/vietnamese-bi-encoder",
]

# Evaluation set: the first 200 rows of the ViHealthQA train split.
dataset = load_dataset("tarudesu/ViHealthQA", split="train[:200]")

# Row i provides both query i and document i, so the stringified row index
# serves as the id on both sides.
row_ids = [str(idx) for idx in range(len(dataset))]
queries = dict(zip(row_ids, dataset["question"]))
corpus = dict(zip(row_ids, dataset["answer"]))
# Each query has exactly one relevant document: the answer from the same row.
relevant_docs = {row_id: {row_id} for row_id in row_ids}

README.md: 0.00B [00:00, ?B/s]

train.csv: 0.00B [00:00, ?B/s]

val.csv: 0.00B [00:00, ?B/s]

test.csv: 0.00B [00:00, ?B/s]

Generating train split:   0%|          | 0/7009 [00:00<?, ? examples/s]

Generating validation split:   0%|          | 0/993 [00:00<?, ? examples/s]

Generating test split:   0%|          | 0/2013 [00:00<?, ? examples/s]

In [4]:
import gc

import torch

# Rank cutoffs used both to configure the evaluator and to pull the scores out
# of its result dict, so the two can never drift apart.
EVAL_AT_K = (3, 5, 10)

# One row per model; reset here so re-running the cell does not duplicate rows.
all_results = []

# Prefer the GPU, but fall back to CPU instead of crashing at model load time
# when no CUDA device is available.
device = "cuda" if torch.cuda.is_available() else "cpu"

for model_id in model_ids:
    print(f"\nEvaluating model: {model_id}")
    # NOTE(review): trust_remote_code=True executes Python code shipped in the
    # model repository (the load logs warn about this for several models);
    # consider pinning a revision for repos that have not been audited.
    model = SentenceTransformer(model_id, device=device, trust_remote_code=True)

    # A fresh evaluator is built for every model over the shared
    # queries / corpus / relevance judgements.
    ir_evaluator = InformationRetrievalEvaluator(
        queries,
        corpus,
        relevant_docs,
        mrr_at_k=list(EVAL_AT_K),
        ndcg_at_k=list(EVAL_AT_K),
    )

    results = ir_evaluator(model)

    # Keep only the cosine-similarity NDCG and MRR scores, stored under short
    # "<metric>@<k>" column names (ndcg columns first, then mrr).
    all_results.append({
        "model": model_id,
        **{
            f"{metric}@{k}": results[f"cosine_{metric}@{k}"]
            for metric in ("ndcg", "mrr")
            for k in EVAL_AT_K
        },
    })

    # Release the model and free cached VRAM before loading the next one.
    del model
    gc.collect()
    if device == "cuda":
        torch.cuda.empty_cache()
        torch.cuda.ipc_collect()




Evaluating model: Dqdung205/medical_vietnamese_embedding


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/283 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/58.0 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

configuration.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/Dqdung205/medical_vietnamese_embedding:
- configuration.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/Dqdung205/medical_vietnamese_embedding:
- modeling.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/964 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/312 [00:00<?, ?B/s]


Evaluating model: BAAI/bge-m3


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/123 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/54.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/687 [00:00<?, ?B/s]

pytorch_model.bin:   0%|          | 0.00/2.27G [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.27G [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/444 [00:00<?, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/964 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/191 [00:00<?, ?B/s]


Evaluating model: dangvantuan/vietnamese-document-embedding


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/171 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/54.0 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

configuration.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/dangvantuan/Vietnamese_impl:
- configuration.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/dangvantuan/Vietnamese_impl:
- modeling.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors:   0%|          | 0.00/1.22G [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/964 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/296 [00:00<?, ?B/s]


Evaluating model: Alibaba-NLP/gte-multilingual-base


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/55.0 [00:00<?, ?B/s]

config.json: 0.00B [00:00, ?B/s]

configuration.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/Alibaba-NLP/new-impl:
- configuration.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


modeling.py: 0.00B [00:00, ?B/s]

A new version of the following files was downloaded from https://huggingface.co/Alibaba-NLP/new-impl:
- modeling.py
. Make sure to double-check they do not contain any added malicious code. To avoid downloading new versions of the code file, you can pin a revision.


model.safetensors:   0%|          | 0.00/611M [00:00<?, ?B/s]

Some weights of the model checkpoint at Alibaba-NLP/gte-multilingual-base were not used when initializing NewModel: ['classifier.bias', 'classifier.weight']
- This IS expected if you are initializing NewModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing NewModel from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).


tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/964 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]


Evaluating model: AITeamVN/Vietnamese_Embedding


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/171 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/54.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/708 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.27G [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/964 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/297 [00:00<?, ?B/s]


Evaluating model: strongpear/M3-retriever-MEDICAL


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/205 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/54.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/698 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/2.27G [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

sentencepiece.bpe.model:   0%|          | 0.00/5.07M [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/964 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/297 [00:00<?, ?B/s]


Evaluating model: hiieu/halong_embedding


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/201 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/749 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/1.11G [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

tokenizer.json:   0%|          | 0.00/17.1M [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/964 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/296 [00:00<?, ?B/s]


Evaluating model: bkai-foundation-models/vietnamese-bi-encoder


modules.json:   0%|          | 0.00/229 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/123 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/777 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/540M [00:00<?, ?B/s]

tokenizer_config.json: 0.00B [00:00, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

bpe.codes: 0.00B [00:00, ?B/s]

added_tokens.json:   0%|          | 0.00/22.0 [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/167 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/270 [00:00<?, ?B/s]

In [5]:
import pandas as pd

# Build the leaderboard table from the per-model result rows, best NDCG@10 first.
df = pd.DataFrame(all_results).sort_values(by="ndcg@10", ascending=False)

In [6]:
# Emit the leaderboard as a pipe-style Markdown table, without the index column.
leaderboard_md = df.to_markdown(index=False)
print(leaderboard_md)

| model                                        |   ndcg@3 |   ndcg@5 |   ndcg@10 |    mrr@3 |    mrr@5 |   mrr@10 |
|:---------------------------------------------|---------:|---------:|----------:|---------:|---------:|---------:|
| Dqdung205/medical_vietnamese_embedding       | 0.889701 | 0.90003  |  0.909804 | 0.8775   | 0.88325  | 0.887317 |
| BAAI/bge-m3                                  | 0.853629 | 0.875782 |  0.880922 | 0.839167 | 0.851167 | 0.853458 |
| Alibaba-NLP/gte-multilingual-base            | 0.855474 | 0.861935 |  0.875084 | 0.838333 | 0.842083 | 0.847629 |
| dangvantuan/vietnamese-document-embedding    | 0.842856 | 0.857929 |  0.873136 | 0.83     | 0.83875  | 0.845407 |
| AITeamVN/Vietnamese_Embedding                | 0.822201 | 0.83253  |  0.848439 | 0.8125   | 0.81825  | 0.824657 |
| strongpear/M3-retriever-MEDICAL              | 0.814701 | 0.829117 |  0.840277 | 0.8025   | 0.8105   | 0.815012 |
| hiieu/halong_embedding                       | 0.779938 | 0.794355 |  