In [6]:
# Load the model from Hugging Face and compute number of parameters
from transformers import AutoModel

model_name = "nreimers/TinyBERT_L-6_H-768_v2"

model = AutoModel.from_pretrained(model_name)
# Divide the number of parameters by 1 million for easier readability
def num_parameters(model):
    return sum(p.numel() for p in model.parameters()) / 1_000_000

print(f"Model: {model_name}")
print(f"Number of parameters: {num_parameters(model):.2f}M")

Some weights of the model checkpoint at nreimers/TinyBERT_L-6_H-768_v2 were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'fit_denses.0.bias', 'fit_denses.0.weight', 'fit_denses.1.bias', 'fit_denses.1.weight', 'fit_denses.2.bias', 'fit_denses.2.weight', 'fit_denses.3.bias', 'fit_denses.3.weight', 'fit_denses.4.bias', 'fit_denses.4.weight', 'fit_denses.5.bias', 'fit_denses.5.weight', 'fit_denses.6.bias', 'fit_denses.6.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel fr

Model: nreimers/TinyBERT_L-6_H-768_v2
Number of parameters: 66.96M


In [7]:
model_name = "nreimers/TinyBERT_L-4_H-312_v2"

model = AutoModel.from_pretrained(model_name)

print(f"Model: {model_name}")
print(f"Number of parameters: {num_parameters(model):.2f}M")

Some weights of the model checkpoint at nreimers/TinyBERT_L-4_H-312_v2 were not used when initializing BertModel: ['cls.predictions.bias', 'cls.predictions.decoder.weight', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.dense.weight', 'cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'fit_denses.0.bias', 'fit_denses.0.weight', 'fit_denses.1.bias', 'fit_denses.1.weight', 'fit_denses.2.bias', 'fit_denses.2.weight', 'fit_denses.3.bias', 'fit_denses.3.weight', 'fit_denses.4.bias', 'fit_denses.4.weight']
- This IS expected if you are initializing BertModel from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing BertModel from the checkpoint of a model that you expect to be exactly identical (initializing a Ber

Model: nreimers/TinyBERT_L-4_H-312_v2
Number of parameters: 14.35M


In [8]:
model_name = "nreimers/MiniLM-L6-H384-uncased"

model = AutoModel.from_pretrained(model_name)
print(f"Model: {model_name}")
print(f"Number of parameters: {num_parameters(model):.2f}M")


Model: nreimers/MiniLM-L6-H384-uncased
Number of parameters: 22.71M


In [9]:
model_name = "microsoft/MiniLM-L12-H384-uncased"

model = AutoModel.from_pretrained(model_name)
print(f"Model: {model_name}")
print(f"Number of parameters: {num_parameters(model):.2f}M")

Model: microsoft/MiniLM-L12-H384-uncased
Number of parameters: 33.36M


In [None]:
# Write up list of datasets to consider
datasets = [
    "scifact",
    "arguana",
    "nfcorpus",
    "scidocs",
    "fiqa",
    "trec-covid",
    "webis-touche2020",
    "quora",
    ]

# "trec-news", # Not sure if to include, no query length info on pylate
# "robust04", # Not sure if to include, no query length info on pylate

## Evaluate on NanoBEIR datasets on sentence transformers library

In [9]:
from sentence_transformers import SentenceTransformer
from sentence_transformers.evaluation import NanoBEIREvaluator

# model = SentenceTransformer('answerdotai/ModernBERT-base')
model = SentenceTransformer('pinecone/ConstBERT')

# dataset_name_to_human_readable = {
#     "climatefever": "ClimateFEVER",
#     "dbpedia": "DBPedia",
#     "fever": "FEVER",
#     "fiqa2018": "FiQA2018",
#     "hotpotqa": "HotpotQA",
#     "msmarco": "MSMARCO",
#     "nfcorpus": "NFCorpus",
#     "nq": "NQ",
#     "quoraretrieval": "QuoraRetrieval",
#     "scidocs": "SCIDOCS",
#     "arguana": "ArguAna",
#     "scifact": "SciFact",
#     "touche2020": "Touche2020",
# }

datasets = ["QuoraRetrieval", "MSMARCO"]
query_prompts = {
    "QuoraRetrieval": "Instruct: Given a question, retrieve questions that are semantically equivalent to the given question\\nQuery: ",
    "Touche2020": "Instruct: Given a question, retrieve questions that are semantically equivalent to the given question\\nQuery: ",
    "NFCorpus": "Instruct: Given a question, retrieve questions that are semantically equivalent to the given question\\nQuery: ",
    "SciFact": "Instruct: Given a question, retrieve questions that are semantically equivalent to the given question\\nQuery: ",
    "ArguAna": "Instruct: Given a question, retrieve questions that are semantically equivalent to the given question\\nQuery: ",
    "SCIDOCS": "Instruct: Given a question, retrieve questions that are semantically equivalent to the given question\\nQuery: ",
    "FiQA2018": "Instruct: Given a question, retrieve questions that are semantically equivalent to the given question\\nQuery: ",
    "MSMARCO": "Instruct: Given a web search query, retrieve relevant passages that answer the query\\nQuery: "
    }

evaluator = NanoBEIREvaluator(
    dataset_names=datasets,
    query_prompts=query_prompts,
    )

results = evaluator(model)

No sentence-transformers model found with name pinecone/ConstBERT. Creating a new one with mean pooling.
                                                                        

In [8]:
print(results[evaluator.primary_metric])

0.6165134596949706


## Document results

Mean NDCG@10
- joe32140/ModernBERT-base-msmarco' (0.62)

In [None]:
## 