# Imports

In [11]:
import pandas as pd
from datasets import load_dataset, Dataset
from datasets import concatenate_datasets
from sentence_transformers import SentenceTransformer, util
from sentence_transformers.evaluation import InformationRetrievalEvaluator
from sentence_transformers.util import cos_sim

# Initialization

In [2]:
# Directory / hub name the fine-tuned checkpoint is loaded from further down.
final_model_path = "bge-m3-finance-model"

# Load each CSV and rename its columns in a single chained expression:
# "query" becomes "anchor" and "corpus" becomes "positive".
# Despite the `_df` suffix these are the objects returned by `load_dataset`
# (indexed by the "train" key later in the notebook), not pandas DataFrames.
train_df = (
    load_dataset("csv", data_files="training_data.csv")
    .rename_column("query", "anchor")
    .rename_column("corpus", "positive")
)
eval_df = (
    load_dataset("csv", data_files="eval_data.csv")
    .rename_column("query", "anchor")
    .rename_column("corpus", "positive")
)

# Process and Evaluate

In [3]:
# The retrieval corpus is every "positive" passage from the training and the
# evaluation data combined; the queries come from the evaluation data only.
eval_split = eval_df['train']
corpus_dataset = concatenate_datasets([train_df['train'], eval_split])

# Corpus lookup: cid => document text.
# NOTE(review): if an id appears more than once across the two files, the later
# row silently replaces the earlier one in this dict -- confirm ids are unique.
corpus = {
    cid: document
    for cid, document in zip(corpus_dataset["id"], corpus_dataset["positive"])
}

# Query lookup: qid => question text.
queries = {
    qid: question
    for qid, question in zip(eval_split["id"], eval_split["anchor"])
}

In [4]:
# Create a mapping of the relevant document (one per query in our case) for each query
# Query ID to relevant documents (qid => set([relevant_cids])).
# Each query's single relevant document is the corpus entry that shares its ID,
# so every query maps to a one-element set containing its own ID. A set (rather
# than a list) matches the mapping type the evaluator documents for
# `relevant_docs`.
relevant_docs = {q_id: {q_id} for q_id in queries}

In [5]:
# One evaluator instance is built here and reused for both the base and the
# fine-tuned model, so they are scored on the same queries and corpus.
ir_evaluator = InformationRetrievalEvaluator(
    corpus=corpus,
    queries=queries,
    relevant_docs=relevant_docs,
    # Rank documents by cosine similarity; the "cosine" key becomes the
    # prefix of the reported metric names (e.g. cosine_ndcg@10).
    score_functions={"cosine": cos_sim},
    # Truncate the embeddings to a certain dimension.
    # NOTE(review): 2048 is presumably larger than this model's embedding
    # size, in which case no truncation actually happens -- confirm.
    truncate_dim=2048,
)

# Base Model Metrics

In [6]:
# Baseline: the off-the-shelf BAAI/bge-m3 checkpoint.
model = SentenceTransformer(
    model_name_or_path="BAAI/bge-m3",
    trust_remote_code=True,
)

# Limit the model's maximum sequence length to 256 before evaluating.
model.max_seq_length = 256

# The returned metrics dict is the cell's displayed output.
ir_evaluator(model)

{'cosine_accuracy@1': 0.6088397790055249,
 'cosine_accuracy@3': 0.7226519337016575,
 'cosine_accuracy@5': 0.7613259668508288,
 'cosine_accuracy@10': 0.8276243093922652,
 'cosine_precision@1': 0.6088397790055249,
 'cosine_precision@3': 0.2408839779005525,
 'cosine_precision@5': 0.15226519337016572,
 'cosine_precision@10': 0.08276243093922653,
 'cosine_recall@1': 0.6088397790055249,
 'cosine_recall@3': 0.7226519337016575,
 'cosine_recall@5': 0.7613259668508288,
 'cosine_recall@10': 0.8276243093922652,
 'cosine_ndcg@10': 0.7116877051559333,
 'cosine_mrr@10': 0.6753253529772868,
 'cosine_map@100': 0.6800862227735942}

# Fine-tuned Model Metrics

In [7]:
# Load the fine-tuned checkpoint from `final_model_path` and score it with the
# same evaluator used for the baseline.
# NOTE(review): unlike the baseline cell, max_seq_length is not set here, so
# whatever value the checkpoint loads with is used -- confirm it is also 256,
# otherwise the two runs are not evaluated under the same conditions.
finetuned_model = SentenceTransformer(
    model_name_or_path=final_model_path,
    trust_remote_code=True,
)

# The returned metrics dict is the cell's displayed output.
ir_evaluator(finetuned_model)

  warn("The installed version of bitsandbytes was compiled without GPU support. "


'NoneType' object has no attribute 'cadam32bit_grad_fp32'


{'cosine_accuracy@1': 0.6629834254143646,
 'cosine_accuracy@3': 0.7790055248618785,
 'cosine_accuracy@5': 0.8176795580110497,
 'cosine_accuracy@10': 0.8651933701657458,
 'cosine_precision@1': 0.6629834254143646,
 'cosine_precision@3': 0.2596685082872928,
 'cosine_precision@5': 0.16353591160220995,
 'cosine_precision@10': 0.08651933701657458,
 'cosine_recall@1': 0.6629834254143646,
 'cosine_recall@3': 0.7790055248618785,
 'cosine_recall@5': 0.8176795580110497,
 'cosine_recall@10': 0.8651933701657458,
 'cosine_ndcg@10': 0.7618216813501208,
 'cosine_mrr@10': 0.7290296413224592,
 'cosine_map@100': 0.7336044058479377}

In [16]:
# Headline metrics selected from the two evaluation outputs shown above, kept
# as (metric, base score, fine-tuned score) rows so both dicts are guaranteed
# to share the same keys in the same order.
_metric_rows = [
    ('cosine_accuracy@10', 0.8276243093922652, 0.8651933701657458),
    ('cosine_precision@1', 0.6088397790055249, 0.6629834254143646),
    ('cosine_recall@10', 0.8276243093922652, 0.8651933701657458),
    ('cosine_ndcg@10', 0.7116877051559333, 0.7618216813501208),
    ('cosine_mrr@10', 0.6753253529772868, 0.7290296413224592),
    ('cosine_map@100', 0.6800862227735942, 0.7336044058479377),
]

# Scores of the base model, keyed by metric name.
data_base = {metric: base for metric, base, _ in _metric_rows}

# Scores of the fine-tuned model, keyed by metric name.
data_finetuned = {metric: tuned for metric, _, tuned in _metric_rows}

In [17]:
# Build the side-by-side comparison table of base vs. fine-tuned scores.
#
# Scores are looked up by metric name instead of relying on the two dicts
# happening to list their entries in the same order: a metric missing from
# `data_finetuned` now raises KeyError instead of silently pairing a base
# score with the wrong fine-tuned score. The columns are materialized as
# plain lists rather than dict views.
metric_name = list(data_base)
score = [data_base[name] for name in metric_name]
score_new = [data_finetuned[name] for name in metric_name]

data_json = {
    'Metrics': metric_name,
    'Scores_Base_Model': score,
    'Score_Fine-tuned_Model': score_new,
    # Absolute difference (fine-tuned minus base) scaled by 100, i.e. the gain
    # expressed in percentage points, rounded to two decimals.
    'Performance Gain': [
        f"{round((score_f - score_b) * 100, 2)}%"
        for score_f, score_b in zip(score_new, score)
    ],
}
df_metrics = pd.DataFrame(data=data_json)

In [18]:
# Show the comparison table as this cell's rendered output.
df_metrics

Unnamed: 0,Metrics,Scores_Base_Model,Score_Fine-tuned_Model,Performance Gain
0,cosine_accuracy@10,0.827624,0.865193,3.76%
1,cosine_precision@1,0.60884,0.662983,5.41%
2,cosine_recall@10,0.827624,0.865193,3.76%
3,cosine_ndcg@10,0.711688,0.761822,5.01%
4,cosine_mrr@10,0.675325,0.72903,5.37%
5,cosine_map@100,0.680086,0.733604,5.35%


In [19]:
# Write the comparison table to a CSV file without the row index.
# The previous `errors=False` argument was dropped: `errors` must be a string
# naming an encoding error handler (default "strict"), so a bool is not a valid
# value for it.
df_metrics.to_csv("results_bge_model.csv", index=False)