# Bi-Encoder and Cross-Encoder Model Evaluation for Medical Terms

## Define Model Evaluation Method

In [23]:
import numpy as np
from sentence_transformers import SentenceTransformer
from sentence_transformers.cross_encoder import CrossEncoder

from pandas import DataFrame
import pandas as pd


def evaluate_model(model_name: str, model_type: str, df_evaluation: DataFrame) -> float:
    """
    Score a model by how often a target's best-scoring source is its own pair.

    A (num_sources x num_targets) similarity matrix is built between every
    'source' entry and every 'target' entry. Row i and column i originate from
    the same DataFrame row, so the expected pairing lies on the diagonal. For
    each target (column) the source (row) with the highest score is selected,
    and the result is the percentage of targets whose selected source sits on
    the diagonal.

    Args:
        model_name: Model identifier passed to SentenceTransformer or CrossEncoder.
        model_type: "BiEncoder" or "CrossEncoder".
        df_evaluation: DataFrame containing 'source' and 'target' columns.

    Returns:
        Percentage (0-100) of targets whose highest-scoring source is the
        paired one.

    Raises:
        ValueError: If model_type is not one of the two supported values, or
            if df_evaluation has no rows.
    """
    # Validate cheap inputs first so a typo does not cost a model download/load.
    if model_type not in ("BiEncoder", "CrossEncoder"):
        raise ValueError("Invalid model_type. Choose either 'BiEncoder' or 'CrossEncoder'.")

    sources = df_evaluation['source'].tolist()
    targets = df_evaluation['target'].tolist()
    if not sources:
        # Without rows the percentage would be 0/0; fail loudly instead of returning NaN.
        raise ValueError("df_evaluation must contain at least one row.")

    if model_type == "BiEncoder":
        model = SentenceTransformer(model_name)

        # Embed both sides independently, then compare all sources with all targets
        source_embeddings = model.encode(sources)
        target_embeddings = model.encode(targets)
        similarities = model.similarity(source_embeddings, target_embeddings)
    else:
        model = CrossEncoder(model_name)

        # A cross-encoder scores text pairs directly, so enumerate every
        # (source, target) combination in row-major order.
        pairs = [(src, tgt) for src in sources for tgt in targets]
        similarity_scores = model.predict(pairs, convert_to_tensor=True)

        # Reshape the flat scores into a matrix (num_sources x num_targets)
        similarities = similarity_scores.view(len(sources), len(targets))

    # The score tensor may live on an accelerator; .numpy() only works on
    # CPU tensors, so move it over before converting.
    score_matrix = similarities.detach().cpu().numpy()
    num_targets = score_matrix.shape[1]

    # Row index of the highest score in each column
    best_source_per_target = score_matrix.argmax(axis=0)

    # A match is a column whose best row is the diagonal entry
    number_of_matches = np.sum(best_source_per_target == np.arange(num_targets))
    return float(number_of_matches / num_targets * 100)


## Load Evaluation Data

In [25]:
from pathlib import Path

import pandas as pd

# Load the evaluation dataset.
# The path is assembled with pathlib so the separator is correct on every OS
# (a hard-coded backslash only resolves on Windows).
df_raw = pd.read_csv(Path('data') / 'bc-health-concerns-and-diagnosis-value-set-v3-constrained.csv')

# Keep only the two term columns and rename them to the 'source'/'target'
# names that evaluate_model reads.
df_data = df_raw[['SNOMED_Term', 'ICD10CA_Term']].copy()
df_data.columns = ['source', 'target']

# Evaluate on the first 10 rows only (presumably to keep runs short, since the
# CrossEncoder path scores every source/target combination; confirm before
# drawing conclusions from the reported accuracies).
df_data = df_data.head(10)

## Evaluate and Save Model Performance

In [None]:
# Read the list of models to benchmark: one row per model, with its name in
# 'Model' and its kind in 'Model Type'.
df_model = pd.read_csv('Models.csv')

# Walk the index and the two relevant columns in lockstep instead of
# materialising a full row object per model.
model_columns = zip(df_model.index, df_model['Model'], df_model['Model Type'])
for row_label, raw_name, raw_type in model_columns:
    # Strip stray whitespace around the CSV values before using them
    model_name = raw_name.strip()
    model_type = raw_type.strip()

    # Score this model on the evaluation pairs and record it on its row
    accuracy = evaluate_model(model_name, model_type, df_data)
    df_model.at[row_label, 'Accuracy'] = accuracy

    print(f"Model: {model_name}, Type: {model_type}, Accuracy: {accuracy:.2f}%")

# Persist the model list together with the new 'Accuracy' column
df_model.to_csv('Models_Accuracy.csv', index=False)



Model: all-MiniLM-L6-v2, Type: BiEncoder, Accuracy: 90.00%
Model: sentence-transformers/all-mpnet-base-v2, Type: BiEncoder, Accuracy: 90.00%
Model: sentence-transformers/allenai-specter, Type: BiEncoder, Accuracy: 100.00%


Xet Storage is enabled for this repo, but the 'hf_xet' package is not installed. Falling back to regular HTTP download. For better performance, install the package with: `pip install huggingface_hub[hf_xet]` or `pip install hf_xet`
