In [1]:
from compare_models import (
    compare_target_sentence_rankings,
    cosine_distance,
    euclidean_distance,
)
import pandas as pd
from sentence_transformers import SentenceTransformer

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from embedding_providers import OpenAIEmbeddingProvider, SentenceTransformerProvider
import openai
from dotenv import load_dotenv
import os

env_path = "/Users/ford/Documents/coding/confidential/.env"
load_dotenv(env_path)
api_key = os.getenv("OPENAI_API_KEY")
assert api_key, "API key is missing"
# Set your OpenAI API key
openai.api_key = api_key

# Initialize embedding providers with caching
embedding_models = {
    "all-MiniLM-L6-v2": SentenceTransformerProvider("all-MiniLM-L6-v2"),
    "all-mpnet-base-v2": SentenceTransformerProvider("all-mpnet-base-v2"),
    "openai-small": OpenAIEmbeddingProvider(model="text-embedding-3-small"),
}

In [3]:
input_sentences = [
    "Market risk affects investment returns",
    "Cybersecurity poses significant threats",
    "Credit risk in lending operations",
]

candidate_sets = {
    "candidate_set_1": {
        "candidate_sentences": [
            "Market fluctuations impact returns",
            "Weather is nice today",
            "Investment returns affected by market",
            "Cybersecurity threats are increasing",
        ],
        "target_sentence": "Market fluctuations impact returns",
    },
    "candidate_set_1": {
        "candidate_sentences": [
            "Credit risk assessment in banking",
            "Market volatility affects investments",
            "Cyber attacks on organizations",
            "Risk management in finance",
        ],
        "target_sentence": "Market volatility affects investments",
    },
    "candidate_set_1": {
        "candidate_sentences": [
            "Financial market volatility",
            "Credit risk evaluation methods",
            "Information security threats",
            "Enterprise risk management",
        ],
        "target_sentence": "Financial market volatility",
    },
}


# Initialize distance functions
distance_functions = {"cosine": cosine_distance, "euclidean": euclidean_distance}

comparison_results = compare_target_sentence_rankings(
    input_sentences, candidate_sets, embedding_models, distance_functions
)

In [7]:
comparison_result_df = pd.DataFrame(comparison_results)

In [8]:
comparison_result_df.shape

(18, 8)

In [9]:
comparison_result_df.head()

Unnamed: 0,input_sentence,target_sentence,candidate_sentences,embedded_model,distance_method,sorted_similar_sentences,sorted_similar_sentences_indices,target_order_in_sorted_similar_sentences
0,Market risk affects investment returns,Financial market volatility,"[Financial market volatility, Credit risk eval...",all-MiniLM-L6-v2,cosine,"[Financial market volatility, Enterprise risk ...","[0, 3, 1, 2]",1
1,Market risk affects investment returns,Financial market volatility,"[Financial market volatility, Credit risk eval...",all-MiniLM-L6-v2,euclidean,"[Financial market volatility, Enterprise risk ...","[0, 3, 1, 2]",1
2,Market risk affects investment returns,Financial market volatility,"[Financial market volatility, Credit risk eval...",all-mpnet-base-v2,cosine,"[Financial market volatility, Enterprise risk ...","[0, 3, 1, 2]",1
3,Market risk affects investment returns,Financial market volatility,"[Financial market volatility, Credit risk eval...",all-mpnet-base-v2,euclidean,"[Financial market volatility, Enterprise risk ...","[0, 3, 1, 2]",1
4,Market risk affects investment returns,Financial market volatility,"[Financial market volatility, Credit risk eval...",openai-small,cosine,"[Financial market volatility, Enterprise risk ...","[0, 3, 1, 2]",1
