# In [None]:
import openai
from transformers import AutoTokenizer, AutoModelForCausalLM
import torch

# Function to evaluate sentence similarity using OpenAI GPT (commercial API)
def evaluate_similarity_gpt(sentence_pairs):
    """Score each sentence pair for similarity by asking GPT-4.

    One chat-completion request is sent per pair.

    Args:
        sentence_pairs: Iterable of ``(sentence_1, sentence_2)`` string pairs.

    Returns:
        A list of ``(sentence_1, sentence_2, score)`` tuples in input order.
        ``score`` is a float when a number could be read from the reply and
        ``None`` otherwise, matching ``evaluate_similarity_open_source``.

    Note:
        ``openai.ChatCompletion`` is the pre-1.0 interface of the ``openai``
        package. NOTE(review): confirm the installed version still provides it.
    """
    results = []
    for sent1, sent2 in sentence_pairs:
        prompt = f"Compare the similarity between the following sentences on a scale of 0 to 1, where 0 means completely dissimilar and 1 means identical:\n1. \"{sent1}\" vs. \"{sent2}\""
        response = openai.ChatCompletion.create(
            model="gpt-4",
            messages=[
                {"role": "system", "content": "You are an expert in semantic similarity."},
                {"role": "user", "content": prompt}
            ]
        )
        # The content may be missing/empty; normalise to a stripped string.
        reply = (response["choices"][0]["message"]["content"] or "").strip()
        try:
            score = float(reply)
        except ValueError:
            # The model is free to wrap the number in prose. Instead of
            # aborting the whole batch, fall back to the shared parser and
            # record None when no score can be recovered.
            score = extract_score_from_response(reply) if reply else None
        results.append((sent1, sent2, score))
    return results

# Function to evaluate sentence similarity using LLAMA or other open-source models
def evaluate_similarity_open_source(sentence_pairs, model_name="meta-llama/Llama-2-7b-chat-hf"):
    """Score each sentence pair for similarity with a Hugging Face causal LM.

    The tokenizer and model named by ``model_name`` are loaded once, then each
    pair is turned into a prompt and up to 50 new tokens are generated for it.

    Args:
        sentence_pairs: Iterable of ``(sentence_1, sentence_2)`` string pairs.
        model_name: Identifier passed to ``from_pretrained`` for both the
            tokenizer and the model.

    Returns:
        A list of ``(sentence_1, sentence_2, score)`` tuples in input order.
        ``score`` is whatever ``extract_score_from_response`` returns for the
        generated text, or ``None`` when nothing was generated.
    """
    tokenizer = AutoTokenizer.from_pretrained(model_name)
    model = AutoModelForCausalLM.from_pretrained(model_name, device_map="auto")

    results = []
    for sent1, sent2 in sentence_pairs:
        prompt = f"Compare the similarity between the following sentences on a scale of 0 to 1, where 0 means completely dissimilar and 1 means identical:\n- \"{sent1}\" vs. \"{sent2}\""
        # device_map="auto" decides where the model lives; follow it instead
        # of assuming CUDA so the function also runs on CPU-only hosts.
        inputs = tokenizer(prompt, return_tensors="pt").to(model.device)
        outputs = model.generate(**inputs, max_new_tokens=50)

        # For a causal LM the returned sequence starts with the prompt tokens;
        # drop them so only the model's own answer is parsed for a score.
        prompt_length = inputs["input_ids"].shape[-1]
        response = tokenizer.decode(outputs[0][prompt_length:], skip_special_tokens=True)

        # Extracting numerical score from response (empty generation -> None)
        score = extract_score_from_response(response) if response.strip() else None
        results.append((sent1, sent2, score))
    return results

# Helper function to extract score from the model's response
def extract_score_from_response(response):
    """Parse the trailing whitespace-separated token of *response* as a score.

    The score is assumed to be the last token of the text. Quotes, brackets,
    markdown emphasis and sentence punctuation wrapped around that token are
    ignored, so ``"... is 0.8."`` yields ``0.8``.

    Args:
        response: Text produced by the model. May be empty or ``None``.

    Returns:
        The score as a float, or ``None`` when the response is empty, the last
        token is not a number, or the number is not finite (``nan``/``inf``).
    """
    import math

    tokens = response.split() if response else []
    if not tokens:
        # Empty or whitespace-only output: there is no token to parse.
        return None

    # Only closing punctuation is stripped on the right and only opening
    # quotes/brackets on the left, so a leading "." as in ".5" survives.
    candidate = tokens[-1].lstrip("\"'([*`").rstrip("\"')]*`.,;:!?")
    try:
        score = float(candidate)
    except ValueError:
        return None  # Handle cases where the model output is not a valid score

    # float() also accepts words such as "nan" and "infinity"; those are text,
    # not usable scores.
    return score if math.isfinite(score) else None

# Sample input for the evaluators: each entry is a (first sentence, second
# sentence) tuple that will be scored for similarity.
sentence_pairs = [
    (
        "The cat is on the mat.",
        "The feline is sitting on the rug.",
    ),
    (
        "I am going to the market.",
        "I will visit the store.",
    ),
    (
        "She is reading a book.",
        "She is studying from a textbook.",
    ),
    (
        "The weather is sunny.",
        "It is raining heavily.",
    ),
]

# Optional: score the pairs with OpenAI GPT (commercial API).
# To enable, provide your OpenAI API key and uncomment the three lines below.
# openai.api_key = "your-openai-api-key"
# gpt_results = evaluate_similarity_gpt(sentence_pairs)
# print("Results from GPT:", gpt_results)

# Score the pairs with the default open-source model (LLAMA) and print
# the resulting (sentence, sentence, score) tuples.
llama_results = evaluate_similarity_open_source(sentence_pairs=sentence_pairs)
print("Results from LLAMA:", llama_results)
