### TextQA/Semantic Analysis

In [12]:
import yaml
import pandas as pd
import torch
!pip install -U sentence-transformers
from sentence_transformers import SentenceTransformer, util
from bert_score import score
from transformers import AutoTokenizer, AutoModel
from sklearn.metrics.pairwise import cosine_similarity

# Load configuration
with open("config.yaml", "r") as file:
    config = yaml.safe_load(file)

# Load models
st_model = SentenceTransformer(config["sentence_transformer_model"])
bert_tokenizer = AutoTokenizer.from_pretrained(config["bert_model"])
bert_model = AutoModel.from_pretrained(config["bert_model"])

def compute_similarity(human_solution, llm_solution):
    """Computes similarity scores using multiple methods"""
    
    # Sentence Transformers cosine similarity
    emb1 = st_model.encode(human_solution, convert_to_tensor=True)
    emb2 = st_model.encode(llm_solution, convert_to_tensor=True)
    st_similarity = util.pytorch_cos_sim(emb1, emb2).item()

    # BERT-Score
    P, R, F1 = score([llm_solution], [human_solution], lang="en", model_type=config["bert_model"])
    
    return {
        "st_similarity": st_similarity,
        "bert_score": F1.tolist()[0]
    }

# Example usage
human_text = "The issue is with database connectivity."
llm_text = "The error is caused by a database connection failure."
print(compute_similarity(human_text, llm_text))


Collecting sentence-transformers
  Downloading sentence_transformers-4.0.1-py3-none-any.whl.metadata (13 kB)
Downloading sentence_transformers-4.0.1-py3-none-any.whl (340 kB)
   ---------------------------------------- 0.0/340.6 kB ? eta -:--:--
   --------- ------------------------------ 81.9/340.6 kB 1.5 MB/s eta 0:00:01
   -------------------------- ------------- 225.3/340.6 kB 2.3 MB/s eta 0:00:01
   ---------------------------------------- 340.6/340.6 kB 2.6 MB/s eta 0:00:00
Installing collected packages: sentence-transformers
Successfully installed sentence-transformers-4.0.1


ModuleNotFoundError: No module named 'bert_score'