# In [None]:
import string
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import nltk
from nltk.corpus import stopwords
from collections import Counter
import numpy as np
from functools import lru_cache

# Fetch the NLTK stopwords corpus so that stopwords.words('english') below
# can load it; this must stay ahead of the stop_words assignment.
nltk.download('stopwords')

# Load the set of stopwords and define negation words
# stop_words is a set so the per-token membership tests in preprocess_text
# are constant-time.
stop_words = set(stopwords.words('english'))
# Tokens treated as negation markers by preprocess_text and check_negation.
negation_words = {"not", "n't", "no", "never"}

# Load the sentence-embedding model used by get_embedding.
# NOTE(review): the checkpoint name refers to SciBERT; confirm that this is
# the intended model for sentence-similarity scoring (the original comment
# described it as an SBERT model fine-tuned on engineering texts).
model = SentenceTransformer('allenai/scibert_scivocab_uncased')

def preprocess_text(text):
    """Tokenize *text*, drop stopwords, and mark negated words.

    The text is lower-cased, negative contractions are expanded
    ("isn't" -> "is not"), ASCII punctuation is removed, and the result is
    split on whitespace.

    Args:
        text: The raw string to process.

    Returns:
        A ``(processed_words, original_words)`` tuple of lists.
        ``processed_words`` holds the kept content words; the first content
        word following a negation word is prefixed with ``"NOT_"``.
        ``original_words`` holds the same content words without the prefix,
        plus the negation words themselves, in order of appearance.
    """
    # Normalise the typographic apostrophe so the contraction handling below
    # also sees "isn’t" style input.
    text = text.lower().replace("\u2019", "'")

    # Expand negative contractions BEFORE stripping punctuation. Otherwise
    # "isn't" collapses to "isnt", which matches no entry in negation_words,
    # and the negation is silently lost. Irregular forms come first so the
    # generic "n't" rule does not leave stray stems such as "ca" or "wo".
    for contraction, expansion in (
        ("can't", "can not"),
        ("won't", "will not"),
        ("shan't", "shall not"),
        ("n't", " not"),
    ):
        text = text.replace(contraction, expansion)

    text = text.translate(str.maketrans('', '', string.punctuation))
    words = text.split()
    processed_words = []
    original_words = []
    negate = False

    for word in words:
        if word in negation_words:
            # Remember the negation; it is applied to the next content word.
            negate = True
            original_words.append(word)
            continue
        if word in stop_words:
            # Stopwords are dropped but do not consume a pending negation.
            continue
        processed_words.append("NOT_" + word if negate else word)
        negate = False
        original_words.append(word)

    return processed_words, original_words

def check_negation(original_words):
    """Report whether any token in *original_words* is a negation word."""
    for token in original_words:
        if token in negation_words:
            return True
    return False

def remove_redundant_words(words):
    """Return *words* as a list with repeats dropped, keeping first-seen order."""
    seen = set()
    unique = []
    for token in words:
        if token not in seen:
            seen.add(token)
            unique.append(token)
    return unique

def remove_question_keywords(answer_words, question_words):
    """Return the answer words that do not also appear in *question_words*.

    Order and duplicates of the surviving answer words are preserved.
    """
    kept = []
    for token in answer_words:
        if token in question_words:
            continue
        kept.append(token)
    return kept

def compute_word_frequency_vector(words, unique_words):
    """Count how often each entry of *unique_words* occurs in *words*.

    Returns a list of counts aligned with *unique_words*; vocabulary entries
    that never occur in *words* get 0.
    """
    tally = Counter(words)
    counts = []
    for term in unique_words:
        counts.append(tally.get(term, 0))
    return counts

def classify_question(question):
    """Classify *question* as ``"explanation"`` or ``"fact"``.

    A question is an explanation question when one of its words is "why" or
    "how", or starts with "explain" or "describe" (so "explains" and
    "described" also count). Everything else is a fact question.

    Matching is done on whole word tokens rather than on raw substrings, so
    that "how" is not found inside unrelated words such as "show".

    Args:
        question: The question text.

    Returns:
        ``"explanation"`` or ``"fact"``.
    """
    stem_keywords = ("explain", "describe")
    exact_keywords = {"why", "how"}

    # Turn punctuation into spaces so "how's" and "why/how" split into
    # separate tokens before matching.
    punctuation_to_space = str.maketrans(
        string.punctuation, " " * len(string.punctuation)
    )
    tokens = question.lower().translate(punctuation_to_space).split()

    for token in tokens:
        if token in exact_keywords or token.startswith(stem_keywords):
            return "explanation"
    return "fact"

@lru_cache(maxsize=10000)
def get_embedding(text):
    """Encode *text* with the module-level model, memoising by input string.

    Up to 10000 distinct inputs are kept in the cache; a repeated input
    returns the previously computed result object.
    """
    embedding = model.encode(text)
    return embedding

def evaluate_with_keywords(reference_answer, candidate_answer, question):
    """Score the candidate against the reference by keyword overlap.

    All three texts are preprocessed; words that also occur in the question
    are removed from both answers, duplicates are dropped, and the two
    resulting keyword lists are compared with cosine similarity over their
    word-count vectors.

    Args:
        reference_answer: The expected answer text.
        candidate_answer: The answer text being graded.
        question: The question text.

    Returns:
        A 7-tuple ``(match_percentage, ref_filtered, cand_filtered,
        question_processed, ref_original, cand_original,
        question_original)``. ``match_percentage`` is the cosine similarity
        scaled to 0-100; it is ``0.0`` when either filtered keyword list is
        empty.
    """
    question_processed, question_original = preprocess_text(question)
    ref_processed, ref_original = preprocess_text(reference_answer)
    cand_processed, cand_original = preprocess_text(candidate_answer)

    ref_filtered = remove_question_keywords(ref_processed, question_processed)
    cand_filtered = remove_question_keywords(cand_processed, question_processed)

    ref_filtered = remove_redundant_words(ref_filtered)
    cand_filtered = remove_redundant_words(cand_filtered)

    if not ref_filtered or not cand_filtered:
        # With no keywords on one side there is nothing to match. When both
        # sides are empty the vectors would have zero features, which
        # cosine_similarity rejects with a ValueError, so short-circuit here.
        match_percentage = 0.0
    else:
        # Shared vocabulary; its ordering does not affect cosine similarity.
        unique_words = list(set(ref_filtered + cand_filtered))

        ref_vector = compute_word_frequency_vector(ref_filtered, unique_words)
        cand_vector = compute_word_frequency_vector(cand_filtered, unique_words)

        # cosine_similarity expects 2-D inputs: one row per sample.
        ref_vector = np.array(ref_vector).reshape(1, -1)
        cand_vector = np.array(cand_vector).reshape(1, -1)

        cosine_sim = cosine_similarity(ref_vector, cand_vector)[0][0]
        match_percentage = cosine_sim * 100

    return match_percentage, ref_filtered, cand_filtered, question_processed, ref_original, cand_original, question_original

def evaluate_with_sbert(reference_answer, candidate_answer, question):
    """Score the candidate against the reference by embedding similarity.

    Both answers are preprocessed, re-joined into space-separated strings,
    embedded via ``get_embedding``, and compared with cosine similarity.
    The question is preprocessed only so its tokens can be returned; it does
    not influence the score.

    Args:
        reference_answer: The expected answer text.
        candidate_answer: The answer text being graded.
        question: The question text.

    Returns:
        A 7-tuple ``(match_percentage, ref_processed, cand_processed,
        question_processed, ref_original, cand_original,
        question_original)`` where ``match_percentage`` is the cosine
        similarity multiplied by 100.
    """
    ref_processed, ref_original = preprocess_text(reference_answer)
    cand_processed, cand_original = preprocess_text(candidate_answer)
    # Preprocess the question once instead of once per returned element.
    question_processed, question_original = preprocess_text(question)

    ref_embedding = get_embedding(" ".join(ref_processed))
    cand_embedding = get_embedding(" ".join(cand_processed))

    cosine_sim = cosine_similarity([ref_embedding], [cand_embedding])[0][0]
    match_percentage = cosine_sim * 100

    return match_percentage, ref_processed, cand_processed, question_processed, ref_original, cand_original, question_original

def adjust_for_negation(match_percentage, question_original, ref_original, cand_original):
    """Penalise the score when negation in the candidate is out of step.

    - Candidate negates but neither question nor reference does: score x 0.3.
    - Question or reference negates but the candidate does not: score x 0.5.
    - Any other combination: score returned unchanged.
    """
    q_negated = check_negation(question_original)
    r_negated = check_negation(ref_original)
    c_negated = check_negation(cand_original)

    negation_expected = q_negated or r_negated

    if c_negated and not negation_expected:
        # Unprompted negation in the candidate.
        match_percentage *= 0.3
        return match_percentage

    if negation_expected and not c_negated:
        # Candidate omits a negation that the question or reference carries.
        match_percentage *= 0.5
        return match_percentage

    return match_percentage

def generalize_score(sbert_score, keyword_score, question_original, ref_original, cand_original):
    """Blend the embedding and keyword scores, then apply the negation penalty.

    When the two scores are within 10 points of each other they are averaged;
    otherwise the embedding score is weighted 0.7 and the keyword score 0.3.
    The blended value is then passed through ``adjust_for_negation``.
    """
    scores_agree = abs(sbert_score - keyword_score) <= 10.0
    blended = (
        (sbert_score + keyword_score) / 2
        if scores_agree
        else (sbert_score * 0.7) + (keyword_score * 0.3)
    )
    return adjust_for_negation(blended, question_original, ref_original, cand_original)

def evaluate_answer(reference_answer, candidate_answer, question):
    """Grade *candidate_answer* against *reference_answer* for *question*.

    Explanation questions are scored by embedding similarity alone. Fact
    questions combine the keyword score and the embedding score through
    ``generalize_score``.

    Returns:
        ``(match_percentage, ref_keywords, cand_keywords, question_keywords)``.
    """
    if classify_question(question) == "explanation":
        (
            match_percentage,
            ref_keywords,
            cand_keywords,
            question_keywords,
            _ref_original,
            _cand_original,
            _question_original,
        ) = evaluate_with_sbert(reference_answer, candidate_answer, question)
        return match_percentage, ref_keywords, cand_keywords, question_keywords

    # Fact question: keyword overlap first, then the embedding score.
    (
        keyword_pct,
        ref_keywords,
        cand_keywords,
        question_keywords,
        ref_original,
        cand_original,
        question_original,
    ) = evaluate_with_keywords(reference_answer, candidate_answer, question)
    sbert_pct, _, _, _, _, _, _ = evaluate_with_sbert(reference_answer, candidate_answer, question)

    match_percentage = generalize_score(
        sbert_pct, keyword_pct, question_original, ref_original, cand_original
    )
    return match_percentage, ref_keywords, cand_keywords, question_keywords

# Function to evaluate a specific input
def evaluate_specific_input(reference_answer, candidate_answer, question):
    """Run ``evaluate_answer`` on one input and print the score and keywords."""
    result = evaluate_answer(reference_answer, candidate_answer, question)
    match_percentage, ref_keywords, cand_keywords, question_keywords = result

    print(f"Generalized Match Percentage: {match_percentage:.2f}%")
    print(f"Reference Keywords: {ref_keywords}")
    print(f"Candidate Keywords: {cand_keywords}")
    print(f"Question Keywords: {question_keywords}")

# Call the function with the given input
# NOTE(review): reference_answer, candidate_answer and question are not
# assigned anywhere in the visible code; presumably they are set in an
# earlier notebook cell. Confirm before running this cell on its own.
evaluate_specific_input(reference_answer, candidate_answer, question)
