In [None]:
from sklearn.metrics.pairwise import cosine_similarity
from sklearn.feature_extraction.text import TfidfVectorizer

# Shared TF-IDF vectorizer. compute_similarity() refits it on every call, so
# its learned vocabulary always reflects only the most recent sentence pair.
vectorizer = TfidfVectorizer()


def compute_similarity(sent1, sent2):
    """Return the TF-IDF cosine similarity between two sentences.

    The module-level ``vectorizer`` is refit on just these two sentences, so
    the score depends only on this pair and not on any earlier call.

    Args:
        sent1: First sentence (text).
        sent2: Second sentence (text).

    Returns:
        The cosine similarity of the two TF-IDF row vectors, or 0.0 when
        neither sentence produces a single token.
    """
    # fit_transform raises ValueError ("empty vocabulary") when no document
    # contributes a token, e.g. when both sentences are empty strings. Check
    # with the vectorizer's own analyzer so that case scores 0.0 instead of
    # aborting the caller's loop; other invalid input still raises as before.
    analyzer = vectorizer.build_analyzer()
    if not analyzer(sent1) and not analyzer(sent2):
        return 0.0

    tfidf_matrix = vectorizer.fit_transform([sent1, sent2])
    # Row 0 is sent1 and row 1 is sent2; slicing keeps each row 2-D, which is
    # the shape cosine_similarity expects. The result is a 1x1 matrix.
    return cosine_similarity(tfidf_matrix[0:1], tfidf_matrix[1:2])[0][0]

# Flag each row as a match when the TF-IDF similarity between its model output
# and its valid output reaches the threshold. The two columns are zipped
# directly instead of using iterrows(), which builds a Series for every row.
threshold = 0.8
matches = [
    compute_similarity(model_text, valid_text) >= threshold
    for model_text, valid_text in zip(data["model output"], data["valid output"])
]

# Count the number of matching and non-matching phrases
num_matches = sum(matches)
num_non_matches = len(matches) - num_matches

num_matches, num_non_matches


In [None]:
from collections import Counter

def compare_sentences_heuristic(sentence1, sentence2, *, threshold=0.6):
    """Decide whether two sentences are similar based on word overlap.

    Both sentences are lower-cased and split on whitespace. The overlap is
    the multiset intersection of their words, so a repeated word only counts
    as many times as it appears in both sentences.

    Args:
        sentence1: First sentence.
        sentence2: Second sentence.
        threshold: Fraction of each sentence's words that must be shared.
            The comparison is strict (``>``), so a ratio exactly equal to the
            threshold does not count as similar.

    Returns:
        True if the shared-word ratio exceeds ``threshold`` for both
        sentences, otherwise False. A sentence with no words (empty or
        whitespace-only) is never considered similar to anything.
    """
    # Tokenize and count words in each sentence
    words1 = Counter(sentence1.lower().split())
    words2 = Counter(sentence2.lower().split())

    total1 = sum(words1.values())
    total2 = sum(words2.values())

    # An empty sentence has nothing to overlap with; returning early also
    # avoids dividing by zero below.
    if total1 == 0 or total2 == 0:
        return False

    # Counter & Counter keeps the minimum count of each shared word
    shared = sum((words1 & words2).values())

    # Require the overlap to be large relative to BOTH sentences, so a short
    # sentence fully contained in a much longer one is not a match.
    return shared / total1 > threshold and shared / total2 > threshold

# Row-wise wrapper: feed one row's model output and valid output to the
# word-overlap heuristic.
def _row_is_consistent(record):
    return compare_sentences_heuristic(record['model output'], record['valid output'])

# Store the heuristic's verdict for every row in a new 'consistent' column
data['consistent'] = data.apply(_row_is_consistent, axis=1)

# Tally the rows flagged consistent; the remaining rows are inconsistent
consistent_count = data['consistent'].sum()
inconsistent_count = data.shape[0] - consistent_count

consistent_count, inconsistent_count
