In [None]:
!pip install spacy
!python -m spacy download en_core_web_sm


In [4]:
import spacy
import pandas as pd

# Load spaCy's small English pipeline; extract_entities() below runs it on
# each sentence and reads the recognised entities from `doc.ents`.
nlp = spacy.load("en_core_web_sm")

# Read the evaluation CSV into a DataFrame. The scoring loop further down
# reads its "Silver_Sentence" and "amalREC_Test_Sentence" columns.
file_path = "/content/sampled_dataECBGoldSilver.csv"  # Path for Google Colab
dataset = pd.read_csv(file_path)

# Function to extract entities from a sentence
def extract_entities(text):
    """Return the named entities spaCy finds in ``text``.

    Args:
        text: Sentence to analyse. Any value that is not a ``str`` (for
            example a NaN read from a CSV cell) produces no entities.

    Returns:
        list[tuple[str, str]]: One ``(entity_text, entity_label)`` pair per
        entity in ``doc.ents``, in document order; ``[]`` for non-string
        input.
    """
    if isinstance(text, str):
        parsed = nlp(text)
        return [(span.text, span.label_) for span in parsed.ents]
    # Non-string values carry no text to parse.
    return []

# Function to compute FactScore
def compute_factscore(reference, generated):
    """Score how many reference entities reappear in the generated text.

    Both texts are run through ``extract_entities`` and their
    ``(text, label)`` pairs are de-duplicated into sets. A pair counts as a
    match only when both the entity text and its label are equal.

    Args:
        reference: Reference sentence.
        generated: Generated sentence compared against the reference.

    Returns:
        The fraction of distinct reference entities that also occur in the
        generated sentence, or ``0`` when the reference has no entities.
    """
    expected = set(extract_entities(reference))
    produced = set(extract_entities(generated))

    # With no reference entities there is nothing to recall; score is 0.
    if not expected:
        return 0

    shared = expected & produced
    return len(shared) / len(expected)

# Score every row that has both a reference and a generated sentence.
# Cells that are missing (NaN / non-string) or empty are skipped, so they do
# not count towards the average.
fact_scores = []

# Iterate the two columns directly instead of DataFrame.iterrows(), which
# builds a full Series for every row just to read two values from it.
sentence_pairs = zip(dataset["Silver_Sentence"], dataset["amalREC_Test_Sentence"])
for reference_sentence, generated_sentence in sentence_pairs:
    if not (isinstance(reference_sentence, str) and reference_sentence):
        continue
    if not (isinstance(generated_sentence, str) and generated_sentence):
        continue
    fact_scores.append(compute_factscore(reference_sentence, generated_sentence))

total_factscore = sum(fact_scores)
valid_pairs = len(fact_scores)

# Calculate the average FactScore (0 when no row was usable)
average_factscore = total_factscore / valid_pairs if valid_pairs else 0

# Save the average score to a file; the encoding is pinned so the output
# does not depend on the platform's default locale.
output_path = "/content/average_factscore.txt"
with open(output_path, "w", encoding="utf-8") as file:
    file.write(f"Average FactScore: {average_factscore:.4f}\n")

print(f"Average FactScore: {average_factscore:.4f}")
print(f"Average FactScore saved to {output_path}")


Average FactScore: 0.5124
Average FactScore saved to /content/average_factscore.txt
