In [1]:
import spacy
from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.metrics.pairwise import cosine_similarity
import networkx as nx

In [2]:
# Load SpaCy model
nlp = spacy.load("en_core_web_sm")

def extract_facts(text):
    doc = nlp(text)
    facts = []
    for sent in doc.sents:
        if any(token.dep_ in ["nsubj", "dobj", "pobj"] for token in sent):
            facts.append(sent.text)
    return facts

In [3]:
original_text = "i will go to the store and buy some milk, then i will go back home. Later, I will go to the gym and work out."
facts = extract_facts(original_text)
print(facts)

['i will go to the store and buy some milk, then i will go back home.', 'Later, I will go to the gym and work out.']


In [4]:
def build_fact_graph(facts):
    G = nx.Graph()
    for fact in facts:
        doc = nlp(fact)
        for token in doc:
            if token.dep_ in ["nsubj", "dobj", "pobj"]:
                G.add_edge(token.head.text, token.text)
    return G

def compare_facts(original_facts, generated_facts):
    original_graph = build_fact_graph(original_facts)
    generated_graph = build_fact_graph(generated_facts)
    
    fabricated_facts = []
    for fact in generated_facts:
        if not any(nx.is_isomorphic(original_graph, generated_graph.subgraph(c)) 
                   for c in nx.connected_components(generated_graph)):
            fabricated_facts.append(fact)
    
    return fabricated_facts

def check_fact_fabrication(original_text, generated_text):
    original_facts = extract_facts(original_text)
    generated_facts = extract_facts(generated_text)
    
    fabricated_facts = compare_facts(original_facts, generated_facts)
    
    if fabricated_facts:
        print("Potential fabricated facts detected:")
        for fact in fabricated_facts:
            print(f"- {fact}")
    else:
        print("No fabricated facts detected.")

In [5]:
from utils import load_json, load_txt
actual_facts = load_json("../bigger_study_sample/001-57899.json")['facts']
generated_facts = load_txt("./gpt-4/001-57899.txt")

In [7]:
check_fact_fabrication(original_text, generated_facts)

Potential fabricated facts detected:
- The case involves Mr. Anthony Boner, a British citizen, born in 1960, who was convicted of multiple criminal offenses, including assault and armed robbery, in a trial held in the High Court of Justiciary, Scotland, between March 29 and April 10, 1990.
- The key facts of the case are as follows:


- **Key Events:**
   - On December 14, 1989, three masked men, armed with a shotgun and a knife, committed a robbery at a post office in Glen Village, where they assaulted three employees and caused damage to a car.
   - Mr. Boner, along with two others, was arrested following an investigation and was charged with assault, armed robbery, wilful damage, and firearm-related offenses.
   -
- During the trial, a witness, Mrs. G., was allowed to give evidence after being present in the courtroom before her testimony, which the defense objected to.
- The trial judge ruled that her earlier presence did not affect the fairness of her testimony.
- Mrs. G.'s eviden

In [16]:
actual_facts

'I. CIRCUMSTANCES OF THE CASE\n6. The applicant, Mr Anthony Boner, is a British citizen born in 1960. He lives in .\nOn 14 December 1989 three masked men, armed with a shotgun and a knife, carried out a robbery at a post office in Glen Village, , in the course of which they assaulted three post-office employees. They also caused damage to a motor car.\nFollowing investigations, Mr Boner and two other men were arrested and remanded in custody. The applicant was indicted on a charge of assault and armed robbery, a charge of wilful damage and three charges relating to firearms. Between 29 March and 10 April 1990 he stood trial in the High Court of Justiciary sitting in .\n7. The applicant received legal aid for the preparation of his defence and for his representation by counsel at the trial.\n8. During the trial a prosecution witness, Mrs G., entered the courtroom prior to giving evidence and spoke to one of the applicant s co-accused, against whom charges had been dropped. When Mrs G. w

In [6]:
generated_facts

'**Answer:**  \nThe case involves Mr. Anthony Boner, a British citizen, born in 1960, who was convicted of multiple criminal offenses, including assault and armed robbery, in a trial held in the High Court of Justiciary, Scotland, between March 29 and April 10, 1990. The key facts of the case are as follows:\n\n1. **Key Events:**\n   - On December 14, 1989, three masked men, armed with a shotgun and a knife, committed a robbery at a post office in Glen Village, where they assaulted three employees and caused damage to a car.\n   - Mr. Boner, along with two others, was arrested following an investigation and was charged with assault, armed robbery, wilful damage, and firearm-related offenses.\n   - During the trial, a witness, Mrs. G., was allowed to give evidence after being present in the courtroom before her testimony, which the defense objected to. The trial judge ruled that her earlier presence did not affect the fairness of her testimony. Mrs. G.\'s evidence implicated Mr. Boner i

In [2]:
import spacy
from sentence_transformers import SentenceTransformer
from sklearn.metrics.pairwise import cosine_similarity
import numpy as np

  from tqdm.autonotebook import tqdm, trange


In [3]:
# Load models
nlp = spacy.load("en_core_web_sm")
sentence_model = SentenceTransformer('all-MiniLM-L6-v2')

def extract_key_facts(text):
    doc = nlp(text)
    facts = []
    for sent in doc.sents:
        if any(token.dep_ in ["nsubj", "dobj", "pobj"] for token in sent):
            facts.append(sent.text)
    return facts

def semantic_similarity(sent1, sent2):
    emb1 = sentence_model.encode([sent1])
    emb2 = sentence_model.encode([sent2])
    return cosine_similarity(emb1, emb2)[0][0]

def check_fact_fabrication(original_text, generated_text, similarity_threshold=0.7):
    original_facts = extract_key_facts(original_text)
    generated_facts = extract_key_facts(generated_text)
    
    fabricated_facts = []
    for gen_fact in generated_facts:
        max_similarity = max(semantic_similarity(gen_fact, orig_fact) for orig_fact in original_facts)
        if max_similarity < similarity_threshold:
            fabricated_facts.append(gen_fact)
    
    return fabricated_facts

modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md:   0%|          | 0.00/10.7k [00:00<?, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt:   0%|          | 0.00/232k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/466k [00:00<?, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]



1_Pooling/config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

In [4]:
from utils import load_json, load_txt
actual_facts = load_json("../bigger_study_sample/001-57899.json")['facts']
generated_facts = load_txt("./gpt-4/001-57899.txt")

In [6]:
fabricated_facts = check_fact_fabrication(actual_facts, generated_facts)

if fabricated_facts:
    print("Potential fabricated facts detected:")
    for fact in fabricated_facts:
        print(f"- {fact}")
else:
    print("No fabricated facts detected.")

Potential fabricated facts detected:
- The case involves Mr. Anthony Boner, a British citizen, born in 1960, who was convicted of multiple criminal offenses, including assault and armed robbery, in a trial held in the High Court of Justiciary, Scotland, between March 29 and April 10, 1990.
- The key facts of the case are as follows:


- The trial judge ruled that her earlier presence did not affect the fairness of her testimony.
- Involved in denying Mr. Boner’s request for legal aid to appeal his conviction.
   
- Representing the government during the appeal.
   
- - **European Commission of Human Rights:**
- Referred the case to the European Court of Human Rights (ECHR).
   -
- **European Court of Human Rights (ECHR):** Ultimately ruled on the violation of Mr. Boner’s rights under Article 6 para.
- 3 (c) of the European Convention on Human Rights.


- 3. **Legal Issue:**
   - The primary legal issue in the case was whether Mr. Boner’s right to free legal assistance under Article 6 p