In [15]:
import pandas as pd
import json

from annotated_types.test_cases import cases
from openai import OpenAI
import random
import time

In [16]:
input_file = "Data/Processed/processed_train_data.json"
output_file = "Data/Processed/processed_train_data_filtered_dpo.json"

In [17]:
# Step 1: Load the dataset
with open(input_file, 'r') as file:
    data = json.load(file)

# Convert to DataFrame for easier manipulation
df = pd.DataFrame(data)

# Step 2: Extract elements where is_Critical is True
critical_true = df[df['is_Critical'] == True]

# Step 3: Randomly sample 7000 elements where is_Critical is False
critical_false_sampled = df[df['is_Critical'] == False].sample(n=3000, random_state=42)

# Step 4: Combine both DataFrames
combined_df = pd.concat([critical_true, critical_false_sampled])

# Shuffle the combined dataset to avoid any ordering bias
combined_df = combined_df.sample(frac=1, random_state=42).reset_index(drop=True)

print("Combined dataset created successfully with the following distribution:")
print(combined_df['is_Critical'].value_counts())

Combined dataset created successfully with the following distribution:
is_Critical
True     3128
False    3000
Name: count, dtype: int64


## Evaluation

In [18]:
import pandas as pd
import spacy
import nltk

nltk.download('framenet_v17')
nlp = spacy.load("en_core_web_sm")

[nltk_data] Downloading package framenet_v17 to
[nltk_data]     /Users/ricostadeli/nltk_data...
[nltk_data]   Package framenet_v17 is already up-to-date!


### Cause to Effect

In [19]:
from nltk.corpus import framenet as fn


def get_causal_verbs_from_framenet():
    causal_frame_names = [
        "Causation", "Cause_change", "Cause_change_of_position_on_a_scale",
        "Cause_motion", "Cause_to_amalgamate", "Cause_to_start", "Cause_to_make_progress",
        "Causation_scenario", "Cause_to_end", "Cause_to_resume",
        "Cause_to_continue", "Cause_change_of_consistency", "Cause_expansion", "Cause_impact"
    ]

    causal_verbs = set()
    for frame_name in causal_frame_names:
        try:
            frame = fn.frame_by_name(frame_name)
            for lu in frame.lexUnit.values():
                if '.v' in lu['name']:  # Only verbs
                    causal_verbs.add(lu['name'].split('.')[0])
        except Exception as e:
            print(f"Error loading frame '{frame_name}': {e}")

    return causal_verbs


causal_meta_terms = {"generalisation", "implies", "entail", "necessitate", "follow from", "inference"}
alternative_factor_terms = {"factor", "interfere", "influence", "affect", "contribute", "complicate"}


def detect_cause_to_effect(sentence):
    doc = nlp(sentence)
    explanations = []
    score = 0

    causal_verbs = get_causal_verbs_from_framenet()

    has_condition = any(tok.dep_ == "mark" and tok.text.lower() in {"if", "when"} for tok in doc)
    if has_condition:
        explanations.append("✓ Conditional clause detected (e.g., 'if', 'when')")
        score += 3

    has_advcl = any(tok.dep_ == "advcl" for tok in doc)
    if has_advcl:
        explanations.append("✓ Adverbial clause (likely effect clause) detected")
        score += 2

    has_causal_verb_structure = False
    for tok in doc:
        if tok.lemma_ in causal_verbs and tok.pos_ == "VERB":
            subj = any(child.dep_ == "nsubj" for child in tok.children)
            obj = any(child.dep_ == "dobj" for child in tok.children)
            prep = any(child.dep_ == "prep" for child in tok.children)
            if subj or obj or prep:
                has_causal_verb_structure = True
                explanations.append(
                    f"✓ Verb '{tok.lemma_}' is listed in FrameNet under causal frames with subject/object/prep"
                )
                score += 3
                if subj: score += 0.5
                if obj: score += 0.5
                if prep: score += 0.5
                break

    if any(tok.lemma_ in causal_meta_terms for tok in doc):
        explanations.append("✓ Causal generalisation or implication term detected (e.g., 'implies', 'generalisation')")
        score += 1

    if any(tok.lemma_ in alternative_factor_terms for tok in doc):
        explanations.append("✓ Terms indicating alternative causes or interfering factors detected")
        score += 1

    is_causal = has_condition and has_advcl or has_causal_verb_structure
    if not is_causal:
        causal_phrases = ["result in", "lead to", "may cause", "because of", "due to", "given rise to",
                          "resulting from", "stemming from", "driven by", "caused by", "attributed to", "stems from",
                          "reason", "result of", "consequence of", "owning to", "thus", "so", "therefore",
                          "hence"  "thereby"]
        if any(phrase in sentence.lower() for phrase in causal_phrases):
            explanations.append("✓ Phrase pattern matches known cause-to-effect trigger")
            score += 2

    score = min(score, 10)
    label = "Strong CauseToEffect" if score >= 7 else "Weak/Partial CauseToEffect" if score >= 4 else "Not CauseToEffect"
    return label, score, explanations

### Expert Opinion

In [20]:
def get_lexical_units_from_frames(frames):
    terms = set()
    for frame_name in frames:
        try:
            frame = fn.frame_by_name(frame_name)
            for lu in frame.lexUnit.values():
                if '.v' in lu['name']:
                    terms.add(lu['name'].split('.')[0])
        except Exception as e:
            print(f"Warning: Could not load frame '{frame_name}': {e}")
    return terms


expert_frames = [
    "Expertise", "Judgment_communication", "Opinion",
    "Authority", "Statement", "Certainty"
]
quote_frames = ["Statement", "Judgment_communication"]
clarity_frames = ["Reasoning"]
evidence_frames = ["Evidence", "Certainty", "Causation"]

expert_verbs = get_lexical_units_from_frames(expert_frames)
quote_verbs = get_lexical_units_from_frames(quote_frames)
clarity_terms = get_lexical_units_from_frames(clarity_frames)
evidence_terms = get_lexical_units_from_frames(evidence_frames)


def detect_expert_opinion(question):
    doc = nlp(question)
    score = 0
    explanations = []

    expert_titles = {"expert", "researcher", "scientist", "doctor", "analyst", "professor", "Dr."}

    implicit_expert_terms = {"study", "research", "evidence", "report", "findings", "scientific", "government",
                             "official", "paper", "survey", "data"}
    comparison_cues = {"consistent", "align", "similar", "agree", "disagree", "corroborate", "conflict"}
    technical_request_verbs = {"define", "explain", "describe", "elaborate", "clarify"}
    assertion_verbs = {"assert", "affirm", "pronounce", "declare", "maintain", "claim", "state"}
    reference_terms = {"quote", "reference", "cite", "check", "verify", "source"}
    domain_terms = {"science", "scientific", "domain", "field", "discipline", "area", "sector"}

    for ent in doc.ents:
        if ent.label_ in {"PERSON", "ORG"}:
            if any(title in ent.text.lower() for title in expert_titles):
                explanations.append(f"✓ Expert entity detected: '{ent.text}'")
                score += 3
                break

    if any(tok.lemma_ in expert_verbs for tok in doc if tok.pos_ == "VERB"):
        explanations.append("✓ Detected expert-related verb from FrameNet")
        score += 2

    if any(tok.lemma_ in quote_verbs for tok in doc):
        explanations.append("✓ Quotation or claim verb found")
        score += 1

    if any(tok.lemma_ in clarity_terms for tok in doc):
        explanations.append("✓ Clarity/definition markers found")
        score += 1

    if any(tok.lemma_ in evidence_terms for tok in doc):
        explanations.append("✓ Evidence or support-related terms found")
        score += 2

    if any(tok.lemma_.lower() in implicit_expert_terms for tok in doc):
        explanations.append("✓ Implicit expert-related term detected (e.g., 'study', 'government')")
        score += 2

    if any(tok.lemma_.lower() in comparison_cues for tok in doc):
        explanations.append("✓ Cross-study comparison term detected (e.g., 'consistent', 'similar')")
        score += 0.5

    if any(tok.lemma_.lower() in technical_request_verbs for tok in doc):
        explanations.append("✓ Technical explanation request detected (e.g., 'define', 'explain')")
        score += 1

    if any(tok.dep_ == "attr" and tok.lemma_ == "expert" for tok in doc):
        explanations.append("✓ Predicate nominative indicating expertise detected (e.g., 'X is an expert')")
        score += 2

    if any(tok.lemma_.lower() in assertion_verbs for tok in doc):
        explanations.append("✓ Assertion or claim verb detected (e.g., 'assert', 'affirm')")
        score += 1

    if any(tok.lemma_.lower() in reference_terms for tok in doc):
        explanations.append("✓ Source/reference validation term detected (e.g., 'quote', 'reference')")
        score += 1

    if any(tok.lemma_.lower() in domain_terms for tok in doc):
        explanations.append("✓ Domain relevance indicator detected (e.g., 'science', 'domainD')")
        score += 1

    label = "Strong Expert Opinion" if score >= 7 else "Weak/Partial Expert Opinion" if score >= 4 else "Not Expert Opinion"
    return label, score, explanations

### Analogy

In [21]:
from nltk.corpus import wordnet as wn

nltk.download('wordnet')
analogy_synsets = [wn.synset('similar.a.01'), wn.synset('analogy.n.01'), wn.synset('compare.v.01')]

comparison_frames = ["Similarity"]
contrast_frames = ["Categorization"]
evidence_frames = ["Evidence", "Judgment_communication"]

comparison_verbs = get_lexical_units_from_frames(comparison_frames)
contrast_verbs = get_lexical_units_from_frames(contrast_frames)
evidence_verbs = get_lexical_units_from_frames(evidence_frames)


def is_semantically_analogical(token):
    token_synsets = wn.synsets(token.lemma_)
    for s in token_synsets:
        for analogy_syn in analogy_synsets:
            if s.path_similarity(analogy_syn) and s.path_similarity(analogy_syn) > 0.3:
                return True
    return False


analogy_context_cues = {"respect", "in which", "such that", "with regard to", "in terms of"}

analogy_force_cues = {"undermine", "weaken", "strengthen", "force of similarity", "degree of analogy"}

analogy_nouns = {"analogy", "comparison", "parallel", "similarity", "analogue"}


def detect_analogy_question(question):
    doc = nlp(question)
    score = 0
    explanations = []
    noun_chunks = list(doc.noun_chunks)

    if any(tok.lemma_ in comparison_verbs for tok in doc if tok.pos_ == "VERB"):
        explanations.append("✓ Comparison verb detected from FrameNet")
        score += 2.5

    entity_tokens = [tok for tok in doc if tok.pos_ in {"PROPN", "NOUN"}]
    if len(set(tok.lemma_ for tok in entity_tokens)) >= 2:
        explanations.append("✓ Contains at least two distinct concepts/entities")
        score += 1

    if any(tok.lemma_ in contrast_verbs for tok in doc if tok.pos_ == "VERB"):
        explanations.append("✓ Contrast or difference verb detected from FrameNet")
        score += 1

    if any(tok.lemma_ in evidence_verbs for tok in doc if tok.pos_ == "VERB"):
        explanations.append("✓ Evidence or justification verb found")
        score += 1

    if any(tok.tag_ == "MD" for tok in doc):
        score += 0.5

    if len(noun_chunks) >= 2 and any(tok.lemma_ in {"similar", "like", "as"} for tok in doc):
        explanations.append("✓ Two concepts compared with similarity cue (e.g., 'similar', 'like')")
        score += 3

    if any(tok.text.lower() == "if" for tok in doc):
        explanations.append("✓ Conditional structure suggesting hypothetical reasoning")
        score += 1

    if any(is_semantically_analogical(tok) for tok in doc if tok.pos_ in {"ADJ", "NOUN", "VERB"}):
        explanations.append("✓ Semantic similarity to analogy-related terms detected via WordNet")
        score += 2

    if any(tok.dep_ in {"prep", "relcl"} and tok.lemma_ in {"compare", "similar"} for tok in doc):
        explanations.append("✓ Syntactic cue of analogy (e.g., 'compared with', 'similar to')")
        score += 1

    if any(phrase in question.lower() for phrase in analogy_context_cues):
        explanations.append("✓ Contextual analogy marker detected (e.g., 'in which', 'such that')")
        score += 0.5

    if any(tok.lemma_ in analogy_force_cues for tok in doc):
        explanations.append("✓ Analogy evaluation term detected (e.g., 'undermine', 'strengthen')")
        score += 0.5

    if any(tok.lemma_ in analogy_nouns for tok in doc if tok.pos_ == "NOUN"):
        explanations.append("✓ Explicit analogy noun detected (e.g., 'analogy', 'comparison')")
        score += 2

    if any(tok.dep_ == "neg" for tok in doc):
        if any(tok.lemma_ in {"similar", "compare", "alike", "match"} for tok in doc):
            explanations.append("✓ Negated comparison detected (suggesting analogy breakdown)")
            score += 1

    score = min(score, 10)
    label = "Strong Analogy Question" if score >= 7 else "Weak/Partial Analogy Question" if score >= 4 else "Not Analogy Question"
    return label, score, explanations

[nltk_data] Downloading package wordnet to
[nltk_data]     /Users/ricostadeli/nltk_data...
[nltk_data]   Package wordnet is already up-to-date!


### Fear

In [22]:
def is_fear_related(token):
    syns = wn.synsets(token.lemma_)
    for s in syns:
        if any(s.path_similarity(wn.synset('danger.n.01')) or s.path_similarity(wn.synset('problem.n.01')) or
               s.path_similarity(wn.synset('fear.n.01')) or s.path_similarity(wn.synset('harm.n.01')) or
               s.path_similarity(wn.synset('threat.n.01')) for s in syns):
            return True
    return False


# ---- FrameNet Utility ----
def get_lexical_units_from_frames(frames):
    terms = set()
    for frame_name in frames:
        try:
            frame = fn.frame_by_name(frame_name)
            for lu in frame.lexUnit.values():
                if '.v' in lu['name']:
                    terms.add(lu['name'].split('.')[0])
        except:
            continue
    return terms


# ---- Relevant Lexical Resources ----
causal_frames = ["Causation", "Cause_to_start", "Preventing", "Risk", "Threaten", "Danger"]
causal_verbs = get_lexical_units_from_frames(causal_frames)

fear_keywords = {"danger", "threat", "risky", "harm", "catastrophe", "crisis", "ruin", "fear", "worse", "bad", "fatal",
                 "negative", "die", "death"}
preventive_keywords = {"prevent", "avoid", "stop", "ban", "rescue", "save", "protect"}

urgency_keywords = {"immediately", "soon", "before it's too late", "critical", "urgent", "suddenly", "unexpectedly"}

possibility_terms = {"possible", "possibility", "likely", "likelihood", "chance", "probability", "conceivable",
                     "potential", "can", "could", "might", "may", "able"}


def detect_fear_appeal_question(question):
    doc = nlp(question)
    score = 0
    explanations = []

    if any(tok.lemma_.lower() in fear_keywords for tok in doc):
        explanations.append("✓ Fear-related keyword detected (e.g., 'threat', 'danger')")
        score += 3

    if any(tok.lemma_.lower() in preventive_keywords for tok in doc):
        explanations.append("✓ Preventive action verb detected (e.g., 'prevent', 'stop')")
        score += 2

    if any(tok.lemma_ in causal_verbs for tok in doc if tok.pos_ == "VERB"):
        explanations.append("✓ Causal/preventive verb from FrameNet detected")
        score += 2

    if any(tok.text.lower() in {"if", "unless"} for tok in doc):
        explanations.append("✓ Conditional clause found (e.g., 'if', 'unless')")
        score += 1

    if any(is_fear_related(tok) for tok in doc if tok.pos_ in {"NOUN", "VERB", "ADJ"}):
        explanations.append("✓ Semantic fear-related concept detected via WordNet")
        score += 2

    if any(phrase in question.lower() for phrase in urgency_keywords):
        explanations.append("✓ Urgency marker detected (e.g., 'immediately', 'before it's too late')")
        score += 1

    if any(tok.lemma_ in possibility_terms for tok in doc):
        explanations.append("✓ Possibility-related term detected (e.g., 'possible', 'feasible', 'chance')")
        score += 1

    score = min(score, 10)
    label = "Strong Fear Appeal" if score >= 7 else "Weak/Partial Fear Appeal" if score >= 4 else "Not Fear Appeal"
    return label, score, explanations

## Augmentation

In [23]:
api_key = "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"
client = OpenAI(api_key=api_key)

schemas_template = {
    "CauseToEffect": """'Cause to Effect' with the examples:
    How strong is the generalisation that if <eventA> then <eventB>?
    Are there other factors in this particular case that could have interfered with the event of ‘<eventB>’?""",

    "ExpertOpinion": """'Expert Opinion' with the examples:
    Is <expertE> a genuine expert in <domainD>?
    Is <eventA> consistent with what other experts in <domainD> say?""",

    "Analogy": """'Analogy' with the examples:
    Are <C1> and <C2> similar in the respect cited?
    Is <eventA> true in <C1>?""",

    "FearAppeal": """'Fear Appeal' with the examples:
    Is <eventB> bad? Why and to whom is it bad?
    Is <eventA> a way to prevent <eventB>?"""
}

In [24]:
def generate_question(context, schema_description):
    prompt = (f"Generate one critical question addressing the provided context. "
              f"Ensure it matches the schema: {schema_description}\n\n"
              f"Context: {context}\n\n"
              f"Question:")

    try:
        response = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=[{"role": "user", "content": prompt}],
            temperature=0.7,
            max_tokens=60,
        )
        question = response.choices[0].message.content.strip()
        return question
    except Exception as e:
        print(f"Error generating question: {e}")
        return None

### Generate bad questions

In [25]:
# Preparing the final dataset
final_data = []
i = 0
for idx, row in combined_df.iterrows():
    if row['is_Critical']:
        chosen_question = row['question']  # Save the old question
        schema_options = [schema for schema in ['CauseToEffect', 'ExpertOpinion', 'Analogy', 'FearAppeal']
                          if row.get(schema, 0) > 0]
        chosen_schema = random.choice(schema_options) if schema_options else random.choice(
            list(schemas_template.keys()))
        schema_description = schemas_template[chosen_schema]

        score_chosen = row[chosen_schema]
        attempts = 0
        score = 0
        rejected_question = None

        # Attempt to generate a better question
        while attempts < 3:
            attempts += 1
            generated_question = generate_question(row['context'], schema_description)

            if generated_question is None:
                continue

            # Evaluate generated question
            if chosen_schema == "CauseToEffect":
                label, score, explanation = detect_cause_to_effect(generated_question)
            elif chosen_schema == "ExpertOpinion":
                label, score, explanation = detect_expert_opinion(generated_question)
            elif chosen_schema == "Analogy":
                label, score, explanation = detect_analogy_question(generated_question)
            elif chosen_schema == "FearAppeal":
                label, score, explanation = detect_fear_appeal_question(generated_question)
            else:
                score = 0

            print(
                f"ID: {i}: Attempt {attempts}, Score: {score}, Schema: {chosen_schema} Generated question:\n{generated_question}")

            # If the question is not good enough, reject it
            if score < 7:
                rejected_question = generated_question
                break

        final_data.append({
            "id": i,
            "prompt": [{"role": "user",
                        "content": f"Generate one critical question addressing the provided context. Ensure it matches the schema: {chosen_schema}\n\nContext: {row['context']}"}],
            "chosen": [{"role": "assistant", "content": chosen_question}],
            "rejected": [{"role": "assistant", "content": rejected_question if rejected_question else "Failed"}],
            "score_chosen": score_chosen,
            "score_rejected": score,
            "schema": chosen_schema,
            "context": row['context']
        })

        # Respect API rate limits
        time.sleep(0.1)
        i = i + 1

# Convert final data into DataFrame and save it
final_df = pd.DataFrame(final_data)
import json

with open(output_file, 'w') as f:
    json.dump(final_data, f, indent=4)

print(f"Dataset successfully created and saved to {output_file}.")


ID: 0: Attempt 1, Score: 6.5, Schema: Analogy Generated question:
Are school teachers' compensation and a living wage similar in terms of financial adequacy?
ID: 1: Attempt 1, Score: 6, Schema: Analogy Generated question:
Are the potential issues with re-votes in a popular vote system similar to those in the current Electoral College system?
ID: 2: Attempt 1, Score: 2, Schema: CauseToEffect Generated question:
How significant is the impact of family background on perpetuating inequality and unfairness in society?
ID: 3: Attempt 1, Score: 0, Schema: CauseToEffect Generated question:
Are there other potential consequences of advanced artificial intelligence beyond the scenario of robots being programmed to harm humans?
ID: 4: Attempt 1, Score: 6, Schema: FearAppeal Generated question:
Is the use of fear appeal a tactic employed by liberals to enforce their political beliefs, and if so, to what extent does it undermine the democratic values they claim to support?
ID: 5: Attempt 1, Score: 

In [26]:
from datasets import load_dataset

dataset = load_dataset('json', data_files=output_file)
print(dataset)

Generating train split: 0 examples [00:00, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['id', 'prompt', 'chosen', 'rejected', 'score_chosen', 'score_rejected', 'schema', 'context'],
        num_rows: 3128
    })
})


In [29]:
# Load the JSON data
with open(output_file, 'r') as f:
    data = json.load(f)

# Filter out entries where any item in 'rejected' has content "Failed"
filtered_data = []
for item in data:
    if not any(entry.get('content') == 'Failed' for entry in item.get('rejected', [])):
        filtered_data.append(item)

# Save the cleaned data to a new file
with open(output_file, 'w') as f:
    json.dump(filtered_data, f, indent=2)

print(f"Removed {len(data) - len(filtered_data)} entries with 'Failed' in rejected content.")

Removed 59 entries with 'Failed' in rejected content.


In [30]:
dataset = load_dataset('json', data_files=output_file)
print(dataset)

Generating train split: 0 examples [00:00, ? examples/s]

DatasetDict({
    train: Dataset({
        features: ['id', 'prompt', 'chosen', 'rejected', 'score_chosen', 'score_rejected', 'schema', 'context'],
        num_rows: 3069
    })
})


In [31]:
print(dataset['train'][0])

{'id': 0, 'prompt': [{'content': "Generate one critical question addressing the provided context. Ensure it matches the schema: Analogy\n\nContext: implication_consequences: The argument isn't that school teachers' compensation is adequate. The argument is that everyone should be paid a living wage. Someone asked how much a living wage was, and OP responded with an estimation of 40-50k.", 'role': 'user'}], 'chosen': [{'content': 'How much would a big Mac be if every employee made 50k a year?', 'role': 'assistant'}], 'rejected': [{'content': "Are school teachers' compensation and a living wage similar in terms of financial adequacy?", 'role': 'assistant'}], 'score_chosen': 4.5, 'score_rejected': 6.5, 'schema': 'Analogy', 'context': "implication_consequences: The argument isn't that school teachers' compensation is adequate. The argument is that everyone should be paid a living wage. Someone asked how much a living wage was, and OP responded with an estimation of 40-50k."}
