### CRITICAL QUESTION DEFINITION & EVALUATION using SPACY

### 1. Schema: Cause to Effect (CTE)

In [4]:
import spacy
import nltk

nltk.download('framenet_v17')

nlp = spacy.load("en_core_web_sm")
from nltk.corpus import framenet as fn

def get_causal_verbs_from_framenet():
    """Collect verb lemmas from a fixed list of causation-related FrameNet frames.

    Every lexical unit whose name contains '.v' contributes the part of its
    name before the first '.'. A frame that raises while being loaded or
    scanned is reported with ``print`` and the remaining frames are still
    processed.

    Returns:
        set: the collected verb lemmas.
    """
    frames_to_scan = (
        "Causation", "Cause_change", "Cause_change_of_position_on_a_scale",
        "Cause_motion", "Cause_to_amalgamate", "Cause_to_start", "Cause_to_make_progress",
    )

    collected = set()
    for name in frames_to_scan:
        try:
            lexical_units = fn.frame_by_name(name).lexUnit.values()
            for unit in lexical_units:
                unit_name = unit['name']
                if '.v' not in unit_name:
                    # Not a verb entry.
                    continue
                collected.add(unit_name.split('.')[0])
        except Exception as err:
            print(f"Error loading frame '{name}': {err}")

    return collected

causal_verbs = get_causal_verbs_from_framenet()

def detect_cause_to_effect(sentence):
    """Heuristically score a sentence for a cause-to-effect structure.

    Cues and points:
      * an "if"/"when" token with dependency ``mark``: +2
      * any token with dependency ``advcl``: +2
      * the first verb whose lemma is in ``causal_verbs`` and that has an
        nsubj, dobj or prep child: +2, plus +1 for each of those three
        child types that is present
      * only when none of the above made the sentence causal: a known
        causal phrase occurring in the lower-cased text: +1

    Args:
        sentence: text to analyse.

    Returns:
        tuple: ``(is_causal, explanations, score)`` where ``is_causal`` is a
        bool, ``explanations`` lists the cues that fired and ``score`` is
        capped at 10.
    """
    parsed = nlp(sentence)
    notes = []
    points = 0

    conditional = False
    for token in parsed:
        if token.dep_ == "mark" and token.text.lower() in {"if", "when"}:
            conditional = True
            break
    if conditional:
        notes.append("✓ Conditional clause detected (e.g., 'if', 'when')")
        points += 2

    adverbial = "advcl" in {token.dep_ for token in parsed}
    if adverbial:
        notes.append("✓ Adverbial clause (likely effect clause) detected")
        points += 2

    causal_verb_found = False
    for token in parsed:
        if token.pos_ != "VERB" or token.lemma_ not in causal_verbs:
            continue
        child_deps = {child.dep_ for child in token.children}
        with_subj = "nsubj" in child_deps
        with_obj = "dobj" in child_deps
        with_prep = "prep" in child_deps
        if not (with_subj or with_obj or with_prep):
            continue
        causal_verb_found = True
        notes.append(
            f"✓ Verb '{token.lemma_}' is listed in FrameNet under causal frames and appears with subject/object – "
            f"possible causality depending on context"
        )
        # 2 base points plus one per attached argument type.
        points += 2 + int(with_subj) + int(with_obj) + int(with_prep)
        # Only the first qualifying verb is scored.
        break

    verdict = (conditional and adverbial) or causal_verb_found

    if not verdict:
        # Fallback: plain substring match on the lower-cased sentence.
        lowered = sentence.lower()
        for trigger in ("result in", "lead to", "cause", "because of", "due to"):
            if trigger in lowered:
                notes.append("✓ Phrase pattern matches known cause-to-effect trigger")
                verdict = True
                points += 1
                break

    # Clamp max score to 10
    return verdict, notes, min(points, 10)


def get_causal_verbs_from_framenet():
    """Collect verb lemmas from a fixed list of causation-related FrameNet frames.

    This definition replaces the earlier one of the same name in this file,
    so it keeps the same per-frame error handling: a frame that cannot be
    loaded is reported and skipped instead of aborting the whole collection.

    Returns:
        set: for every lexical unit whose name contains '.v', the part of the
        name before the first '.'.
    """
    # TODO: verify that this list of causation-related FrameNet frames is the right one.
    causal_frame_names = [
        "Causation", "Cause_change", "Cause_change_of_position_on_a_scale",
        "Cause_motion", "Cause_to_amalgamate", "Cause_to_start", "Cause_to_make_progress"
    ]

    causal_verbs = set()

    for frame_name in causal_frame_names:
        try:
            frame = fn.frame_by_name(frame_name)
            for lu in frame.lexUnit.values():
                if '.v' in lu['name']:  # Only verbs
                    causal_verbs.add(lu['name'].split('.')[0])
        except Exception as e:
            print(f"Error loading frame '{frame_name}': {e}")

    return causal_verbs

# Hand-written examples for a quick check of detect_cause_to_effect.
# The trailing comment on each line looks like the author's expected verdict
# ("CTE" = cause-to-effect, "no" = not) — presumably; confirm with the author.
sentences = [
    "What would happen if taxes increased?",                         # CTE
    "Could rising sea levels result in frequent flooding?",          # CTE
    "Why do doctors recommend exercise?",                             # no
    "Did the rain cause the game to be canceled?",                   # CTE
    "What’s the reason for lowering interest rates?",                # no
    "Is it sunny today or just bright outside?",                     # no
    "What happens if students don’t study?",                         # CTE
    "Why do you prefer chocolate over vanilla?",                     # no
]


# Print score, score-derived label and fired cues for each example.
for sentence in sentences:
    # `result` (the boolean verdict) is not used below: the label depends on the score only.
    result, explanation, score = detect_cause_to_effect(sentence)
    # Score bands: >= 7 strong, 4-6 weak/partial, below 4 not cause-to-effect.
    if score >= 7:
        label = "Strong CauseToEffect"
    elif score >= 4:
        label = "Weak/Partial CauseToEffect"
    else:
        label = "Not CauseToEffect"
    print(f"\nSentence: {sentence}")
    print(f"CTE: {score}/10 → Label: {label}")
    for e in explanation:
        print(f"   {e}")


[nltk_data] Downloading package framenet_v17 to
[nltk_data]     C:\Users\cedri\AppData\Roaming\nltk_data...
[nltk_data]   Package framenet_v17 is already up-to-date!



Sentence: What would happen if taxes increased?
CTE: 7/10 → Label: Strong CauseToEffect
   ✓ Conditional clause detected (e.g., 'if', 'when')
   ✓ Adverbial clause (likely effect clause) detected
   ✓ Verb 'increase' is listed in FrameNet under causal frames and appears with subject/object – possible causality depending on context

Sentence: Could rising sea levels result in frequent flooding?
CTE: 1/10 → Label: Not CauseToEffect
   ✓ Phrase pattern matches known cause-to-effect trigger

Sentence: Why do doctors recommend exercise?
CTE: 0/10 → Label: Not CauseToEffect

Sentence: Did the rain cause the game to be canceled?
CTE: 3/10 → Label: Not CauseToEffect
   ✓ Verb 'cause' is listed in FrameNet under causal frames and appears with subject/object – possible causality depending on context

Sentence: What’s the reason for lowering interest rates?
CTE: 3/10 → Label: Not CauseToEffect
   ✓ Verb 'lower' is listed in FrameNet under causal frames and appears with subject/object – possible 

### SOCRATIC dataset filtering

In [7]:
import pandas as pd
import spacy

# Keep only rows whose context has at least 25 spaCy tokens.
df = pd.read_csv("../Data/Raw/SocraticQ/train_chunk_I.csv", names=["category", "context", "question"])
# Only the token count is needed, so run the tokenizer alone (nlp.make_doc)
# rather than the full pipeline via nlp(text): this cell was interrupted when
# every context was pushed through the full pipeline.
df["context_token_len"] = df["context"].apply(lambda text: len(nlp.make_doc(text)))
filtered_df = df[df["context_token_len"] >= 25].copy()

print(f"Total rows: {len(df)}")
print(f"Rows after filtering: {len(filtered_df)}")


KeyboardInterrupt: 

In [None]:
filtered_df.head()

- test CauseToEffect performance 

In [5]:

def apply_cte_detection(question):
    """Run the cause-to-effect detector on one question and package the result as a row.

    Args:
        question: question text passed to ``detect_cause_to_effect``.

    Returns:
        pandas.Series with ``cte_score``, ``cte_label`` (score >= 7 strong,
        >= 4 weak/partial, otherwise not cause-to-effect) and
        ``cte_explanations`` (the fired cues joined with " | ").
    """
    _, reasons, cte_score = detect_cause_to_effect(question)

    if cte_score < 4:
        verdict = "Not CauseToEffect"
    elif cte_score < 7:
        verdict = "Weak/Partial CauseToEffect"
    else:
        verdict = "Strong CauseToEffect"

    row = {
        "cte_score": cte_score,
        "cte_label": verdict,
        "cte_explanations": " | ".join(reasons),
    }
    return pd.Series(row)

filtered_df[["cte_score", "cte_label", "cte_explanations"]] = filtered_df["question"].apply(apply_cte_detection)


NameError: name 'filtered_df' is not defined

In [None]:
filtered_df

In [None]:
filtered_df["cte_label"].value_counts()

In [None]:
strong_cte_df = filtered_df[filtered_df["cte_label"] == "Strong CauseToEffect"].copy()
strong_cte_df

In [None]:
# Weak/partial rows get their own name so that they do not overwrite
# `strong_cte_df`, which holds the "Strong CauseToEffect" subset.
weak_cte_df = filtered_df[filtered_df["cte_label"] == "Weak/Partial CauseToEffect"].copy()
weak_cte_df

- LLM validation of CTE labels, only on target rows (those with a syntactic indication of cause-to-effect)

In [None]:
target_rows = filtered_df[
    filtered_df["cte_label"].isin(["Weak/Partial CauseToEffect", "Strong CauseToEffect"])
].copy()


In [None]:
from transformers import pipeline
from huggingface_hub import login
import os
import dotenv
dotenv.load_dotenv()


hf_token = os.getenv("HUGGINGFACE_API_TOKEN")
login(token=hf_token)


def prompt_cte_judgment(context, question, score, label):
    """Build the prompt that asks an LLM to validate a heuristic cause-to-effect label.

    Args:
        context: passage the question refers to.
        question: the question being judged.
        score: heuristic cause-to-effect score, shown in the prompt as "<score>/10".
        label: heuristic label assigned to the question.

    Returns:
        str: the filled-in prompt with surrounding whitespace stripped, so it
        ends with "Answer:".
    """
    prompt = f"""
You are an expert in argumentation theory.

Context:
{context}

Question:
{question}

A rule-based system has analyzed this question and determined:
- CauseToEffect Score: {score}/10
- Heuristic Label: {label}

The CauseToEffect score reflects structural indicators (e.g., conditional clauses, causal verbs, prepositions).

Does this question genuinely reflect a Cause-to-Effect relationship? For example, does it suggest that one event or condition leads to another?

Use the following labels:
- Confirmed CauseToEffect → Strong evidence of causal reasoning
- Plausible CauseToEffect → Some indicators but less clear
- Not CauseToEffect → No signs of causal structure
- Needs Human Review → Ambiguous or depends heavily on context

Answer:
"""
    return prompt.strip()



# Text-generation pipeline used by llm_evaluate_cte_open to judge the heuristic CTE labels.
# NOTE(review): device=-1 presumably selects the CPU — confirm against the transformers docs.
llm = pipeline(
    "text-generation",
    model="tiiuae/falcon-rw-1b",
    device=-1
)


def llm_evaluate_cte_open(context, question, score, label):
    """Ask the LLM whether a question is genuinely cause-to-effect and return its free-text reply.

    Args:
        context: passage the question refers to.
        question: the question being judged.
        score: heuristic cause-to-effect score.
        label: heuristic label.

    Returns:
        str: the generated text after the last "Answer:" marker, stripped
        (the whole generated text if the marker does not occur).
    """
    generations = llm(
        prompt_cte_judgment(context, question, score, label),
        max_new_tokens=50,
        do_sample=False,
    )
    full_text = generations[0]["generated_text"]
    # rpartition yields the text after the last marker, or the full text if the marker is absent.
    _, _, answer = full_text.rpartition("Answer:")
    return answer.strip()


In [None]:
# Sample at most 200 target rows: DataFrame.sample raises ValueError when n
# exceeds the number of available rows (sampling without replacement).
subset_df = target_rows.sample(n=min(200, len(target_rows)), random_state=42).copy()
subset_df.shape

In [None]:
from tqdm import tqdm
tqdm.pandas()
# Sample at most 200 target rows: DataFrame.sample raises ValueError when n
# exceeds the number of available rows (sampling without replacement).
subset_df = target_rows.sample(n=min(200, len(target_rows)), random_state=42).copy()
# One LLM call per sampled row; progress_apply shows a progress bar.
subset_df["llm_cte_label"] = subset_df.progress_apply(
    lambda row: llm_evaluate_cte_open(row["context"], row["question"], row["cte_score"], row["cte_label"]),
    axis=1
)


In [None]:

subset_df.to_excel("train_chunk_1_CTE.xlsx", index=False)

subset_df

### 2. schema: Expert opinion

In [None]:
import spacy
import nltk
from nltk.corpus import framenet as fn

nltk.download('framenet_v17')
nlp = spacy.load("en_core_web_sm")

# ----------- Generalized FrameNet Loader -----------
def get_lexical_units_from_frames(frames):
    """Collect verb lemmas from the given FrameNet frames.

    Args:
        frames: iterable of FrameNet frame names.

    Returns:
        set: for every lexical unit whose name contains '.v', the part of the
        name before the first '.'. Frames that raise are reported with
        ``print`` and skipped.
    """
    verb_lemmas = set()
    for name in frames:
        try:
            units = fn.frame_by_name(name).lexUnit.values()
            for unit in units:
                unit_name = unit['name']
                if '.v' in unit_name:
                    verb_lemmas.add(unit_name.split('.')[0])
        except Exception as err:
            print(f"Warning: Could not load frame '{name}': {err}")
    return verb_lemmas

# ----------- Extract FrameNet-based Verb Sets -----------

# FrameNet frame names behind each cue used by detect_expert_opinion.
# "Statement" and "Judgment_communication" appear in both the expert and the
# quote lists, and "Certainty" in both the expert and the evidence lists, so a
# single verb can fire more than one cue.
expert_frames = [
    "Expertise", "Judgment_communication", "Opinion",
    "Authority", "Statement", "Certainty"
]
quote_frames = ["Statement", "Judgment_communication"]
clarity_frames = ["Reasoning"]
evidence_frames = ["Evidence", "Certainty", "Causation"]


# get_lexical_units_from_frames keeps only lexical units whose name contains
# '.v', so all four sets (including the *_terms ones) hold verb lemmas.
expert_verbs = get_lexical_units_from_frames(expert_frames)
quote_verbs = get_lexical_units_from_frames(quote_frames)
clarity_terms = get_lexical_units_from_frames(clarity_frames)
evidence_terms = get_lexical_units_from_frames(evidence_frames)

# ----------- Detection Function -----------

def detect_expert_opinion(question):
    """Heuristically score a question for an appeal to expert opinion.

    Cues and points (maximum total 8):
      * a PERSON/ORG entity whose text contains an expert title: +2
      * a verb whose lemma is in ``expert_verbs``: +2
      * any lemma in ``quote_verbs``: +1
      * any lemma in ``clarity_terms``: +1
      * any lemma in ``evidence_terms``: +2

    Args:
        question: question text to analyse.

    Returns:
        tuple: ``(label, score, explanations)``. The label is
        "Strong Expert Opinion" for score >= 6, "Weak/Partial Expert Opinion"
        for score >= 3, otherwise "Not Expert Opinion".
    """
    doc = nlp(question)
    score = 0
    explanations = []

    # Titles are matched against the lower-cased entity text, so each entry
    # must be lower-case itself (the former "Dr." entry could never match).
    expert_titles = {"expert", "researcher", "scientist", "doctor", "analyst", "professor", "dr."}

    for ent in doc.ents:
        if ent.label_ in {"PERSON", "ORG"}:
            ent_text = ent.text.lower()
            if any(title in ent_text for title in expert_titles):
                explanations.append(f"✓ Expert entity detected: '{ent.text}'")
                score += 2
                # Count the entity cue once, however many entities match.
                break

    if any(tok.lemma_ in expert_verbs for tok in doc if tok.pos_ == "VERB"):
        explanations.append("✓ Detected expert-related verb from FrameNet")
        score += 2

    if any(tok.lemma_ in quote_verbs for tok in doc):
        explanations.append("✓ Quotation or claim verb found")
        score += 1

    if any(tok.lemma_ in clarity_terms for tok in doc):
        explanations.append("✓ Clarity/definition markers found")
        score += 1

    if any(tok.lemma_ in evidence_terms for tok in doc):
        explanations.append("✓ Evidence or support-related terms found")
        score += 2

    if score >= 6:
        label = "Strong Expert Opinion"
    elif score >= 3:
        label = "Weak/Partial Expert Opinion"
    else:
        label = "Not Expert Opinion"

    return label, score, explanations

# ----------- Example Usage -----------

questions = [
    "Did Professor Lee actually say that the results were inconclusive?",
    "Is this study consistent with what other experts have found?",
    "Why do cats sleep so much?",
    "Are there any recent studies supporting this?",
    "What did the government state about inflation?",
    "Is the evidence consistent with past research?",
    "Can you define the technical term 'quantum entanglement'?",
    "Who won the World Cup in 2018?"
]

for q in questions:
    label, score, explanation = detect_expert_opinion(q)
    print(f"\nQuestion: {q}")
    print(f"Label: {label} | Score: {score}/10")
    for e in explanation:
        print(f"   {e}")


In [None]:

filtered_df[["expert_label", "expert_score", "expert_explanations"]] = filtered_df["question"].apply(
    lambda q: pd.Series(detect_expert_opinion(q))
)


In [None]:
filtered_df["expert_label"].value_counts()

In [None]:
strong_expert_df = filtered_df[filtered_df["expert_label"] == "Strong Expert Opinion"].copy()
strong_expert_df

### 3. schema: Analogy

In [None]:
import nltk
nltk.download('wordnet')
from nltk.corpus import wordnet as wn

# Get synsets for analogy-related terms
analogy_synsets = [wn.synset('similar.a.01'), wn.synset('analogy.n.01'), wn.synset('compare.v.01')]

def is_semantically_analogical(token):
    """Tell whether a token is close in WordNet to one of the analogy reference synsets.

    Args:
        token: object exposing ``lemma_`` (a spaCy token at the call site).

    Returns:
        bool: True if any synset of the lemma has a path similarity greater
        than 0.3 to any synset in ``analogy_synsets``.
    """
    for candidate in wn.synsets(token.lemma_):
        for reference in analogy_synsets:
            similarity = candidate.path_similarity(reference)
            # A falsy similarity (None or 0) never counts as a match.
            if similarity and similarity > 0.3:
                return True
    return False

def get_lexical_units_from_frames(frames):
    """Return the verb lemmas listed under the given FrameNet frames.

    Args:
        frames: iterable of FrameNet frame names.

    Returns:
        set: the text before the first '.' of each lexical-unit name that
        contains '.v'. A frame that raises is reported with ``print`` and
        the scan continues with the next frame.
    """
    found = set()
    for frame_id in frames:
        try:
            for entry in fn.frame_by_name(frame_id).lexUnit.values():
                if '.v' not in entry['name']:
                    continue
                lemma, _, _ = entry['name'].partition('.')
                found.add(lemma)
        except Exception as problem:
            print(f"Warning: Could not load frame '{frame_id}': {problem}")
    return found

# FrameNet sets
comparison_frames = ["Similarity"]
contrast_frames = ["Categorization"]
evidence_frames = ["Evidence", "Judgment_communication"]

comparison_verbs = get_lexical_units_from_frames(comparison_frames)
contrast_verbs = get_lexical_units_from_frames(contrast_frames)
evidence_verbs = get_lexical_units_from_frames(evidence_frames)

# Analogy detection function
def detect_analogy_question(question):
    """Heuristically score a question for an argument-from-analogy structure.

    Cues and points:
      * verb lemma in ``comparison_verbs``: +2
      * at least two distinct noun / proper-noun lemmas: +1
      * verb lemma in ``contrast_verbs``: +1
      * verb lemma in ``evidence_verbs``: +1
      * a modal (tag ``MD``): +1
      * two or more noun chunks plus a "similar"/"like"/"as" lemma: +2
      * an "if" token: +1
      * WordNet proximity to the analogy synsets (``is_semantically_analogical``): +2
      * "compare"/"similar" lemma in a ``prep`` or ``relcl`` position: +1

    The raw cues add up to 12; the returned score is capped at 10 because it
    is reported on a 10-point scale, as the cause-to-effect detector does.

    Args:
        question: question text to analyse.

    Returns:
        tuple: ``(label, score, explanations)``. The label is
        "Strong Analogy Question" for score >= 8, "Weak/Partial Analogy
        Question" for score >= 5, otherwise "Not Analogy Question".
    """
    doc = nlp(question)
    score = 0
    explanations = []
    noun_chunks = list(doc.noun_chunks)

    # Comparison markers
    if any(tok.lemma_ in comparison_verbs for tok in doc if tok.pos_ == "VERB"):
        explanations.append("✓ Comparison verb detected from FrameNet")
        score += 2

    # Pairwise comparison: at least two distinct noun / proper-noun lemmas
    entity_lemmas = {tok.lemma_ for tok in doc if tok.pos_ in {"PROPN", "NOUN"}}
    if len(entity_lemmas) >= 2:
        score += 1
        explanations.append("✓ Contains at least two distinct concepts/entities")

    # Contrast markers
    if any(tok.lemma_ in contrast_verbs for tok in doc if tok.pos_ == "VERB"):
        explanations.append("✓ Contrast or difference verb detected from FrameNet")
        score += 1

    # Evidence markers
    if any(tok.lemma_ in evidence_verbs for tok in doc if tok.pos_ == "VERB"):
        explanations.append("✓ Evidence or justification verb found")
        score += 1

    # Modal verbs like would, could. Every scored cue is explained, so that
    # the explanations account for the whole score.
    if any(tok.tag_ == "MD" for tok in doc):
        explanations.append("✓ Modal verb detected (e.g., 'would', 'could')")
        score += 1

    if len(noun_chunks) >= 2 and any(tok.lemma_ in {"similar", "like", "as"} for tok in doc):
        explanations.append("✓ Two concepts compared with similarity cue (e.g., 'similar', 'like')")
        score += 2

    # Conditional marker (e.g., 'if')
    if any(tok.text.lower() == "if" for tok in doc):
        explanations.append("✓ Conditional structure suggesting hypothetical reasoning")
        score += 1

    if any(is_semantically_analogical(tok) for tok in doc if tok.pos_ in {"ADJ", "NOUN", "VERB"}):
        explanations.append("✓ Semantic similarity to analogy-related terms detected via WordNet")
        score += 2

    if any(tok.dep_ in {"prep", "relcl"} and tok.lemma_ in {"compare", "similar"} for tok in doc):
        score += 1
        explanations.append("✓ Syntactic cue of analogy (e.g., 'compared with', 'similar to')")

    # Cap at the reported 10-point scale; both label thresholds lie below 10,
    # so the cap never changes the label.
    score = min(score, 10)

    # Label assignment
    if score >= 8:
        label = "Strong Analogy Question"
    elif score >= 5:
        label = "Weak/Partial Analogy Question"
    else:
        label = "Not Analogy Question"

    return label, score, explanations


In [None]:
questions = [
    "Are cats and dogs similar in how they form social bonds?",
    "Is democracy in the U.S. similar to that in ancient Greece?",
    "If Finland succeeded with this policy, would it work in Germany?",
    "What did Plato say about justice?",
    "Are there differences between ancient Rome and the modern EU?",
    "Who won the match yesterday?"
]

for q in questions:
    label, score, explanation = detect_analogy_question(q)
    print(f"\nQuestion: {q}")
    print(f"Label: {label} | Score: {score}/10")
    for e in explanation:
        print(f"   {e}")


In [None]:

# Keep only rows whose context has at least 25 spaCy tokens.
df = pd.read_csv("Data/Raw/SocraticQ/train_chunk_I.csv", names=["category", "context", "question"])
# Only the token count is needed, so run the tokenizer alone (nlp.make_doc)
# instead of the full pipeline via nlp(text).
df["context_token_len"] = df["context"].apply(lambda text: len(nlp.make_doc(text)))
df_filtered = df[df["context_token_len"] >= 25].copy()

print(f"Total rows: {len(df)}")

In [None]:
df_filtered[["analogy_label", "analogy_score", "analogy_explanations"]] = df_filtered["question"].apply(
    lambda q: pd.Series(detect_analogy_question(q))
)

In [None]:
df_filtered["analogy_label"].value_counts()


In [None]:
strong_analogy_df = df_filtered[df_filtered["analogy_label"] == "Strong Analogy Question"].copy()
strong_analogy_df

### 4. schema: Bias (fear appeal)


In [None]:
import spacy
import nltk
from nltk.corpus import framenet as fn

# Setup
nltk.download('framenet_v17')
nlp = spacy.load("en_core_web_sm")

from nltk.corpus import wordnet as wn

def is_fear_related(token, threshold=0.3):
    """Tell whether a token is close in WordNet to 'danger', 'fear' or 'threat'.

    The previous version accepted any non-zero path similarity (and shadowed
    its loop variable), so nearly every word with a synset was flagged. A
    minimum similarity is now required; the default mirrors the 0.3 cut-off
    used by ``is_semantically_analogical``.

    Args:
        token: object exposing ``lemma_`` (a spaCy token at the call site).
        threshold: path similarity that must be exceeded for a match.

    Returns:
        bool: True if any synset of the lemma has a path similarity greater
        than ``threshold`` to one of the three reference synsets.
    """
    # Resolve the reference synsets once per call rather than once per candidate.
    anchors = [wn.synset('danger.n.01'), wn.synset('fear.n.01'), wn.synset('threat.n.01')]
    for candidate in wn.synsets(token.lemma_):
        for anchor in anchors:
            similarity = candidate.path_similarity(anchor)
            # path_similarity returns None when no path connects the synsets.
            if similarity is not None and similarity > threshold:
                return True
    return False


# ---- FrameNet Utility ----
def get_lexical_units_from_frames(frames):
    """Collect verb lemmas from the given FrameNet frames.

    Args:
        frames: iterable of FrameNet frame names.

    Returns:
        set: for every lexical unit whose name contains '.v', the part of the
        name before the first '.'. Frames that raise are still skipped, but
        each one is now reported so that a misspelled or missing frame name
        does not silently shrink the result.
    """
    terms = set()
    for frame_name in frames:
        try:
            frame = fn.frame_by_name(frame_name)
            for lu in frame.lexUnit.values():
                if '.v' in lu['name']:
                    terms.add(lu['name'].split('.')[0])
        # Not a bare `except:`, which would also swallow KeyboardInterrupt and SystemExit.
        except Exception as e:
            print(f"Warning: Could not load frame '{frame_name}': {e}")
    return terms

# ---- Relevant Lexical Resources ----
# NOTE(review): get_lexical_units_from_frames skips frames it cannot load, so
# confirm that each of these names exists in the installed FrameNet release.
causal_frames = ["Causation", "Cause_to_start", "Preventing", "Risk", "Threaten", "Danger"]
# Stored under its own name: assigning to `causal_verbs` here would replace
# the verb set that detect_cause_to_effect reads and change its results on
# any later run.
fear_causal_verbs = get_lexical_units_from_frames(causal_frames)

fear_keywords = {"danger", "threat", "risky", "harm", "catastrophe", "crisis", "ruin", "fear", "worse", "bad", "fatal"}
preventive_keywords = {"prevent", "avoid", "stop", "ban", "rescue", "save", "protect"}
modal_keywords = {"might", "could", "would", "may", "should"}

# ---- Detection Function ----
def detect_fear_appeal_question(question):
    """Heuristically score a question for a fear-appeal structure.

    Cues and points (maximum total 10):
      * a modal (tag ``MD``) whose lemma is in ``modal_keywords``: +1
      * a lemma in ``fear_keywords``: +2
      * a lemma in ``preventive_keywords``: +2
      * a verb whose lemma is in ``fear_causal_verbs``: +2
      * an "if" or "unless" token: +1
      * a noun, verb or adjective flagged by ``is_fear_related``: +2

    Args:
        question: question text to analyse.

    Returns:
        tuple: ``(label, score, explanations)``. The label is
        "Strong Fear Appeal" for score >= 6, "Weak/Partial Fear Appeal" for
        score >= 4, otherwise "Not Fear Appeal".
    """
    doc = nlp(question)
    score = 0
    explanations = []

    # Modal structure
    if any(tok.lemma_ in modal_keywords for tok in doc if tok.tag_ == "MD"):
        explanations.append("✓ Modal verb detected (e.g., 'might', 'would') suggesting hypothetical risk")
        score += 1

    # Threat/danger terms
    if any(tok.lemma_.lower() in fear_keywords for tok in doc):
        explanations.append("✓ Fear-related keyword detected (e.g., 'threat', 'danger')")
        score += 2

    # Prevention-related words
    if any(tok.lemma_.lower() in preventive_keywords for tok in doc):
        explanations.append("✓ Preventive action verb detected (e.g., 'prevent', 'stop')")
        score += 2

    # Causal verbs from FrameNet
    if any(tok.lemma_ in fear_causal_verbs for tok in doc if tok.pos_ == "VERB"):
        explanations.append("✓ Causal/preventive verb from FrameNet detected")
        score += 2

    # Hypothetical or conditional reasoning
    if any(tok.text.lower() in {"if", "unless"} for tok in doc):
        explanations.append("✓ Conditional clause found (e.g., 'if', 'unless')")
        score += 1

    # WordNet-based semantic cue
    if any(is_fear_related(tok) for tok in doc if tok.pos_ in {"NOUN", "VERB", "ADJ"}):
        explanations.append("✓ Semantic fear-related concept detected via WordNet")
        score += 2

    # Final label
    if score >= 6:
        label = "Strong Fear Appeal"
    elif score >= 4:
        label = "Weak/Partial Fear Appeal"
    else:
        label = "Not Fear Appeal"

    return label, score, explanations


In [None]:
questions = [
    "If we don't regulate AI, it might take over critical infrastructure.",
    "Is banning TikTok the only way to prevent surveillance?",
    "Could this policy save us from a financial disaster?",
    "Why is inflation bad for the middle class?",
    "Should we stop immigration to prevent job loss?",
    "Who benefits from this healthcare policy?",
    "What happens if we ignore climate change?"
]

for q in questions:
    label, score, explanation = detect_fear_appeal_question(q)
    print(f"\nQuestion: {q}")
    print(f"Label: {label} | Score: {score}/10")
    for e in explanation:
        print(f"   {e}")


In [None]:
df_filtered[["fear_label", "fear_score", "fear_explanations"]] = df_filtered["question"].apply(
    lambda q: pd.Series(detect_fear_appeal_question(q))
)


In [None]:
# Count rows per fear-appeal label. Calling value_counts() on the whole frame
# counts identical rows and has to hash every column, including the
# list-valued explanation columns.
df_filtered["fear_label"].value_counts()

In [None]:
# Fear-appeal rows get their own name so that they do not overwrite
# `strong_expert_df`, which holds the "Strong Expert Opinion" subset.
strong_fear_df = df_filtered[df_filtered["fear_label"] == "Strong Fear Appeal"].copy()
strong_fear_df

In [None]:
df_filtered["fear_label"].value_counts()