# General Setup

This notebook was used to create the datasets used in `probing_experiments.ipynb`.

In [None]:
import random
import csv
import pandas as pd

# Generation of Initial Dataset
The generated dataset was used as both the training and the test set for Experiment 1, and as the training set for Experiment 2.  
**Please note that rerunning this cell will produce a different dataset (with the same structure), because the examples are sampled randomly and no random seed is fixed.**

In [None]:
# ------------------------
# 1. Placeholder pools
# ------------------------
# Person names substituted into the {X} / {Y} slots of a template.
names = (
    "Alice Bob Charlie Diana Eve Frank Grace Hank "
    "Ivy Jack Kara Leo Mona Nina Oscar Paul "
    "Quinn Rita Sam Tina Uma Victor Wendy Xander "
    "Yara Zane"
).split()

# Adjectives substituted into the {P} / {Q} slots of a template.
traits = (
    "happy sad tall short kind angry brave calm "
    "clever curious gentle honest lazy loud polite "
    "proud quiet rude shy smart"
).split()

# ------------------------
# 2. Reasoning pattern templates
# ------------------------
# Every template states the rule "If X is P, then Y is Q.", adds one
# further premise, and ends with a yes/no question.
templates = {
    "Modus Ponens":
        "If {X} is {P}, then {Y} is {Q}. {X} is {P}. Is {Y} {Q}?",
    "Modus Tollens":
        "If {X} is {P}, then {Y} is {Q}. {Y} is not {Q}. Is {X} not {P}?",
    "Affirming the Consequent":
        "If {X} is {P}, then {Y} is {Q}. {Y} is {Q}. Is {X} {P}?",
    "Denying the Antecedent":
        "If {X} is {P}, then {Y} is {Q}. {X} is not {P}. Is {Y} not {Q}?",
}

# Label per pattern: "Yes" for the two valid inference forms, "No" for the
# two fallacies. Keys follow the order of `templates`.
validity_map = {
    pattern: "Yes" if pattern in ("Modus Ponens", "Modus Tollens") else "No"
    for pattern in templates
}

# ------------------------
# 3. Example generator
# ------------------------
def generate_examples(pattern, n=500):
    """Generate ``n`` distinct examples of a single reasoning pattern.

    Each example fills the pattern's template with two different names
    (``X``, ``Y``) and two different traits (``P``, ``Q``) drawn at random
    from the module-level ``names`` and ``traits`` pools.

    Args:
        pattern: Key into ``templates`` / ``validity_map``.
        n: Number of distinct sentences to produce. A value <= 0 yields
            an empty list.

    Returns:
        A list of ``n`` dicts with the keys ``"text_input"``,
        ``"reasoning_class"`` and ``"validity"``, in the order in which
        the sentences were first drawn.

    Raises:
        KeyError: If ``pattern`` is not a key of ``templates``.
        ValueError: If ``n`` is larger than the number of distinct
            sentences the pools can produce.
    """
    template = templates[pattern]

    # Upper bound on the number of distinct sentences: ordered pairs of
    # different names times ordered pairs of different traits. Without this
    # guard the rejection-sampling loop below would never terminate for an
    # unreachable n.
    max_unique = (len(names) * (len(names) - 1)
                  * len(traits) * (len(traits) - 1))
    if n > max_unique:
        raise ValueError(
            f"Cannot generate {n} unique examples; "
            f"at most {max_unique} are possible with the current pools."
        )

    # A dict is used as an insertion-ordered set: it removes duplicates like
    # a set, but iterates in draw order, so the output order is fully
    # determined by the random stream rather than by string hashing.
    examples = {}
    while len(examples) < n:
        X, Y = random.sample(names, 2)  # ensure X != Y
        P = random.choice(traits)
        Q = random.choice([t for t in traits if t != P])  # avoid trivial P=Q
        examples[template.format(X=X, Y=Y, P=P, Q=Q)] = None

    validity = validity_map[pattern]
    return [{"text_input": sentence,
             "reasoning_class": pattern,
             "validity": validity} for sentence in examples]

# ------------------------
# 4. Build the full dataset (500 examples per reasoning pattern)
# ------------------------
dataset = []
for pattern in templates:
    dataset += generate_examples(pattern, n=500)

print(f"Total examples generated: {len(dataset)}")  # should be 2000

# ------------------------
# 5. Write all examples to a CSV file
# ------------------------
csv_file = "reasoning_patterns_dataset.csv"
with open(csv_file, mode="w", newline="", encoding="utf-8") as f:
    writer = csv.DictWriter(f, fieldnames=["text_input", "reasoning_class", "validity"])
    writer.writeheader()
    # One CSV row per example dict, in dataset order.
    writer.writerows(dataset)

print(f"Dataset saved to {csv_file}")


# Generation of OOD Test Set
The generated dataset was used as the test set for Experiment 2.

In [None]:
# Entities & attributes
# NOTE: these assignments rebind the `names` and `traits` pools defined for
# the initial dataset, and the function below replaces the earlier
# `generate_examples(pattern, n)`.
names = ["Alice", "Bob", "Carol", "Dan", "Eve"]
traits = ["tall", "happy", "angry", "hungry", "sleepy"]

def generate_examples(n=500):
    """Generate the "whenever/subsequently" test set.

    For each of ``n`` random draws of two different names (``X``, ``Y``) and
    two different traits (``P``, ``Q``), one example of every reasoning
    pattern is emitted, so the result has ``4 * n`` rows. Draws are not
    deduplicated, so the same sentence may appear more than once.

    The question that closes each example mirrors the "If ..., then ..."
    templates of the initial dataset: Modus Tollens asks "Is X not P?" and
    Denying the Antecedent asks "Is Y not Q?". Only the phrasing of the
    rule ("Whenever ..., subsequently ...") differs between the two sets.

    Args:
        n: Number of (X, Y, P, Q) draws. A value <= 0 yields an empty frame.

    Returns:
        A ``pandas.DataFrame`` with the columns ``text_input``,
        ``reasoning_class`` and ``validity``.
    """
    data = []

    for _ in range(n):
        X, Y = random.sample(names, 2)
        P, Q = random.sample(traits, 2)

        # The conditional rule shared by all four patterns of this draw.
        rule = f"Whenever {X} is {P}, subsequently {Y} is {Q}."

        data.extend([
            # Modus Ponens (valid)
            (f"{rule} {X} is {P}. Is {Y} {Q}?",
             "Modus Ponens", "Yes"),
            # Modus Tollens (valid): from "Y is not Q" conclude "X is not P"
            (f"{rule} {Y} is not {Q}. Is {X} not {P}?",
             "Modus Tollens", "Yes"),
            # Affirming the Consequent (invalid)
            (f"{rule} {Y} is {Q}. Is {X} {P}?",
             "Affirming the Consequent", "No"),
            # Denying the Antecedent (invalid): "Y is not Q" does not follow
            (f"{rule} {X} is not {P}. Is {Y} not {Q}?",
             "Denying the Antecedent", "No"),
        ])

    return pd.DataFrame(data, columns=["text_input", "reasoning_class", "validity"])

# Build the test set: 500 draws, four reasoning patterns per draw
df_test = generate_examples(n=500)

# Write it to CSV without the DataFrame index column
df_test.to_csv("reasoning_patterns_test_whenever.csv", index=False)
print("Test dataset generated with whenever/subsequently phrasing (2000 rows)!")
