In [6]:
import os
import json
import random
import pandas as pd
from datetime import datetime
from collections import defaultdict, Counter
import time
from WIQACausalBuilder import WIQACausalBuilder
import ollama
from datasets import load_dataset
# Load the validation split of the `allenai/wiqa` dataset into `ds`.
# NOTE(review): `trust_remote_code=True` presumably lets the dataset's own
# loading script run locally — confirm the source is trusted and that the
# installed `datasets` version still accepts this argument.
ds = load_dataset('allenai/wiqa', split="validation", trust_remote_code=True)
# Set random seed for reproducibility
# (this seeds only the standard-library `random` module)
random.seed(42)

In [7]:
# Display the record at index 2 to inspect the fields each example carries.
ds[2]

{'question_stem': 'suppose there is no sunlight for the tree to grow happens, how will it affect LESS rain.',
 'question_para_step': ['Water vapor gets into the atmosphere through a process called evaporation',
  'This then turns the water that is at the top of oceans, rivers and lakes into water vapor in the atmosphere using energy from the sun',
  'The water vapor rises in the atmosphere and there it cools down',
  'Water vapor rises in the atmosphere and there it cools down and forms tiny water droplets through something called condensation',
  'These then turn into clouds',
  'When they all combine together, they grow bigger and are too heavy to stay up there in the air',
  'This is when they will fall to the ground as rain, or maybe snow or hail by gravity',
  ''],
 'answer_label': 'no_effect',
 'answer_label_as_choice': 'C',
 'choices': {'text': ['more', 'less', 'no effect'], 'label': ['A', 'B', 'C']},
 'metadata_question_id': 'out_of_para:876:1911:73',
 'metadata_graph_id': '191

In [8]:
# Partition the loaded split by `metadata_question_type`, keeping only the
# question stem, the answer (label and letter) and the answer choices.
data_by_type = defaultdict(list)

for item in ds:
    # Copy the flat fields in a fixed order, then attach a trimmed `choices`
    # dict that holds just its 'text' and 'label' entries.
    filtered_item = {
        field: item[field]
        for field in ('question_stem', 'answer_label', 'answer_label_as_choice')
    }
    filtered_item['choices'] = {
        part: item['choices'][part] for part in ('text', 'label')
    }
    question_type = item['metadata_question_type']
    data_by_type[question_type].append(filtered_item)

# Report the overall record count, then the per-type counts in sorted order.
print(f"Total records: {sum(map(len, data_by_type.values()))}")
print("\nBreakdown by question type:")
for qtype in sorted(data_by_type):
    data = data_by_type[qtype]
    print(f"  {qtype}: {len(data)} records")

# Write one JSON file per question type (into the current working directory)
# and echo the first record of each type as a sample.
for qtype, data in data_by_type.items():
    output_file = f'wiqa_filtered_{qtype}.json'
    with open(output_file, mode='w', encoding='utf-8') as f:
        json.dump(data, f, indent=2, ensure_ascii=False)
    print(f"\nSaved {len(data)} records to {output_file}")
    print(f"Example from {qtype}:")
    print(json.dumps(data[0], indent=2, ensure_ascii=False))

Total records: 6894

Breakdown by question type:
  EXOGENOUS_EFFECT: 2941 records
  INPARA_EFFECT: 1655 records
  OUTOFPARA_DISTRACTOR: 2298 records

Saved 2941 records to wiqa_filtered_EXOGENOUS_EFFECT.json
Example from EXOGENOUS_EFFECT:
{
  "question_stem": "suppose squirrels get sick happens, how will it affect squirrels need more food.",
  "answer_label": "more",
  "answer_label_as_choice": "A",
  "choices": {
    "text": [
      "more",
      "less",
      "no effect"
    ],
    "label": [
      "A",
      "B",
      "C"
    ]
  }
}

Saved 1655 records to wiqa_filtered_INPARA_EFFECT.json
Example from INPARA_EFFECT:
{
  "question_stem": "suppose the female is sterile happens, how will it affect LESS rabbits.",
  "answer_label": "more",
  "answer_label_as_choice": "A",
  "choices": {
    "text": [
      "more",
      "less",
      "no effect"
    ],
    "label": [
      "A",
      "B",
      "C"
    ]
  }
}

Saved 2298 records to wiqa_filtered_OUTOFPARA_DISTRACTOR.json
Example from 