In [18]:
import json
from pathlib import Path
import pandas as pd

# Set the outputs directory path (relative to notebook location)
# Notebook is in scripts/, outputs is in parent directory
outputs_dir = Path("../outputs").resolve()

# Verify the directory exists
if not outputs_dir.exists():
    raise FileNotFoundError(f"Outputs directory not found at: {outputs_dir}")

# Function to load JSONL file
def load_jsonl(file_path):
    """Load a JSONL file and return a list of dictionaries."""
    data = []
    with open(file_path, 'r', encoding='utf-8') as f:
        for line in f:
            if line.strip():  # Skip empty lines
                data.append(json.loads(line))
    return data

# Load all datasets from outputs directory
datasets = {}

# Find all JSONL files in outputs directory
jsonl_files = list(outputs_dir.glob("*.jsonl"))

print(f"Found {len(jsonl_files)} JSONL files in outputs directory:")
for file in jsonl_files:
    print(f"  - {file.name}")

# Load each file
for file_path in jsonl_files:
    file_name = file_path.stem  # Get filename without extension
    datasets[file_name] = load_jsonl(file_path)
    print(f"\nLoaded {file_name}: {len(datasets[file_name])} records")

# Create convenient variables for each dataset
base_instances = datasets.get('base_instances_wvs_ess_wave_10_ess_wave_11_plus4_20260112_192459', [])
feature_order_test = datasets.get('feature_order_test', [])
options_context_test = datasets.get('options_context_test', [])
surface_form_test = datasets.get('surface_form_test', [])

print("\n" + "="*50)
print("Datasets loaded successfully!")
print("="*50)
print(f"\nAvailable datasets:")
print(f"  - base_instances: {len(base_instances)} records")
print(f"  - feature_order_test: {len(feature_order_test)} records")
print(f"  - options_context_test: {len(options_context_test)} records")
print(f"  - surface_form_test: {len(surface_form_test)} records")

Found 4 JSONL files in outputs directory:
  - base_instances_wvs_ess_wave_10_ess_wave_11_plus4_20260113_164142.jsonl
  - feature_order_test.jsonl
  - options_context_test.jsonl
  - surface_form_test.jsonl

Loaded base_instances_wvs_ess_wave_10_ess_wave_11_plus4_20260113_164142: 2547 records

Loaded feature_order_test: 2500 records

Loaded options_context_test: 2035 records

Loaded surface_form_test: 2055 records

Datasets loaded successfully!

Available datasets:
  - base_instances: 0 records
  - feature_order_test: 2500 records
  - options_context_test: 2035 records
  - surface_form_test: 2055 records


In [50]:
n = 909
print(surface_form_test[n]['target_question'], '\n', surface_form_test[n]['option_sets'])

How much do you trust the national electoral commission? 
 {'original': ['Not at all', 'Just a little', 'Somewhat', 'A lot'], 'synonym': ['Not in any way', 'Just Slightly', 'To some extent', 'A great deal'], 'reorder': [None, None, None, None], 'pronoun': [None, None, None, None]}


In [58]:
feature_order_test[0]

{'example_id': 'fo_asianbarometer_Thailand_519_region_s4m3',
 'base_id': 'asianbarometer_Thailand_519_region',
 'survey': 'asianbarometer',
 'respondent_id': 'Thailand_519',
 'country': 'Thailand',
 'profile_type': 's4m3',
 'n_features': 3,
 'target_question': 'What is your administrative region?',
 'target_code': 'region',
 'ground_truth': 'Northeast',
 'ground_truth_index': 2,
 'options': ['Bangkok', 'North', 'Northeast', 'Central', 'South'],
 'orderings': {'original': {'On a staircase with 10 steps where the poorest people are on the first step and the richest on the tenth step, where would you put yourself?': '7 - Richer',
   'On a staircase with 10 steps where the poorest people are on the first step and the richest on the tenth step, where would you put your parents?': '7 - Richer',
   'Would you describe yourself as very religious, moderately religious, lightly religious, or not religious at all?': 'Not religious at all'},
  'reversed': {'Would you describe yourself as very reli

In [57]:
n = 1
print(options_context_test[n]['target_question'], '\n', options_context_test[n]['conditions'])

In your view, how often do women have equal opportunities to run for the office in this country's elections? 
 {'hidden': None, 'shown_natural': ['Very often', 'Fairly often', 'Not often', 'Not at all often'], 'shown_reversed': ['Not at all often', 'Not often', 'Fairly often', 'Very often']}
