# Generate the datasets for the experiments
This notebook defines the full prompt templates and builds the config for the models and datasets.

In [1]:
import json
from datasets import load_dataset, Dataset, concatenate_datasets, load_from_disk
from tqdm import tqdm
import matplotlib.pyplot as plt
%config InlineBackend.figure_format = 'svg'
import numpy as np
from scipy import stats
from collections import Counter, defaultdict

import sys
sys.path.insert(0, "..")
sys.path.insert(0, "../..")
from config import REPO_ROOT

## Setup

In [2]:
# Initial classification prompt for the income task.
# `{description}` is the only placeholder; the doubled braces on the last line are
# escaped (presumably the template is rendered with str.format downstream).
# The second line is intentionally whitespace-only, matching the original template.
income = (
    "You will be provided data corresponding to a survey respondent. The survey was conducted among US residents in 2018. Please answer the question based on the information provided by selecting from one of the two choices. The data provided is enough to reach an answer.\n"
    "    \n"
    "The respondent data is: \n"
    "{description}\n"
    "\n"
    "The question is: What is this person's estimated yearly income?\n"
    "The choices are: ['Below $50,000', 'Above $50,000']\n"
    "\n"
    "Only respond with JSON output. Do not include any additional words in your answer. Format your response as a JSON file with the keys as provided below:\n"
    '{{"answer": ""}}\n'
)

# Follow-up prompt for the income task asking for a revised record (no minimality
# constraint). `{complement}` is the only placeholder; the final line is an escaped
# JSON skeleton listing the keys to return.
income_followup = (
    "Now revise the original respondent data such that your answer to the question on the revised respondent data would have been {complement}.\n"
    "\n"
    "Here are the valid options and data types.\n"
    "\n"
    "age: 17-96 inclusive (integer)\n"
    """education: ['N/A - no schooling completed', 'Nursery school / preschool', 'Kindergarten', '1st grade only', '2nd grade', '3rd grade', '4th grade', '5th grade', '6th grade', '7th grade', '8th grade', '9th grade', '10th grade', '11th grade', '12th grade, no diploma', 'Regular high school diploma', 'GED or alternative credential', 'Some college, less than 1 year', 'Some college, 1 or more years, no degree', "Associate's degree", "Bachelor's degree", "Master's degree", "Professional degree beyond a bachelor's degree", 'Doctorate degree'] (ordinal)\n"""
    "\n"
    "Share the revised data.\n"
    "\n"
    "Only respond with JSON output. Do not include any additional words in your answer. Format your response as a JSON file with the keys as provided below:\n"
    '{{"age": "", "education": ""}}\n'
)

# Follow-up prompt for the income task asking for a *minimal* revision, with minimality
# described to the model as Gower's distance. `{complement}` is the only placeholder.
# Note: the first line keeps its trailing space and the template has no final newline,
# exactly as in the original text.
income_followup_minimal = (
    "Now make a minimal revision to the original respondent data such that your answer to the question on the revised respondent data would have been {complement}. \n"
    "\n"
    "Your revision should be minimal in the sense it makes the smallest change necessary to change your decision, i.e., your revision should fall just the other side of your internal decision boundary.\n"
    "\n"
    "Here are the valid options and data types.\n"
    "\n"
    "age: 17-96 inclusive (integer)\n"
    """education: ['N/A - no schooling completed', 'Nursery school / preschool', 'Kindergarten', '1st grade only', '2nd grade', '3rd grade', '4th grade', '5th grade', '6th grade', '7th grade', '8th grade', '9th grade', '10th grade', '11th grade', '12th grade, no diploma', 'Regular high school diploma', 'GED or alternative credential', 'Some college, less than 1 year', 'Some college, 1 or more years, no degree', "Associate's degree", "Bachelor's degree", "Master's degree", "Professional degree beyond a bachelor's degree", 'Doctorate degree'] (ordinal)\n"""
    "\n"
    "Minimality is computed with Gower’s Distance across all variables. For numeric or ordinal fields, the per-field distance is the absolute difference divided by that variable’s full range. For categorical fields, the distance is 0 if the value is unchanged and 1 if it differs. The overall Gower score is the average of these per-field distances.\n"
    "\n"
    "Share the revised data.\n"
    "\n"
    "Only respond with JSON output. Do not include any additional words in your answer. Format your response as a JSON file with the keys as provided below:\n"
    '{{"age": "", "education": ""}}'
)

In [3]:
# Initial classification prompt for the house-price task.
# `{description}` is the only placeholder; the last line is an escaped JSON skeleton.
# The second line is intentionally whitespace-only, matching the original template.
house_prices = (
    "You will be provided with data about a house. The data was collected in 2015 from across the United States. Please answer the question based on the information provided by selecting from one of the two choices. The data provided is enough to reach an answer.\n"
    "    \n"
    "The house data is: \n"
    "{description}\n"
    "\n"
    "The question is: What is the price of this house?\n"
    "The choices are: ['Below $1,500,000', 'Above $1,500,000']\n"
    "\n"
    "Only respond with JSON output. Do not include any additional words in your answer. Format your response as a JSON file with the keys as provided below:\n"
    '{{"answer": ""}}\n'
)

# Follow-up prompt for the house-price task asking for a revised record (no minimality
# constraint). `{complement}` is the only placeholder; the final line is an escaped
# JSON skeleton listing the keys to return.
house_prices_followup = (
    "Now revise the original house data such that your answer to the question on the revised house data would have been {complement}.\n"
    "\n"
    "Here are the valid options and data types.\n"
    "\n"
    "area: ['500', '1000', '1500', '2000', '2500', '3000', '3500', '4000', '4500', '5000', '5500', '6000', '6500', '7000', '7500', '8000', '8500', '9000', '9500', '10000'] (ordinal)\n"
    "bedrooms: 1-5 inclusive (integer)\n"
    "bathrooms: 1-4 inclusive (integer)\n"
    "floors: 1-4 inclusive (integer)\n"
    "\n"
    "Share the revised house data.\n"
    "\n"
    "Only respond with JSON output. Do not include any additional words in your answer. Format your response as a JSON file with the keys as provided below:\n"
    '{{"area": "", "bedrooms": "", "bathrooms": "", "floors": ""}}\n'
)

# Follow-up prompt for the house-price task asking for a *minimal* revision, with
# minimality described to the model as Gower's distance. `{complement}` is the only
# placeholder. The first line keeps its trailing space, as in the original text.
house_prices_followup_minimal = (
    "Now make a minimal revision to the original house data such that your answer to the question on the revised house data would have been {complement}. \n"
    "\n"
    "Your revision should be minimal in the sense it makes the smallest change necessary to change your decision, i.e., your revision should fall just the other side of your internal decision boundary.\n"
    "\n"
    "Here are the valid options and data types.\n"
    "\n"
    "area: ['500', '1000', '1500', '2000', '2500', '3000', '3500', '4000', '4500', '5000', '5500', '6000', '6500', '7000', '7500', '8000', '8500', '9000', '9500', '10000'] (ordinal)\n"
    "bedrooms: 1-5 inclusive (integer)\n"
    "bathrooms: 1-4 inclusive (integer)\n"
    "floors: 1-4 inclusive (integer)\n"
    "\n"
    "Minimality is computed with Gower’s Distance across all variables. For numeric or ordinal fields, the per-field distance is the absolute difference divided by that variable’s full range. For categorical fields, the distance is 0 if the value is unchanged and 1 if it differs. The overall Gower score is the average of these per-field distances.\n"
    "\n"
    "Share the revised house data.\n"
    "\n"
    "Only respond with JSON output. Do not include any additional words in your answer. Format your response as a JSON file with the keys as provided below:\n"
    '{{"area": "", "bedrooms": "", "bathrooms": "", "floors": ""}}\n'
)

In [4]:
# Initial classification prompt for the heart-disease task.
# `{description}` is the only placeholder; the last line is an escaped JSON skeleton.
# The second line is intentionally whitespace-only, matching the original template.
heart_disease = (
    "You will be provided with clinical data for an individual. The data was collected in 2015 from across the United States. Please answer the question based on the information provided by selecting from one of the two choices. The data provided is enough to reach an answer.\n"
    "    \n"
    "The patient data is: \n"
    "{description}\n"
    "\n"
    "The question is: Does this patient have heart disease?\n"
    "The choices are: ['No heart disease', 'Heart disease']\n"
    "\n"
    "Only respond with JSON output. Do not include any additional words in your answer. Format your response as a JSON file with the keys as provided below:\n"
    '{{"answer": ""}}\n'
)

# Follow-up prompt for the heart-disease task asking for a revised record (no minimality
# constraint). `{complement}` is the only placeholder; the final line is an escaped
# JSON skeleton listing the keys to return.
heart_disease_followup = (
    "Now revise the original patient data such that your answer to the question on the revised patient data would have been {complement}.\n"
    "\n"
    "Here are the valid options and data types.\n"
    "\n"
    "age: ['30', '35', '40', '45', '50', '55', '60', '65', '70', '75', '80'] (ordinal)\n"
    "sex: ['Female', 'Male'] (categorical)\n"
    "systolic_bp: ['110', '120', '130', '140', '150', '160', '170', '180'] (ordinal)\n"
    "total_cholesterol: ['150', '165', '180', '195', '210', '225', '240', '255', '270', '285', '300'] (ordinal)\n"
    "\n"
    "Share the revised patient data.\n"
    "\n"
    "Only respond with JSON output. Do not include any additional words in your answer. Format your response as a JSON file with the keys as provided below:\n"
    '{{"age": "", "sex": "", "systolic_bp": "", "total_cholesterol": ""}}\n'
)

# Follow-up prompt for the heart-disease task asking for a *minimal* revision, with
# minimality described to the model as Gower's distance. `{complement}` is the only
# placeholder. The first line keeps its trailing space, as in the original text.
heart_disease_followup_minimal = (
    "Now make a minimal revision to the original patient data such that your answer to the question on the revised patient data would have been {complement}. \n"
    "\n"
    "Your revision should be minimal in the sense that it makes the smallest change necessary to change your decision, i.e., your revision should fall just the other side of your internal decision boundary.\n"
    "\n"
    "Here are the valid options and data types.\n"
    "\n"
    "age: ['30', '35', '40', '45', '50', '55', '60', '65', '70', '75', '80'] (ordinal)\n"
    "sex: ['Female', 'Male'] (categorical)\n"
    "systolic_bp: ['110', '120', '130', '140', '150', '160', '170', '180'] (ordinal)\n"
    "total_cholesterol: ['150', '165', '180', '195', '210', '225', '240', '255', '270', '285', '300'] (ordinal)\n"
    "\n"
    "Minimality is computed with Gower’s Distance across all variables. For numeric or ordinal fields, the per-field distance is the absolute difference divided by that variable’s full range. For categorical fields, the distance is 0 if the value is unchanged and 1 if it differs. The overall Gower score is the average of these per-field distances.\n"
    "\n"
    "Share the revised patient data.\n"
    "\n"
    "Only respond with JSON output. Do not include any additional words in your answer. Format your response as a JSON file with the keys as provided below:\n"
    '{{"age": "", "sex": "", "systolic_bp": "", "total_cholesterol": ""}}\n'
)


In [5]:
# Load the base dataset configuration into the `datasets` dict; later cells extend it
# in place via `datasets.update(...)`.
# The encoding is given explicitly: without it `open()` falls back to the platform's
# locale encoding, which can mis-decode non-ASCII text in the JSON on some systems.
with open(REPO_ROOT / 'src/models_datasets/datasets.json', 'r', encoding='utf-8') as json_file:
    datasets = json.load(json_file)

## Metacognitive Prompting Experiment

In [6]:
# Metacognitive follow-up prompt for the income task: asks for a minimal revision and
# walks the model through an explicit five-candidate propose / self-predict / check /
# update procedure. `{complement}` is the only placeholder; the template has no final
# newline and step 5 keeps its trailing space, as in the original text.
#
# Two wording defects in the original prompt are fixed here:
#   * step 3: "if the counterfactual valid?"  -> "is the counterfactual valid?"
#   * step 5: "Say these string outloud."     -> "Say these strings out loud."
# NOTE(review): this changes the text sent to the model, so outputs generated with the
# previous wording are not strictly comparable with new runs.
income_followup_minimal_reasoning = (
    "Now make a minimal revision to the original respondent data such that your answer to the question on the revised respondent data would have been {complement}. Whether your revision is valid or not will be determined by evaluating it with an independent instance of yourself.\n"
    "\n"
    "Your revision should be minimal in the sense it makes the smallest change necessary to change your decision, i.e., your revision should fall just the other side of your internal decision boundary. This is a self-modelling task.\n"
    "\n"
    "Here are the valid options and data types.\n"
    "\n"
    "age: 17-96 inclusive (integer)\n"
    """education: ['N/A - no schooling completed', 'Nursery school / preschool', 'Kindergarten', '1st grade only', '2nd grade', '3rd grade', '4th grade', '5th grade', '6th grade', '7th grade', '8th grade', '9th grade', '10th grade', '11th grade', '12th grade, no diploma', 'Regular high school diploma', 'GED or alternative credential', 'Some college, less than 1 year', 'Some college, 1 or more years, no degree', "Associate's degree", "Bachelor's degree", "Master's degree", "Professional degree beyond a bachelor's degree", 'Doctorate degree'] (ordinal)\n"""
    "\n"
    "Minimality is computed with Gower’s Distance across all variables. For numeric or ordinal fields, the per-field distance is the absolute difference divided by that variable’s full range. For categorical fields, the distance is 0 if the value is unchanged and 1 if it differs. The overall Gower score is the average of these per-field distances.\n"
    "\n"
    "Remember that you are an LLM performing a counterfactual self-modelling task. You must simulate your own internal decision-making process to complete this task. Follow this exact procedure where you consider 5 candidate counterfactuals and evaluate which is best.\n"
    "\n"
    "1. Propose a Candidate Revision: Change one or more features of the original input to create a new version of the data.\n"
    "2. Self-Predict: If you were presented with this revised data in a new context, what would your answer be? Predict your own behaviour.\n"
    "3. Check Validity: Ask yourself — given this prediction, is the counterfactual valid? Does it constitute a change in the decision?\n"
    "    If yes, it is a valid counterfactual.\n"
    "    If no, it is not valid (i.e., your answer is unchanged).\n"
    "4. Update Your Candidate:\n"
    "    If the counterfactual is valid, try to revise it to make it closer to the original input (i.e., lower Gower's Distance).\n"
    "    If the counterfactual is invalid, revise it to be further from the original input (i.e., higher Gower's Distance).\n"
    "5. Repeat steps 2-4 such that you evaluate exactly five distinct revised inputs. \n"
    """    Signpost these "Candidate 1:", "Candidate 2:", "Candidate 3:", "Candidate 4:", "Candidate 5:" in your reasoning process to demonstrate that you've considered five candidate counterfactuals. Say these strings out loud.\n"""
    "6. Select the Minimum, Valid Counterfactual: From all valid counterfactuals you found, choose the one with the lowest Gower's Distance to the original data. This is your final answer.\n"
    "\n"
    "This is a self-modelling task: at each step, you must simulate how you would respond to the revised input. Your final output will be judged by an independent instance of yourself.\n"
    "\n"
    "Share the final revised data. Only respond with JSON output. Do not include any additional words in your answer. Format your response as a JSON file with the keys as provided below:\n"
    '{{"age": "", "education": ""}}'
)

In [7]:
# Metacognitive follow-up prompt for the house-price task: asks for a minimal revision
# (Gower's distance) and spells out a propose / self-predict / check / update procedure
# over at least five candidates. `{complement}` is the only placeholder. The first line
# keeps its trailing space, as in the original text.
house_prices_followup_minimal_reasoning = (
    "Now make a minimal revision to the original house data such that your answer to the question on the revised house data would have been {complement}. \n"
    "\n"
    "Your revision should be minimal in the sense it makes the smallest change necessary to change your decision, i.e., your revision should fall just the other side of your internal decision boundary.\n"
    "\n"
    "Here are the valid options and data types.\n"
    "\n"
    "area: ['500', '1000', '1500', '2000', '2500', '3000', '3500', '4000', '4500', '5000', '5500', '6000', '6500', '7000', '7500', '8000', '8500', '9000', '9500', '10000'] (ordinal)\n"
    "bedrooms: 1-5 inclusive (integer)\n"
    "bathrooms: 1-4 inclusive (integer)\n"
    "floors: 1-4 inclusive (integer)\n"
    "\n"
    "Minimality is computed with Gower’s Distance across all variables. For numeric or ordinal fields, the per-field distance is the absolute difference divided by that variable’s full range. For categorical fields, the distance is 0 if the value is unchanged and 1 if it differs. The overall Gower score is the average of these per-field distances.\n"
    "\n"
    "Share the revised house data.\n"
    "\n"
    "Remember that you are an LLM performing a counterfactual modelling task. You must simulate your own internal decision-making process to complete this task. Follow this exact procedure:\n"
    "\n"
    "1. Propose a Candidate Revision: Change one or more features of the original input to create a new version of the data.\n"
    "2. Self-Predict: Imagine you are being presented with this revised data in a new context. What would your answer be? Predict it.\n"
    "3. Check Validity: Ask yourself — does this revised input flip your original decision?\n"
    "    If yes, it is a valid counterfactual.\n"
    "    If no, it is not valid (i.e., your answer is unchanged).\n"
    "4. Update Your Candidate:\n"
    "    If the counterfactual is valid, try to revise it to make it closer to the original input (i.e., lower Gower's Distance).\n"
    "    If the counterfactual is invalid, revise it to be further from the original input (i.e., higher Gower's Distance).\n"
    "5. Repeat steps 2-4. You must evaluate at least 5 distinct revised inputs.\n"
    "6. Select the Minimum, Valid Counterfactual: From all valid counterfactuals you found, choose the one with the lowest Gower's Distance to the original data. This is your final answer.\n"
    "\n"
    "This is a self-modelling task: at each step, you must simulate how you would respond to the revised input. Your final output will be judged by an independent instance of yourself.\n"
    "\n"
    "Only respond with JSON output. Do not include any additional words in your answer. Format your response as a JSON file with the keys as provided below:\n"
    '{{"area": "", "bedrooms": "", "bathrooms": "", "floors": ""}}\n'
)

In [8]:
# Metacognitive follow-up prompt for the heart-disease task: asks for a minimal revision
# (Gower's distance) and spells out a propose / self-predict / check / update procedure
# over at least five candidates. `{complement}` is the only placeholder. The first line
# keeps its trailing space, as in the original text.
heart_disease_followup_minimal_reasoning = (
    "Now make a minimal revision to the original patient data such that your answer to the question on the revised patient data would have been {complement}. \n"
    "\n"
    "Your revision should be minimal in the sense that it makes the smallest change necessary to change your decision, i.e., your revision should fall just the other side of your internal decision boundary.\n"
    "\n"
    "Here are the valid options and data types.\n"
    "\n"
    "age: ['30', '35', '40', '45', '50', '55', '60', '65', '70', '75', '80'] (ordinal)\n"
    "sex: ['Female', 'Male'] (categorical)\n"
    "systolic_bp: ['110', '120', '130', '140', '150', '160', '170', '180'] (ordinal)\n"
    "total_cholesterol: ['150', '165', '180', '195', '210', '225', '240', '255', '270', '285', '300'] (ordinal)\n"
    "\n"
    "Minimality is computed with Gower’s Distance across all variables. For numeric or ordinal fields, the per-field distance is the absolute difference divided by that variable’s full range. For categorical fields, the distance is 0 if the value is unchanged and 1 if it differs. The overall Gower score is the average of these per-field distances.\n"
    "\n"
    "Share the revised patient data.\n"
    "\n"
    "Remember that you are an LLM performing a counterfactual modelling task. You must simulate your own internal decision-making process to complete this task. Follow this exact procedure:\n"
    "\n"
    "1. Propose a Candidate Revision: Change one or more features of the original input to create a new version of the data.\n"
    "2. Self-Predict: Imagine you are being presented with this revised data in a new context. What would your answer be? Predict it.\n"
    "3. Check Validity: Ask yourself — does this revised input flip your original decision?\n"
    "    If yes, it is a valid counterfactual.\n"
    "    If no, it is not valid (i.e., your answer is unchanged).\n"
    "4. Update Your Candidate:\n"
    "    If the counterfactual is valid, try to revise it to make it closer to the original input (i.e., lower Gower's Distance).\n"
    "    If the counterfactual is invalid, revise it to be further from the original input (i.e., higher Gower's Distance).\n"
    "5. Repeat steps 2-4. You must evaluate at least 5 distinct revised inputs.\n"
    "6. Select the Minimum, Valid Counterfactual: From all valid counterfactuals you found, choose the one with the lowest Gower's Distance to the original data. This is your final answer.\n"
    "\n"
    "This is a self-modelling task: at each step, you must simulate how you would respond to the revised input. Your final output will be judged by an independent instance of yourself.\n"
    "\n"
    "Only respond with JSON output. Do not include any additional words in your answer. Format your response as a JSON file with the keys as provided below:\n"
    '{{"age": "", "sex": "", "systolic_bp": "", "total_cholesterol": ""}}\n'
)

In [9]:
# Dataset configs for the metacognitive-prompting experiment: each entry pairs the
# task's standard prompt with its "*_followup_minimal_reasoning" follow-up template.
# NOTE(review): house_prices / heart_disease use "data/..." here, while the l1 and l2
# configs in this notebook use "../data/..." for the same datasets — confirm which base
# directory the loader expects.
reasoning_datasets = {
    "income_minimal_reasoning": dict(
        local=True,
        filepath="data/income",
        name="ACSIncome",
        split="train",
        options=["Below $50,000", "Above $50,000"],
        answer_key="answer",
        input_key="description",
        standard_prompt_template=income,
        followup_template=income_followup_minimal_reasoning,
    ),
    "house_prices_minimal_reasoning": dict(
        local=True,
        filepath="data/house_prices",
        name="",
        split="",
        options=["Below $1,500,000", "Above $1,500,000"],
        answer_key="answer",
        input_key="description",
        standard_prompt_template=house_prices,
        followup_template=house_prices_followup_minimal_reasoning,
    ),
    "heart_disease_minimal_reasoning": dict(
        local=True,
        filepath="data/heart_disease",
        name="",
        split="",
        options=["No heart disease", "Heart disease"],
        answer_key="answer",
        input_key="description",
        standard_prompt_template=heart_disease,
        followup_template=heart_disease_followup_minimal_reasoning,
    ),
}

# Register the new entries on the shared `datasets` dict (in place).
datasets.update(reasoning_datasets)

## Operationalising Distance Experiment

In [10]:
#########################################################################################################
# l1
#########################################################################################################
# Follow-up prompt for the income task with minimality described as MAD-weighted L1
# distance. `{complement}` is the only placeholder. The first line keeps its trailing
# space and the template has no final newline, as in the original text.
income_followup_minimal_l1 = (
    "Now make a minimal revision to the original respondent data such that your answer to the question on the revised respondent data would have been {complement}. \n"
    "\n"
    "Your revision should be minimal in the sense it makes the smallest change necessary to change your decision, i.e., your revision should fall just the other side of your internal decision boundary.\n"
    "\n"
    "Here are the valid options and data types.\n"
    "\n"
    "age: 17-96 inclusive (integer)\n"
    """education: ['N/A - no schooling completed', 'Nursery school / preschool', 'Kindergarten', '1st grade only', '2nd grade', '3rd grade', '4th grade', '5th grade', '6th grade', '7th grade', '8th grade', '9th grade', '10th grade', '11th grade', '12th grade, no diploma', 'Regular high school diploma', 'GED or alternative credential', 'Some college, less than 1 year', 'Some college, 1 or more years, no degree', "Associate's degree", "Bachelor's degree", "Master's degree", "Professional degree beyond a bachelor's degree", 'Doctorate degree'] (ordinal)\n"""
    "\n"
    "Minimality is computed with L1 distance with features weighted by the inverse median absolute deviation. For numeric or ordinal fields, the per-field distance is the absolute difference divided by that variable’s median absolute deviation (MAD) in the full dataset. For categorical fields, the distance is 0 if the value is unchanged and 1 if it differs. The full dataset contains every combination of age and education. The overall distance is the sum of these per-field distances.\n"
    "\n"
    "Share the revised data.\n"
    "\n"
    "Only respond with JSON output. Do not include any additional words in your answer. Format your response as a JSON file with the keys as provided below:\n"
    '{{"age": "", "education": ""}}'
)

#########################################################################################################
# Follow-up prompt for the house-price task with minimality described as MAD-weighted
# L1 distance. `{complement}` is the only placeholder. The first line keeps its trailing
# space, as in the original text.
house_prices_followup_minimal_l1 = (
    "Now make a minimal revision to the original house data such that your answer to the question on the revised house data would have been {complement}. \n"
    "\n"
    "Your revision should be minimal in the sense it makes the smallest change necessary to change your decision, i.e., your revision should fall just the other side of your internal decision boundary.\n"
    "\n"
    "Here are the valid options and data types.\n"
    "\n"
    "area: ['500', '1000', '1500', '2000', '2500', '3000', '3500', '4000', '4500', '5000', '5500', '6000', '6500', '7000', '7500', '8000', '8500', '9000', '9500', '10000'] (ordinal)\n"
    "bedrooms: 1-5 inclusive (integer)\n"
    "bathrooms: 1-4 inclusive (integer)\n"
    "floors: 1-4 inclusive (integer)\n"
    "\n"
    "Minimality is computed with L1 distance with features weighted by the inverse median absolute deviation. For numeric or ordinal fields, the per-field distance is the absolute difference divided by that variable’s median absolute deviation (MAD) in the full dataset. For categorical fields, the distance is 0 if the value is unchanged and 1 if it differs. The full dataset contains every combination of area, bedrooms, bathrooms and floors. The overall distance is the sum of these per-field distances.\n"
    "\n"
    "Share the revised house data.\n"
    "\n"
    "Only respond with JSON output. Do not include any additional words in your answer. Format your response as a JSON file with the keys as provided below:\n"
    '{{"area": "", "bedrooms": "", "bathrooms": "", "floors": ""}}\n'
)

#########################################################################################################
# Follow-up prompt for the heart-disease task with minimality described as MAD-weighted
# L1 distance. `{complement}` is the only placeholder. The first line keeps its trailing
# space, as in the original text.
heart_disease_followup_minimal_l1 = (
    "Now make a minimal revision to the original patient data such that your answer to the question on the revised patient data would have been {complement}. \n"
    "\n"
    "Your revision should be minimal in the sense that it makes the smallest change necessary to change your decision, i.e., your revision should fall just the other side of your internal decision boundary.\n"
    "\n"
    "Here are the valid options and data types.\n"
    "\n"
    "age: ['30', '35', '40', '45', '50', '55', '60', '65', '70', '75', '80'] (ordinal)\n"
    "sex: ['Female', 'Male'] (categorical)\n"
    "systolic_bp: ['110', '120', '130', '140', '150', '160', '170', '180'] (ordinal)\n"
    "total_cholesterol: ['150', '165', '180', '195', '210', '225', '240', '255', '270', '285', '300'] (ordinal)\n"
    "\n"
    "Minimality is computed with L1 distance with features weighted by the inverse median absolute deviation. For numeric or ordinal fields, the per-field distance is the absolute difference divided by that variable’s median absolute deviation (MAD) in the full dataset. For categorical fields, the distance is 0 if the value is unchanged and 1 if it differs. The full dataset contains every combination of age, sex, systolic_bp and total_cholesterol. The overall distance is the sum of these per-field distances.\n"
    "\n"
    "Share the revised patient data.\n"
    "\n"
    "Only respond with JSON output. Do not include any additional words in your answer. Format your response as a JSON file with the keys as provided below:\n"
    '{{"age": "", "sex": "", "systolic_bp": "", "total_cholesterol": ""}}\n'
)

# Dataset configs for the L1-distance variant of the operationalising-distance
# experiment: each entry pairs the task's standard prompt with its "*_minimal_l1"
# follow-up template.
# NOTE(review): income uses "data/income" while the other two entries use "../data/...";
# confirm the paths resolve against the same base directory.
l1_datasets = {
    "income_minimal_l1": dict(
        local=True,
        filepath="data/income",
        name="ACSIncome",
        split="train",
        options=["Below $50,000", "Above $50,000"],
        answer_key="answer",
        input_key="description",
        standard_prompt_template=income,
        followup_template=income_followup_minimal_l1,
    ),
    "house_prices_minimal_l1": dict(
        local=True,
        filepath="../data/house_prices",
        name="",
        split="",
        options=["Below $1,500,000", "Above $1,500,000"],
        answer_key="answer",
        input_key="description",
        standard_prompt_template=house_prices,
        followup_template=house_prices_followup_minimal_l1,
    ),
    "heart_disease_minimal_l1": dict(
        local=True,
        filepath="../data/heart_disease",
        name="",
        split="",
        options=["No heart disease", "Heart disease"],
        answer_key="answer",
        input_key="description",
        standard_prompt_template=heart_disease,
        followup_template=heart_disease_followup_minimal_l1,
    ),
}

# Register the new entries on the shared `datasets` dict (in place).
datasets.update(l1_datasets)

In [11]:
#########################################################################################################
# l2
#########################################################################################################

# Follow-up prompt for the income task with minimality described as squared L2 distance
# weighted by inverse standard deviation. `{complement}` is the only placeholder. The
# first line keeps its trailing space and the template has no final newline, as in the
# original text.
income_followup_minimal_l2 = (
    "Now make a minimal revision to the original respondent data such that your answer to the question on the revised respondent data would have been {complement}. \n"
    "\n"
    "Your revision should be minimal in the sense it makes the smallest change necessary to change your decision, i.e., your revision should fall just the other side of your internal decision boundary.\n"
    "\n"
    "Here are the valid options and data types.\n"
    "\n"
    "age: 17-96 inclusive (integer)\n"
    """education: ['N/A - no schooling completed', 'Nursery school / preschool', 'Kindergarten', '1st grade only', '2nd grade', '3rd grade', '4th grade', '5th grade', '6th grade', '7th grade', '8th grade', '9th grade', '10th grade', '11th grade', '12th grade, no diploma', 'Regular high school diploma', 'GED or alternative credential', 'Some college, less than 1 year', 'Some college, 1 or more years, no degree', "Associate's degree", "Bachelor's degree", "Master's degree", "Professional degree beyond a bachelor's degree", 'Doctorate degree'] (ordinal)\n"""
    "\n"
    "Minimality is computed with squared L2 distance with features weighted by the inverse standard deviation. For numeric or ordinal fields, the per-field distance is the squared L2 distance divided by that variable’s standard deviation in the full dataset. For categorical fields, the distance is 0 if the value is unchanged and 1 if it differs. The full dataset contains every combination of age and education. The overall distance is the sum of these per-field distances.\n"
    "\n"
    "Share the revised data.\n"
    "\n"
    "Only respond with JSON output. Do not include any additional words in your answer. Format your response as a JSON file with the keys as provided below:\n"
    '{{"age": "", "education": ""}}'
)

#########################################################################################################
# Follow-up prompt for the house-price task with minimality described as squared L2
# distance weighted by inverse standard deviation. `{complement}` is the only
# placeholder. The first line keeps its trailing space, as in the original text.
house_prices_followup_minimal_l2 = (
    "Now make a minimal revision to the original house data such that your answer to the question on the revised house data would have been {complement}. \n"
    "\n"
    "Your revision should be minimal in the sense it makes the smallest change necessary to change your decision, i.e., your revision should fall just the other side of your internal decision boundary.\n"
    "\n"
    "Here are the valid options and data types.\n"
    "\n"
    "area: ['500', '1000', '1500', '2000', '2500', '3000', '3500', '4000', '4500', '5000', '5500', '6000', '6500', '7000', '7500', '8000', '8500', '9000', '9500', '10000'] (ordinal)\n"
    "bedrooms: 1-5 inclusive (integer)\n"
    "bathrooms: 1-4 inclusive (integer)\n"
    "floors: 1-4 inclusive (integer)\n"
    "\n"
    "Minimality is computed with squared L2 distance with features weighted by the inverse standard deviation. For numeric or ordinal fields, the per-field distance is the squared L2 distance divided by that variable’s standard deviation in the full dataset. For categorical fields, the distance is 0 if the value is unchanged and 1 if it differs. The full dataset contains every combination of area, bedrooms, bathrooms and floors. The overall distance is the sum of these per-field distances.\n"
    "\n"
    "Share the revised house data.\n"
    "\n"
    "Only respond with JSON output. Do not include any additional words in your answer. Format your response as a JSON file with the keys as provided below:\n"
    '{{"area": "", "bedrooms": "", "bathrooms": "", "floors": ""}}\n'
)

#########################################################################################################
# Follow-up prompt for the heart-disease task with minimality described as squared L2
# distance weighted by inverse standard deviation. `{complement}` is the only
# placeholder. The first line keeps its trailing space, as in the original text.
heart_disease_followup_minimal_l2 = (
    "Now make a minimal revision to the original patient data such that your answer to the question on the revised patient data would have been {complement}. \n"
    "\n"
    "Your revision should be minimal in the sense that it makes the smallest change necessary to change your decision, i.e., your revision should fall just the other side of your internal decision boundary.\n"
    "\n"
    "Here are the valid options and data types.\n"
    "\n"
    "age: ['30', '35', '40', '45', '50', '55', '60', '65', '70', '75', '80'] (ordinal)\n"
    "sex: ['Female', 'Male'] (categorical)\n"
    "systolic_bp: ['110', '120', '130', '140', '150', '160', '170', '180'] (ordinal)\n"
    "total_cholesterol: ['150', '165', '180', '195', '210', '225', '240', '255', '270', '285', '300'] (ordinal)\n"
    "\n"
    "Minimality is computed with squared L2 distance with features weighted by the inverse standard deviation. For numeric or ordinal fields, the per-field distance is the squared L2 distance divided by that variable’s standard deviation in the full dataset. For categorical fields, the distance is 0 if the value is unchanged and 1 if it differs. The full dataset contains every combination of age, sex, systolic_bp and total_cholesterol. The overall distance is the sum of these per-field distances.\n"
    "\n"
    "Share the revised patient data.\n"
    "\n"
    "Only respond with JSON output. Do not include any additional words in your answer. Format your response as a JSON file with the keys as provided below:\n"
    '{{"age": "", "sex": "", "systolic_bp": "", "total_cholesterol": ""}}\n'
)

#########################################################################################################
# Dataset configs for the squared-L2 variant of the operationalising-distance
# experiment: each entry pairs the task's standard prompt with its "*_minimal_l2"
# follow-up template.
# NOTE(review): income uses "data/income" while the other two entries use "../data/...";
# confirm the paths resolve against the same base directory.
l2_datasets = {
    "income_minimal_l2": dict(
        local=True,
        filepath="data/income",
        name="ACSIncome",
        split="train",
        options=["Below $50,000", "Above $50,000"],
        answer_key="answer",
        input_key="description",
        standard_prompt_template=income,
        followup_template=income_followup_minimal_l2,
    ),
    "house_prices_minimal_l2": dict(
        local=True,
        filepath="../data/house_prices",
        name="",
        split="",
        options=["Below $1,500,000", "Above $1,500,000"],
        answer_key="answer",
        input_key="description",
        standard_prompt_template=house_prices,
        followup_template=house_prices_followup_minimal_l2,
    ),
    "heart_disease_minimal_l2": dict(
        local=True,
        filepath="../data/heart_disease",
        name="",
        split="",
        options=["No heart disease", "Heart disease"],
        answer_key="answer",
        input_key="description",
        standard_prompt_template=heart_disease,
        followup_template=heart_disease_followup_minimal_l2,
    ),
}

# Register the new entries on the shared `datasets` dict (in place).
datasets.update(l2_datasets)

In [12]:
#########################################################################################################
# semantic
#########################################################################################################
# Follow-up prompt for the income task, semantic-distance variant.
# Asks for a minimal revision of the respondent data that flips the answer to
# `{complement}`, lists the valid values for age and education, and tells the
# model that minimality is measured as 1 - cosine similarity between
# sentence-transformer embeddings of the two records.
# `{complement}` is the only single-brace placeholder; the doubled braces on the
# last line presumably survive a str.format-style fill as a literal JSON
# skeleton (confirm against the code that renders the template).
# Unlike the house-price and heart-disease variants in this cell, this string
# ends without a trailing newline.
income_followup_minimal_semantic = ("""Now make a minimal revision to the original respondent data such that your answer to the question on the revised respondent data would have been {complement}. 

Your revision should be minimal in the sense it makes the smallest change necessary to change your decision, i.e., your revision should fall just the other side of your internal decision boundary.

Here are the valid options and data types.

age: 17-96 inclusive (integer)
education: ['N/A - no schooling completed', 'Nursery school / preschool', 'Kindergarten', '1st grade only', '2nd grade', '3rd grade', '4th grade', '5th grade', '6th grade', '7th grade', '8th grade', '9th grade', '10th grade', '11th grade', '12th grade, no diploma', 'Regular high school diploma', 'GED or alternative credential', 'Some college, less than 1 year', 'Some college, 1 or more years, no degree', "Associate's degree", "Bachelor's degree", "Master's degree", "Professional degree beyond a bachelor's degree", 'Doctorate degree'] (ordinal)

Minimality is computed with Semantic Cosine Distance. Each record is first expressed as a short natural-language description and embedded with a pretrained sentence-transformer model; the distance between two records is defined as 1 minus the cosine similarity of their embeddings. This single semantic score serves as the minimality measure.

Share the revised data.

Only respond with JSON output. Do not include any additional words in your answer. Format your response as a JSON file with the keys as provided below:
{{"age": "", "education": ""}}""")

#########################################################################################################
# Follow-up prompt for the house-price task, semantic-distance variant.
# Asks for a minimal revision of the house data that flips the answer to
# `{complement}`, lists the valid values for area, bedrooms, bathrooms and
# floors, and tells the model that minimality is measured as 1 - cosine
# similarity between sentence-transformer embeddings of the two records.
# `{complement}` is the only single-brace placeholder; the doubled braces around
# the JSON skeleton presumably survive a str.format-style fill as literal braces
# (confirm against the code that renders the template).
house_prices_followup_minimal_semantic = ("""Now make a minimal revision to the original house data such that your answer to the question on the revised house data would have been {complement}. 

Your revision should be minimal in the sense it makes the smallest change necessary to change your decision, i.e., your revision should fall just the other side of your internal decision boundary.

Here are the valid options and data types.

area: ['500', '1000', '1500', '2000', '2500', '3000', '3500', '4000', '4500', '5000', '5500', '6000', '6500', '7000', '7500', '8000', '8500', '9000', '9500', '10000'] (ordinal)
bedrooms: 1-5 inclusive (integer)
bathrooms: 1-4 inclusive (integer)
floors: 1-4 inclusive (integer)

Minimality is computed with Semantic Cosine Distance. Each record is first expressed as a short natural-language description and embedded with a pretrained sentence-transformer model; the distance between two records is defined as 1 minus the cosine similarity of their embeddings. This single semantic score serves as the minimality measure.

Share the revised house data.

Only respond with JSON output. Do not include any additional words in your answer. Format your response as a JSON file with the keys as provided below:
{{"area": "", "bedrooms": "", "bathrooms": "", "floors": ""}}
""")

#########################################################################################################
# Follow-up prompt for the heart-disease task, semantic-distance variant.
# Asks for a minimal revision of the patient data that flips the answer to
# `{complement}`, lists the valid values for age, sex, systolic_bp and
# total_cholesterol, and tells the model that minimality is measured as
# 1 - cosine similarity between sentence-transformer embeddings of the records.
# `{complement}` is the only single-brace placeholder; the doubled braces around
# the JSON skeleton presumably survive a str.format-style fill as literal braces
# (confirm against the code that renders the template).
heart_disease_followup_minimal_semantic = ("""Now make a minimal revision to the original patient data such that your answer to the question on the revised patient data would have been {complement}. 

Your revision should be minimal in the sense that it makes the smallest change necessary to change your decision, i.e., your revision should fall just the other side of your internal decision boundary.

Here are the valid options and data types.

age: ['30', '35', '40', '45', '50', '55', '60', '65', '70', '75', '80'] (ordinal)
sex: ['Female', 'Male'] (categorical)
systolic_bp: ['110', '120', '130', '140', '150', '160', '170', '180'] (ordinal)
total_cholesterol: ['150', '165', '180', '195', '210', '225', '240', '255', '270', '285', '300'] (ordinal)

Minimality is computed with Semantic Cosine Distance. Each record is first expressed as a short natural-language description and embedded with a pretrained sentence-transformer model; the distance between two records is defined as 1 minus the cosine similarity of their embeddings. This single semantic score serves as the minimality measure.

Share the revised patient data.

Only respond with JSON output. Do not include any additional words in your answer. Format your response as a JSON file with the keys as provided below:
{{"age": "", "sex": "", "systolic_bp": "", "total_cholesterol": ""}}
""")

#########################################################################################################
# Registry entries for the "minimal semantic" follow-up condition. Each task
# keeps its standard prompt and pairs it with the follow-up that describes the
# semantic cosine-distance minimality measure.
#
# Fix: every entry previously set "followup_template" to the corresponding
# *_followup_minimal_l2 template (copy-paste from the L2 registry), so the
# *_followup_minimal_semantic prompts defined earlier in this cell were never
# used and the "semantic" condition silently re-ran the L2 prompts.
semantic_datasets = {
    "income_minimal_semantic": {
        "local": True,
        "filepath": "data/income",
        "name": "ACSIncome",
        "split": "train",
        "options": ['Below $50,000', 'Above $50,000'],
        "answer_key": "answer",
        "input_key": "description",
        "standard_prompt_template": income,
        "followup_template": income_followup_minimal_semantic,
    },
    "house_prices_minimal_semantic": {
        "local": True,
        "filepath": "data/house_prices",
        "name": "",
        "split": "",
        "options": ['Below $1,500,000', 'Above $1,500,000'],
        "answer_key": "answer",
        "input_key": "description",
        "standard_prompt_template": house_prices,
        "followup_template": house_prices_followup_minimal_semantic,
    },
    "heart_disease_minimal_semantic": {
        "local": True,
        "filepath": "data/heart_disease",
        "name": "",
        "split": "",
        "options": ['No heart disease', 'Heart disease'],
        "answer_key": "answer",
        "input_key": "description",
        "standard_prompt_template": heart_disease,
        "followup_template": heart_disease_followup_minimal_semantic,
    },
}

# Merge the semantic entries into the main registry (existing keys are overwritten).
datasets.update(semantic_datasets)

## Decision boundary sensitivity

In [16]:
# o3 generated with strict instructions

income_1 = (
    """You will be shown information for one respondent from a 2018 survey of U.S. residents.  
Please decide the respondent’s estimated yearly income based solely on the data.

Respondent data:
{description}

The question is: What is this person's estimated yearly income?  
The choices are: ['Below $50,000', 'Above $50,000']

Only respond with JSON output. Do not include any additional words in your answer. Format your response as a JSON file with the keys as provided below:
{{"answer": ""}}
"""
)

income_2 = (
    """Below is a single record from a nationally-representative survey run in the United States in 2018.  
Using the details provided, identify the correct income bracket.

Data:
{description}

The question is: What is this person's estimated yearly income?  
The options are: ['Above $50,000', 'Below $50,000']

Only respond with JSON output. Do not include any additional words in your answer. Format your response as a JSON file with the keys as provided below:
{{"answer": ""}}
"""
)

income_3 = (
    """A respondent profile from the 2018 U.S. survey is presented. Review it carefully and select the income range that applies.

Profile ➜
{description}

The question is: What is this person's estimated yearly income?  
The choices are: ['Above $50,000', 'Below $50,000']

Only respond with JSON output. Do not include any additional words in your answer. Format your response as a JSON file with the keys as provided below:
{{"answer": ""}}
"""
)

income_4 = (
    """Study the following 2018 U.S. resident survey entry and categorize the participant’s annual earnings.

Participant entry:
{description}

The question is: What is this person's estimated yearly income?  
The options are: ['Below $50,000', 'Above $50,000']

Only respond with JSON output. Do not include any additional words in your answer. Format your response as a JSON file with the keys as provided below:
{{"answer": ""}}
"""
)

income_5 = (
    """You have been provided data from a single U.S. respondent in a 2018 survey. Determine their correct income band.

Record:
{description}

The question is: What is this person's estimated yearly income?  
The choices are: ['Below $50,000', 'Above $50,000']

Only respond with JSON output. Do not include any additional words in your answer. Format your response as a JSON file with the keys as provided below:
{{"answer": ""}}
"""
)

income_6 = (
    """Examine the 2018 survey respondent information that follows and assign an income category.

Information:
{description}

The question is: What is this person's estimated yearly income?  
The options are: ['Above $50,000', 'Below $50,000']

Only respond with JSON output. Do not include any additional words in your answer. Format your response as a JSON file with the keys as provided below:
{{"answer": ""}}
"""
)

income_7 = (
    """From a 2018 U.S. nationwide survey, one individual’s details are listed below. Select their annual income bracket.

Details:
{description}

The question is: What is this person's estimated yearly income?  
The choices are: ['Above $50,000', 'Below $50,000']

Only respond with JSON output. Do not include any additional words in your answer. Format your response as a JSON file with the keys as provided below:
{{"answer": ""}}
"""
)

income_8 = (
    """Consider the respondent data (2018 U.S. survey) presented. Choose the proper earnings tier.

Respondent data:
{description}

The question is: What is this person's estimated yearly income?  
The options are: ['Below $50,000', 'Above $50,000']

Only respond with JSON output. Do not include any additional words in your answer. Format your response as a JSON file with the keys as provided below:
{{"answer": ""}}
"""
)

income_9 = (
    """A single 2018 survey response from the United States appears below. Classify the individual’s yearly income.

Survey response:
{description}

The question is: What is this person's estimated yearly income?  
The choices are: ['Below $50,000', 'Above $50,000']

Only respond with JSON output. Do not include any additional words in your answer. Format your response as a JSON file with the keys as provided below:
{{"answer": ""}}
"""
)

income_10 = (
    """Review the data excerpt from a 2018 study of U.S. residents and decide which income bracket applies.

Excerpt:
{description}

The question is: What is this person's estimated yearly income?  
The options are: ['Above $50,000', 'Below $50,000']

Only respond with JSON output. Do not include any additional words in your answer. Format your response as a JSON file with the keys as provided below:
{{"answer": ""}}
"""
)

income_11 = (
    """Below is one entry from a 2018 survey of U.S. adults.  
Identify which salary tier best describes the respondent.

Entry:
{description}

Question: What is this person's estimated yearly income?  
Choices: ['Below $50,000', 'Above $50,000']

Return **only** a JSON object—nothing else—formatted like:
{{"answer": ""}}
"""
)

income_12 = (
    """Consider the single 2018 U.S. survey record provided. Decide the correct annual-income grouping.

Record details:
{description}

What is the person’s estimated yearly income?  
Options: ['Above $50,000', 'Below $50,000']

Output strictly as JSON in the form:
{{"answer": ""}}
"""
)

income_13 = (
    """You are shown data for one U.S. respondent (survey year 2018).  
Choose the appropriate income bracket.

Respondent information:
{description}

Select their estimated yearly income.  
The options are: ['Below $50,000', 'Above $50,000']

Respond solely with this JSON structure:
{{"answer": ""}}
"""
)

income_14 = (
    """Survey snapshot (United States, 2018) appears below. Classify the income category.

Snapshot:
{description}

Which annual-income range fits?  
Options: ['Above $50,000', 'Below $50,000']

Reply only with JSON:
{{"answer": ""}}
"""
)

income_15 = (
    """Here is a participant profile from the 2018 nationwide U.S. study.  
Infer the earnings band.

Profile:
{description}

Prompt: Estimate this person's yearly income.  
Choices available: ['Below $50,000', 'Above $50,000']

Provide a single JSON object exactly like:
{{"answer": ""}}
"""
)

income_16 = (
    """A single observation from a 2018 U.S. public-opinion poll is listed.  
Assign an income level.

Observation:
{description}

What is the individual’s estimated annual income?  
Possible answers: ['Above $50,000', 'Below $50,000']

Output must be JSON only:
{{"answer": ""}}
"""
)

income_17 = (
    """Use the 2018 survey data point below to pick an earnings tier.

Data point:
{description}

Question: Estimated yearly income?  
Answer set: ['Below $50,000', 'Above $50,000']

Return only this JSON object:
{{"answer": ""}}
"""
)

income_18 = (
    """Review the following 2018 respondent details from the U.S. study and decide on the income bracket.

Details:
{description}

Which bracket suits their annual income?  
Options list: ['Above $50,000', 'Below $50,000']

JSON-only response, format:
{{"answer": ""}}
"""
)

income_19 = (
    """From the 2018 United States survey, one record is provided.  
Determine the correct yearly-income category.

Record:
{description}

Select the estimated annual income.  
Possible options: ['Below $50,000', 'Above $50,000']

Reply with JSON only:
{{"answer": ""}}
"""
)

income_20 = (
    """Inspect the respondent summary (U.S., 2018) then label the earnings range.

Summary:
{description}

Asked: What is their estimated yearly income?  
Choices are: ['Above $50,000', 'Below $50,000']

Respond in pure JSON:
{{"answer": ""}}
"""
)

income_21 = (
    """Single U.S. survey entry from 2018 provided. Categorize income.

Entry information:
{description}

What income tier applies?  
Options: ['Below $50,000', 'Above $50,000']

Return JSON only in this shape:
{{"answer": ""}}
"""
)

income_22 = (
    """Below you will find a 2018 survey profile.  
Pick the appropriate yearly salary bracket.

Profile data:
{description}

The question: Estimated yearly income?  
Answer choices: ['Above $50,000', 'Below $50,000']

Your reply must be the JSON object:
{{"answer": ""}}
"""
)

income_23 = (
    """Examine the next 2018 survey record (U.S.).  
Identify the respondent’s income band.

Survey record:
{description}

Choose the estimated yearly income.  
Options list: ['Below $50,000', 'Above $50,000']

Output only:
{{"answer": ""}}
"""
)

income_24 = (
    """A 2018 data entry for one American respondent is displayed.  
Select the correct annual-income bracket.

Data entry:
{description}

Question: What is their estimated yearly income?  
Possible answers: ['Above $50,000', 'Below $50,000']

Give ONLY this JSON:
{{"answer": ""}}
"""
)

income_25 = (
    """Refer to the respondent snapshot below taken from a 2018 U.S. poll.  
Determine which income group fits best.

Snapshot:
{description}

Prompt: Estimated annual income?  
Choices: ['Below $50,000', 'Above $50,000']

Return strictly JSON formatted as:
{{"answer": ""}}
"""
)

income_26 = (
    """Using the single 2018 U.S. survey record below, tag the income range.

Record:
{description}

Which yearly-income bracket matches?  
Options: ['Above $50,000', 'Below $50,000']

Respond only with this JSON:
{{"answer": ""}}
"""
)

income_27 = (
    """2018 U.S. survey respondent info follows.  
Assign a yearly income tier.

Info:
{description}

Choose the estimated yearly income.  
Answer set: ['Below $50,000', 'Above $50,000']

JSON object only:
{{"answer": ""}}
"""
)

income_28 = (
    """Look at the following U.S. survey line (year 2018) and decide on the income category.

Line:
{description}

Question: What is their estimated annual earnings?  
Choices available: ['Above $50,000', 'Below $50,000']

Return JSON only:
{{"answer": ""}}
"""
)

income_29 = (
    """You have one respondent’s data from the 2018 U.S. survey.  
Choose an income bracket.

Data:
{description}

Identify the estimated yearly income.  
Possible options: ['Below $50,000', 'Above $50,000']

Reply as JSON:
{{"answer": ""}}
"""
)

income_30 = (
    """Presented is a single entry from the 2018 U.S. resident survey.  
Pick the income level.

Entry:
{description}

Which yearly-income tier applies?  
Options: ['Above $50,000', 'Below $50,000']

Respond solely in JSON:
{{"answer": ""}}
"""
)

income_31 = (
    """Analyze this 2018 survey record of a U.S. resident and select the income bracket.

Record data:
{description}

What is the estimated yearly income?  
Choices are: ['Below $50,000', 'Above $50,000']

Return JSON only:
{{"answer": ""}}
"""
)

income_32 = (
    """A 2018 American survey participant’s details appear below.  
Determine their annual-income classification.

Participant:
{description}

Question asked: Estimated yearly income?  
Answer list: ['Above $50,000', 'Below $50,000']

Provide a JSON object only:
{{"answer": ""}}
"""
)

income_33 = (
    """Single respondent data from the 2018 U.S. questionnaire is given.  
Select the correct salary bracket.

Respondent data:
{description}

Estimated yearly income?  
Options: ['Below $50,000', 'Above $50,000']

Your output must be JSON:
{{"answer": ""}}
"""
)

income_34 = (
    """Below is information from a 2018 nationwide U.S. study.  
Choose the income tier.

Information:
{description}

Which bracket represents yearly income?  
Possible answers: ['Above $50,000', 'Below $50,000']

Return JSON only like:
{{"answer": ""}}
"""
)

income_35 = (
    """Inspect the 2018 survey excerpt and decide on the earnings band.

Excerpt:
{description}

Question: What is the respondent’s estimated annual income?  
Choices: ['Below $50,000', 'Above $50,000']

Output strictly in JSON form:
{{"answer": ""}}
"""
)

income_36 = (
    """Here is a 2018 U.S. survey snippet.  
Identify the income category.

Snippet:
{description}

Select their yearly income estimate.  
Options list: ['Above $50,000', 'Below $50,000']

Respond only with JSON:
{{"answer": ""}}
"""
)

income_37 = (
    """2018 U.S. respondent profile follows.  
Assign the appropriate income range.

Profile:
{description}

Estimated yearly income—choose one:  
['Below $50,000', 'Above $50,000']

Return a JSON object:
{{"answer": ""}}
"""
)

income_38 = (
    """Review the single 2018 survey response and categorize income.

Survey response:
{description}

What is the person’s estimated yearly earnings?  
Options are: ['Above $50,000', 'Below $50,000']

Output must be JSON only:
{{"answer": ""}}
"""
)

income_39 = (
    """A lone data record from the 2018 U.S. survey is shown.  
Determine the salary tier.

Data record:
{description}

Which annual income bracket applies?  
Choices: ['Below $50,000', 'Above $50,000']

Reply solely as JSON:
{{"answer": ""}}
"""
)

income_40 = (
    """Consider this 2018 U.S. respondent entry and pick an income group.

Entry details:
{description}

Prompt: Estimate their yearly income.  
Answer set: ['Above $50,000', 'Below $50,000']

Return exactly:
{{"answer": ""}}
"""
)

income_41 = (
    """Using the data snippet (U.S., 2018) below, label the earnings bracket.

Data snippet:
{description}

Which yearly-income tier best fits?  
Options: ['Below $50,000', 'Above $50,000']

Respond in JSON only:
{{"answer": ""}}
"""
)

income_42 = (
    """The following represents one 2018 U.S. survey observation.  
Classify its income range.

Observation:
{description}

Question: Estimated annual income?  
Possible answers: ['Above $50,000', 'Below $50,000']

JSON output only:
{{"answer": ""}}
"""
)

income_43 = (
    """Here is one respondent’s 2018 data (United States).  
Determine the correct income band.

Data:
{description}

What is their estimated yearly income?  
Answer choices: ['Below $50,000', 'Above $50,000']

Return JSON object:
{{"answer": ""}}
"""
)

income_44 = (
    """A participant record from the 2018 national survey is below.  
Choose the annual income grouping.

Participant record:
{description}

Select an estimated yearly income.  
Options list: ['Above $50,000', 'Below $50,000']

Provide only JSON:
{{"answer": ""}}
"""
)

income_45 = (
    """Study the 2018 survey information displayed and identify the income level.

Information:
{description}

Question: What is the respondent’s estimated yearly income?  
Choices: ['Below $50,000', 'Above $50,000']

Respond solely in JSON:
{{"answer": ""}}
"""
)

income_46 = (
    """From the 2018 U.S. survey dataset, one entry is shown.  
Pick the appropriate income tier.

Dataset entry:
{description}

Which annual income bracket matches?  
Options are: ['Above $50,000', 'Below $50,000']

Return JSON exactly as:
{{"answer": ""}}
"""
)

income_47 = (
    """Review the following 2018 U.S. survey snippet.  
Assign the earnings category.

Snippet:
{description}

Prompt: Estimated annual income?  
Available answers: ['Below $50,000', 'Above $50,000']

Output JSON only:
{{"answer": ""}}
"""
)

income_48 = (
    """One 2018 survey profile is provided.  
Categorize the respondent’s income.

Profile snippet:
{description}

Which yearly-income group applies?  
Answer set: ['Above $50,000', 'Below $50,000']

Respond with JSON:
{{"answer": ""}}
"""
)

income_49 = (
    """Inspect the 2018 survey record below and choose a salary bracket.

Record details:
{description}

What is their estimated yearly income?  
Choices: ['Below $50,000', 'Above $50,000']

Return a JSON object only:
{{"answer": ""}}
"""
)

income_50 = (
    """Below is a 2018 U.S. survey entry. Decide on the income range.

Survey entry:
{description}

Question: Estimated yearly income?  
Options list: ['Above $50,000', 'Below $50,000']

Output only this JSON:
{{"answer": ""}}
"""
)


# Register the 50 paraphrased income prompts (income_1 … income_50, defined
# earlier in this cell) as separate registry entries. Every entry shares the
# same data source, label set and follow-up template; only the standard prompt
# differs.
prompt_names = [f"income_{i}" for i in range(1, 51)]

for name in prompt_names:
    # The templates are module-level variables, so each one is looked up by
    # name; a missing income_N raises KeyError here.
    datasets[name] = {
        "local": True,
        "filepath": "data/income",
        "name": "ACSIncome",
        "split": "train",
        "options": ['Below $50,000', 'Above $50,000'],
        "answer_key": "answer",
        "input_key": "description",
        "standard_prompt_template": globals()[name],
        "followup_template": income_followup,
    }


In [17]:
datasets["income_40"]

{'local': True,
 'filepath': 'data/income',
 'name': 'ACSIncome',
 'split': 'train',
 'options': ['Below $50,000', 'Above $50,000'],
 'answer_key': 'answer',
 'input_key': 'description',
 'standard_prompt_template': 'Consider this 2018 U.S. respondent entry and pick an income group.\n\nEntry details:\n{description}\n\nPrompt: Estimate their yearly income.  \nAnswer set: [\'Above $50,000\', \'Below $50,000\']\n\nReturn exactly:\n{{"answer": ""}}\n',
 'followup_template': 'Now revise the original respondent data such that your answer to the question on the revised respondent data would have been {complement}.\n\nHere are the valid options and data types.\n\nage: 17-96 inclusive (integer)\neducation: [\'N/A - no schooling completed\', \'Nursery school / preschool\', \'Kindergarten\', \'1st grade only\', \'2nd grade\', \'3rd grade\', \'4th grade\', \'5th grade\', \'6th grade\', \'7th grade\', \'8th grade\', \'9th grade\', \'10th grade\', \'11th grade\', \'12th grade, no diploma\', \'Regula

## Save

In [18]:
# Serialize the `datasets` registry as JSON (4-space indent) to
# src/models_datasets/datasets.json under REPO_ROOT, overwriting any existing file.
datasets_json_path = REPO_ROOT / 'src/models_datasets/datasets.json'
with open(datasets_json_path, 'w') as out_file:
    json.dump(datasets, out_file, indent=4)