In [1]:
import json

In [2]:
def load_json(file_path):
    """
    Load a JSON file and return its parsed content.

    The file is always decoded as UTF-8 instead of the platform's locale
    encoding, so the same file parses identically on every machine.

    Args:
        file_path: Path of the JSON file to read.

    Returns:
        The decoded JSON value, or None when the file does not exist, is not
        valid UTF-8, or does not contain valid JSON. An error message is
        printed in each of those cases.
    """
    try:
        with open(file_path, 'r', encoding='utf-8') as file:
            return json.load(file)
    except FileNotFoundError:
        print(f"Error: File {file_path} not found.")
        return None
    except (json.JSONDecodeError, UnicodeDecodeError):
        # Bytes that are not valid UTF-8 cannot be valid JSON text either, so
        # they are reported the same way as a syntax error instead of letting
        # UnicodeDecodeError escape to the caller.
        print(f"Error: File {file_path} contains invalid JSON.")
        return None

In [3]:
def replace_placeholder_with_details(template_entry, descriptors):
    """
    Expand one template into a sentence record per descriptor.

    Every occurrence of the placeholder 'XYZ' in the template text is
    substituted with the descriptor, and the resulting sentence is returned
    together with the descriptor, its target, and the template's bias context.

    Args:
        template_entry: Mapping with the keys "bias_context" and "template".
        descriptors: Iterable of mappings with the keys "descriptor" and
            "target".

    Returns:
        A list of dicts with the keys "sentence", "descriptor", "target" and
        "bias_context", one per descriptor, in input order.
    """
    context = template_entry["bias_context"]
    pattern = template_entry["template"]

    def build_record(entry):
        # Look up both fields before substituting, mirroring the lookup order
        # of the per-descriptor loop this replaces.
        word, group = entry["descriptor"], entry["target"]
        return {
            "sentence": pattern.replace("XYZ", word),
            "descriptor": word,
            "target": group,
            "bias_context": context,
        }

    return [build_record(entry) for entry in descriptors]

In [4]:
def generate_sentences(templates_file, descriptors_file, output_file):
    """
    Build every template/descriptor sentence combination and write it out.

    Both input files are read with load_json. Each template entry is expanded
    against the full descriptor list by replace_placeholder_with_details, and
    the combined records are handed to save_to_file.

    Args:
        templates_file: Path of the JSON file holding the template entries.
        descriptors_file: Path of the JSON file holding the descriptor entries.
        output_file: Path the generated records are saved to.

    Returns:
        None. Returns early, without writing anything, when either input
        loads as a falsy value (load_json returned None or the content was
        empty).
    """
    template_entries = load_json(templates_file)
    descriptor_entries = load_json(descriptors_file)

    # Both files are loaded before this check so each one gets the chance to
    # report its own error message.
    if not (template_entries and descriptor_entries):
        return

    generated = [
        record
        for entry in template_entries
        for record in replace_placeholder_with_details(entry, descriptor_entries)
    ]

    save_to_file(generated, output_file)
    print(f"Generated sentences saved to {output_file}")

In [5]:
def save_to_file(data, file_path):
    """
    Save data to a file in JSON format, indented with four spaces.

    The data is serialized to a string before the destination is opened.
    If the value cannot be encoded as JSON, an existing file at file_path is
    therefore left untouched instead of being truncated or partially written.

    Args:
        data: JSON-serializable value to write.
        file_path: Destination path; overwritten when serialization succeeds.

    Returns:
        None. Failures (serialization or I/O) are reported with a printed
        message rather than raised.
    """
    try:
        # Serialize first: opening with 'w' truncates the file immediately, so
        # it must only happen once we know there is complete output to write.
        serialized = json.dumps(data, indent=4)
        with open(file_path, 'w', encoding='utf-8') as file:
            file.write(serialized)
    except Exception as e:
        # Deliberately broad: saving is best-effort and must not abort the caller.
        print(f"Error saving to file {file_path}: {e}")

In [6]:
# Entry point: generate the sentence file from the template and descriptor
# files in the current working directory.
if __name__ == "__main__":
    templates_file, descriptors_file, output_file = (
        "templates.json",
        "descriptors.json",
        "prefix_template.json",
    )

    generate_sentences(
        templates_file=templates_file,
        descriptors_file=descriptors_file,
        output_file=output_file,
    )

Generated sentences saved to prefix_template.json


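### Sample Sentence Collection

Keep the first three generated sentences for every (bias context, target) pair; pairs with fewer than three sentences are skipped.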

In [3]:
import json
from collections import defaultdict


# Read the sentence records from prefix_template.json.
file_path = 'prefix_template.json'
with open(file_path, 'r') as file:
    data = json.load(file)

# Two-level index: bias_context -> target -> list of sentence records.
grouped_data = defaultdict(lambda: defaultdict(list))
for item in data:
    per_target = grouped_data[item["bias_context"]]
    per_target[item["target"]].append(item)

# Keep the first three records of every (bias_context, target) group; groups
# holding fewer than three records contribute nothing to the sample.
final_samples = []
for bias_context, targets in grouped_data.items():
    for target, sentences in targets.items():
        if len(sentences) < 3:
            continue
        final_samples += sentences[:3]

# Write the sampled records next to the input file.
output_file_path = 'sampled_prefix_template.json'
with open(output_file_path, 'w') as output_file:
    json.dump(final_samples, output_file, indent=4)

print(f"Sampled data saved to {output_file_path}")

Sampled data saved to sampled_prefix_template.json
