# In [1]:
import json
import os
import random

def get_templates():
    """Return the prompt templates used to build question variants.

    Every template contains exactly one ``{}`` placeholder, which callers fill
    with the question text via ``str.format``. All entries are distinct, so
    sampling without replacement from this list cannot yield the same prompt
    twice.

    Returns:
        A freshly built list of template strings (safe for callers to mutate).
    """
    return [
        "q:{}\na:",
        "Q::{}\nA::",
        "Q:{}\nA:",
        "q::{}\na::",
        "Q::: {}\nA:::",
        "Q:{}||A:",
        "q:::{}\na:::",
        "Q:{}\nAnswer:",
        "QUESTION:{}\nA:",
        "Question:{}\nAnswer:",
        "QUESTION:{}\nANSWER:",
        # NOTE: a second copy of "Question:{}\nAnswer:" used to sit here; it
        # was dropped because it let two sampled variants be identical.
        "Question::: {}\nAnswer:::",
        "QUESTION:{}\nAnswer:",
        "Question-{}\nAnswer",
        "question::{}\nanswer::",
        "question:{}\nanswer:",
        "Question:{}||Answer:",
        "QUESTION\t{}\nANSWER\t",
        "Question:{},Answer:",
    ]

def generate_variants(question_data, num_variants=10, templates=None):
    """Build a record with the original question plus templated variants.

    Args:
        question_data: Mapping with "question", "answer" and "image" keys.
            The question text may carry an ``<image>`` placeholder on its own
            line, either before or after the question itself.
        num_variants: Number of templates to sample without replacement.
            Defaults to 10.
        templates: Optional iterable of ``str.format`` templates, each with a
            single ``{}`` placeholder. Defaults to ``get_templates()``.

    Returns:
        A dict holding the original "question", "answer" and "image" values
        plus "variation_1" .. "variation_<num_variants>", each being a
        template filled with the question text followed by "\\n<image>".

    Raises:
        KeyError: If ``question_data`` lacks one of the required keys.
        ValueError: If ``num_variants`` is negative or larger than the number
            of distinct templates (raised by ``random.sample``).
    """
    image_token = "<image>"

    # Use the first line that is not the bare image placeholder. Taking
    # line 0 unconditionally would pick "<image>" itself for records that
    # put the placeholder first, producing a prompt with two image tokens.
    # If every line is the placeholder, fall back to the first line.
    lines = question_data["question"].split("\n")
    base_question = next(
        (line for line in lines if line.strip() != image_token),
        lines[0],
    )

    # De-duplicate (order-preserving) so sampling without replacement really
    # yields distinct prompts even if the template source repeats an entry.
    source = get_templates() if templates is None else templates
    pool = list(dict.fromkeys(source))
    selected_templates = random.sample(pool, num_variants)

    variants = {
        "question": question_data["question"],
        "answer": question_data["answer"],
        "image": question_data["image"],
    }

    # Variation keys are numbered from 1 in sampling order.
    for i, template in enumerate(selected_templates, 1):
        variants[f"variation_{i}"] = template.format(base_question) + "\n<image>"

    return variants

def process_questions(
    input_data,
    output_dir="/ephemeral/shashmi/posix_new_improved/Thesis/template_question_variant",
):
    """Generate variants for every question and write one JSON file each.

    Args:
        input_data: Either a JSON string that decodes to a sequence of
            question records, or an already-decoded iterable of records.
            Each record is passed unchanged to ``generate_variants``.
        output_dir: Directory that receives the output files. It is created
            (including parents) when missing. Defaults to the original
            hard-coded location so existing callers keep working.

    Returns:
        None. Files named ``question_<n>_variants.json`` (n counted from 1 in
        input order) are written into ``output_dir`` as UTF-8, indented JSON
        with non-ASCII characters kept verbatim.
    """
    os.makedirs(output_dir, exist_ok=True)

    # Accept raw JSON text as well as decoded records.
    questions = json.loads(input_data) if isinstance(input_data, str) else input_data

    for idx, question_data in enumerate(questions, 1):
        variants = generate_variants(question_data)

        output_file = os.path.join(output_dir, f"question_{idx}_variants.json")
        with open(output_file, 'w', encoding='utf-8') as f:
            json.dump(variants, f, indent=2, ensure_ascii=False)

# Load question records from a file holding one JSON document per line
def read_input_json(file_path):
    """Return the decoded JSON value of every non-blank line in *file_path*."""
    records = []
    with open(file_path, 'r', encoding='utf-8') as handle:
        for raw_line in handle:
            # Whitespace-only lines carry no record; skip them.
            if not raw_line.strip():
                continue
            records.append(json.loads(raw_line))
    return records

# Script entry point: load the question records, then write their variant files.
if __name__ == "__main__":
    # Point this at your own input file before running.
    process_questions(
        read_input_json(
            "/ephemeral/shashmi/posix_new_improved/Thesis/vanillah_iuxray_json.json"
        )
    )