In [6]:
import json
import os
import random
import re
import shutil

import win32com.client
import win32com.client.dynamic
from docx import Document
from openai import OpenAI


def format_number(value):
    """Format a number with German separators ('.' for thousands, ',' for decimals).

    Args:
        value: The value to format. Integers are rendered with thousand
            separators only, floats with thousand separators and exactly two
            decimal places. Any other type is passed through ``str``.

    Returns:
        str: The formatted text, e.g. ``1234567`` -> ``"1.234.567"`` and
        ``1234.5`` -> ``"1.234,50"``. Non-finite floats come back as
        ``"inf"`` / ``"nan"``.
    """
    # Python's format spec groups with ',' and uses '.' as the decimal point;
    # swapping both characters in a single pass gives the German notation.
    swap_separators = str.maketrans(",.", ".,")

    if isinstance(value, int):
        return f"{value:,}".translate(swap_separators)
    if isinstance(value, float):
        # "inf" and "nan" contain neither separator, so they pass through
        # unchanged instead of failing on a missing decimal part.
        return f"{value:,.2f}".translate(swap_separators)
    # Not a number: return its string form untouched
    return str(value)
    
    
def complete_example_numbers(example_dict):
    """Add the derived fields N5, N17 and N18 to an example dictionary.

    Args:
        example_dict: Dictionary holding numeric values under the keys
            ``N1``-``N4`` and ``N6``-``N16``. It is modified in place.

    Returns:
        dict: The same dictionary object, extended by
        ``N5`` (sum of N1 to N4), ``N17`` (sum of N6 to N16) and
        ``N18`` (N5 as a percentage of N17, rounded to two decimals;
        ``0.0`` when N17 is zero).

    Raises:
        KeyError: If one of the required ``N`` keys is missing.
    """
    # Sum of N1 to N4
    example_dict["N5"] = sum(example_dict[f"N{index}"] for index in range(1, 5))

    # Sum of N6 to N16
    example_dict["N17"] = sum(example_dict[f"N{index}"] for index in range(6, 17))

    # Percentage of N5 relative to N17; a zero total would otherwise raise
    # ZeroDivisionError, so report 0.0 in that case.
    total = example_dict["N17"]
    example_dict["N18"] = round((example_dict["N5"] / total) * 100, 2) if total else 0.0

    return example_dict


def generate_example_from_template(context: str, template: dict):
    """Build one complete example record for the form.

    The fields described by ``template`` are requested from the OpenAI chat
    completions API through the module-level ``openai_api_client``. The
    numeric fields, the ID and the phone/fax numbers are generated locally
    with ``random``.

    Args:
        context: Instruction text sent to the model ahead of the template.
        template: Dictionary mapping field names to a description of the
            expected value; it is sent to the model as JSON.

    Returns:
        dict: The formatted numeric fields ``N1``-``N18`` (as strings),
        ``ID``, ``Telefon`` and ``Fax`` (strings of random digits) and the
        model-generated values for the keys of ``template``.

    Raises:
        ValueError: If the model returns no content, invalid JSON
            (``json.JSONDecodeError`` is a ``ValueError``) or JSON that is
            not an object.
    """
    # System prompt plus the user request carrying context and template
    messages = [
        {
            "role": "system",
            "content": "You are a helpful assistant that specializes in generating test data in JSON format. The user will provide you with a JSON structure and you generate a complete example JSON with realistic sounding values.",
        },
        {
            "role": "user",
            "content": "Context: " + context + "\n\nTemplate: " + json.dumps(template),
        },
    ]

    # Generate a completion
    example_completion = openai_api_client.chat.completions.create(
        model="gpt-4o",
        response_format={"type": "json_object"},
        messages=messages,
        temperature=0.9
    )

    # Get the generated example and convert it to a dictionary
    result_string = example_completion.choices[0].message.content
    if not result_string:
        raise ValueError("The model returned an empty response; cannot build an example.")
    result_dict = json.loads(result_string)
    if not isinstance(result_dict, dict):
        raise ValueError("The model response is not a JSON object.")

    # Generate random numbers for the example
    example_dict = complete_example_numbers({
        "N1": random.randint(1000, 10000),
        "N2": 0,
        "N3": 0,
        "N4": 0,
        "N6": random.randint(250, 10000),
        "N7": random.randint(1, 30),
        "N8": 0,
        "N9": random.randint(0, 50),
        "N10": random.randint(150, 2000),
        "N11": random.randint(30, 100),
        "N12": 0,
        "N13": 0,
        "N14": 0,
        "N15": 0,
        "N16": 0
    })

    # Format the numbers in the example dictionary
    for key, value in example_dict.items():
        example_dict[key] = format_number(value)

    # Add random ID, Telefon and Fax numbers
    example_dict.update({
        "ID": str(random.randint(10000000, 99999999)),
        "Telefon": str(random.randint(1000000000, 9999999999)),
        "Fax": str(random.randint(1000000000, 9999999999))
    })

    # Only take over the fields that were actually requested. Extra keys the
    # model may invent (e.g. "ID" or "N1") must not overwrite the locally
    # generated values above.
    example_dict.update({key: result_dict[key] for key in template if key in result_dict})

    # Return the completed example dictionary
    return example_dict


def replace_text(paragraph, key, value):
    """Replace every occurrence of the placeholder ``<key>`` in a paragraph.

    Args:
        paragraph: Object with a readable and writable ``text`` attribute
            (the caller passes python-docx paragraphs).
        key: Placeholder name; the text searched for is ``<key>``.
        value: Replacement value. It is converted with ``str`` so that
            non-string values (e.g. numbers) do not raise ``TypeError``.

    Returns:
        None. ``paragraph.text`` is updated in place when the placeholder
        is present and left untouched otherwise.
    """
    placeholder = f'<{key}>'
    # Only assign when something changes: for python-docx paragraphs, setting
    # ``text`` rebuilds the paragraph content as a single run.
    if placeholder in paragraph.text:
        paragraph.text = paragraph.text.replace(placeholder, str(value))
            

def fill_template_and_save(template_path, output_folder, data_dict):
    """Fill the Word template with ``data_dict`` and store DOCX, PDF and JSON.

    Three files named after ``data_dict['ID']`` are written to
    ``output_folder``: the filled ``.docx``, a ``.pdf`` exported through
    Microsoft Word via COM automation, and a ``.json`` dump of ``data_dict``.

    Only placeholders of the form ``<key>`` inside table cells are replaced;
    paragraphs outside of tables are left untouched.

    Args:
        template_path: Path of the ``.docx`` template to copy.
        output_folder: Folder receiving the generated files; it is created
            if it does not exist yet.
        data_dict: Mapping of placeholder names to replacement values. Must
            contain the key ``'ID'``, which is used as the file name.

    Returns:
        None.
    """
    # Word runs as a separate process with its own working directory, so it
    # must only be handed absolute paths.
    output_folder = os.path.abspath(output_folder)
    os.makedirs(output_folder, exist_ok=True)

    file_name = f"{data_dict['ID']}"
    docx_path = os.path.join(output_folder, f"{file_name}.docx")
    pdf_path = os.path.join(output_folder, f"{file_name}.pdf")
    json_path = os.path.join(output_folder, f"{file_name}.json")

    # Work on a copy of the template
    shutil.copy2(template_path, docx_path)
    document = Document(docx_path)

    # Replace placeholders with values in every table cell
    for table in document.tables:
        for row in table.rows:
            for cell in row.cells:
                for paragraph in cell.paragraphs:
                    for key, value in data_dict.items():
                        replace_text(paragraph, key, value)

    # Save the modified document
    document.save(docx_path)

    # Convert to PDF. The nested try/finally blocks make sure the document is
    # closed and Word is quit even if opening or exporting fails; otherwise a
    # Word process would be left running.
    word = win32com.client.dynamic.Dispatch("Word.Application")
    try:
        word_document = word.Documents.Open(docx_path)
        try:
            word_document.SaveAs(pdf_path, FileFormat=17)  # FileFormat=17 is for PDF
        finally:
            # False = do not save changes, so Word never prompts on close
            word_document.Close(False)
    finally:
        word.Quit()

    # Save the dictionary as a JSON file
    with open(json_path, 'w', encoding='utf-8') as json_file:
        json.dump(data_dict, json_file, ensure_ascii=False, indent=4)

In [7]:
# Template for the form values that are generated by the language model.
# Only the free-text fields are listed here: ID, Telefon, Fax and the numeric
# fields N1-N18 are produced locally in generate_example_from_template and
# are therefore intentionally not part of this template.
generation_template_dict = {
    "Institut": "Name of the bank as string",
    "Adresse": "Street and house number as string",
    "PLZ/Ort": "Postal code and city as string",
    "Ansprechpartner": "Full name of the contact person as string",
    "Mail": "The e-mail of the bank as string"
}

In [8]:
import os
from dotenv import load_dotenv


# Load the environment variables (values from a local .env file, if present,
# are merged into os.environ)
load_dotenv()

# The OpenAI client is created without arguments further down, so the API key
# has to come from the environment. Fail early with a clear message instead
# of a TypeError from assigning None to os.environ.
if not os.getenv('OPENAI_API_KEY'):
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Define it in the environment or in a .env file."
    )

# Paths of the Word template and of the folder receiving the generated files
example_template_path = os.getenv('TEMPLATE_DOC')
example_output_path = os.getenv('TEMPLATE_OUTPUT_DOC_PATH')

In [9]:
# Instantiate the OpenAI API client used by generate_example_from_template
openai_api_client = OpenAI()

# Request one example record for the fields of the generation template
generated_result = generate_example_from_template(
    context=(
        "Please generate an example for a random german Bank. Try to be creative and avoid using general names like "
        "Deutsche Bank or Sparkasse."
    ),
    template=generation_template_dict,
)

In [10]:
# Write the generated example into the template; DOCX, PDF and JSON are saved
fill_template_and_save(
    template_path=example_template_path,
    output_folder=example_output_path,
    data_dict=generated_result,
)

In [11]:
# Number of further example documents to generate in this cell
ADDITIONAL_EXAMPLE_COUNT = 9

# Instruction sent to the model for every example
bank_example_prompt = (
    "Please generate an example for a random german Bank. Try to be creative and avoid using general names like "
    "Deutsche Bank or Sparkasse."
)

# Generate each example and immediately store it as DOCX, PDF and JSON
for example_number in range(ADDITIONAL_EXAMPLE_COUNT):
    generated_result = generate_example_from_template(
        bank_example_prompt,
        generation_template_dict
    )
    fill_template_and_save(example_template_path, example_output_path, generated_result)