# Template for task: Use GenAI to Create Dummy Data Based On The Previous Design 

In [None]:
import os
from openai import OpenAI
 
# Module-level OpenAI client shared by call_genai().
# NOTE(review): constructed with no arguments, so credentials are presumably
# read from the environment (e.g. OPENAI_API_KEY) — confirm before running.
client = OpenAI()

In [None]:
def create_dummy_data_prompt(sql):
    """Build the LLM prompt that asks for dummy INSERT statements.

    The prompt wraps the given CREATE TABLE script in ``<sql>`` tags, lists
    the generation rules, and ends with an example of the expected output.

    Args:
        sql: DuckDB SQL text containing the table definition(s).

    Returns:
        The prompt as a single newline-joined string.
    """
    # Example block shown to the model; the leading newline and the
    # four-space indentation are part of the prompt text.
    output_template = (
        "\n"
        "    INSERT INTO table_name (col1, col2,...) VALUES (val1, val2, ...);\n"
        "    INSERT INTO table_name (col1, col2,...) VALUES (val1, val2, ...);\n"
        "    ...\n"
        "    "
    )

    schema_section = [
        "Given these duckdb SQL to create table(s).",
        "<sql>",
        sql,
        "</sql>",
    ]
    rules = [
        "Create 5 insert statement to insert dummy data to each table",
        "Make the data as real as possible",
        "Ensure any table constraints (e.g. foreign key, unique) fulfilled",
        "Ensure each SQL statement is ends with character ';'",
        "Just give the SQL insert statement. Don't give any explanation.",
    ]
    format_section = [
        "Don't format your answer as code. Give the output as plain text format.",
        "This is the expected output format:",
        output_template,
    ]

    return '\n'.join([*schema_section, *rules, *format_section])

In [None]:
def call_genai(prompt, *, max_retries=3, retry_delay=5):
    """Send ``prompt`` to the OpenAI chat model and return the reply text.

    The request is retried when it fails, pausing between attempts.

    Args:
        prompt: User message sent to the model.
        max_retries: Total number of attempts before giving up.
        retry_delay: Seconds to wait between two consecutive attempts.

    Returns:
        The ``content`` of the first choice in the model's response.

    Raises:
        Exception: If every attempt fails. The last underlying error is
            attached as ``__cause__``.
    """
    # Imported here because the notebook's import cell does not provide
    # ``time``; without it the first failure raised NameError instead of
    # retrying.
    import time

    last_error = None
    for attempt in range(1, max_retries + 1):
        try:
            response = client.chat.completions.create(
                model='gpt-4o-mini',
                messages=[
                    {
                        'role': 'system',
                        'content': 'You are an SQL engineer tasked to create SQL query for existing table and relationships.'
                    },
                    {
                        'role': 'user',
                        'content': prompt
                    }
                ]
            )
            return response.choices[0].message.content
        except Exception as err:
            # ``Exception`` (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate so the user can still abort the run.
            last_error = err
            # No point in waiting after the final attempt.
            if attempt < max_retries:
                time.sleep(retry_delay)
    raise Exception('Cannot query OpenAI model!') from last_error

In [None]:
def create_dummy_data(sql_table_folder, output_folder):
    """Generate dummy INSERT statements for every table script in a folder.

    All ``*.sql`` files in ``sql_table_folder`` are read in sorted filename
    order and concatenated into one script. That script is turned into a
    prompt, sent to the model, and the reply is written to
    ``dummy_data_full.sql`` inside ``output_folder``.

    Args:
        sql_table_folder: Directory containing the CREATE TABLE ``.sql`` files.
        output_folder: Directory for the generated file; created if missing.

    Raises:
        FileNotFoundError: If ``sql_table_folder`` does not exist or contains
            no ``.sql`` files.
    """
    sql_files = sorted(
        name for name in os.listdir(sql_table_folder) if name.endswith('.sql')
    )
    if not sql_files:
        # Fail before calling the model: an empty schema only yields junk.
        raise FileNotFoundError(
            f"No .sql files found in {sql_table_folder!r}"
        )

    scripts = []
    for name in sql_files:
        input_sql_file_path = os.path.join(sql_table_folder, name)
        # Explicit encoding keeps the result independent of the platform
        # default.
        with open(input_sql_file_path, 'r', encoding='utf-8') as f:
            scripts.append(f.read())
    # Each script is followed by a blank line, as separator for the prompt.
    full_sql_script = "".join(script + "\n\n" for script in scripts)

    prompt = create_dummy_data_prompt(full_sql_script)
    response = call_genai(prompt)

    # Create the output folder only once there is something to write.
    os.makedirs(output_folder, exist_ok=True)
    output_sql_file_path = os.path.join(output_folder, "dummy_data_full.sql")
    with open(output_sql_file_path, 'w', encoding='utf-8') as output_file:
        output_file.write(response)

    print(f"SQL dummy data {output_sql_file_path} generated successfully.")

In [None]:
# Create the dummy data: read the table scripts from genai_design/table and
# write the generated INSERT statements to
# genai_design/dummy_data/dummy_data_full.sql.
# Running this cell sends a request to the OpenAI API.
create_dummy_data('genai_design/table', 'genai_design/dummy_data')