In [9]:
import argparse
from openai import AzureOpenAI
import openai
import os

# NOTE: "__file__" is a quoted string literal here, not the __file__ variable,
# so abspath() resolves it against the kernel's working directory and the
# dirname() of that result is simply that working directory.
_placeholder_path = os.path.abspath("__file__")
current_directory = os.path.dirname(_placeholder_path)
# Anchor every relative path used later in the notebook to this directory.
os.chdir(current_directory)

In [10]:
# Build the Azure OpenAI client used by every request in this notebook.
# Credentials are read from environment variables so they never have to be
# typed into (and accidentally committed with) the notebook. The empty-string
# fallbacks keep the cell runnable when the variables are not set; requests
# will then fail until real values are provided.
client = AzureOpenAI(
    api_version="2023-05-15",
    azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT", ""),  # Azure resource endpoint URL
    api_key=os.environ.get("AZURE_OPENAI_API_KEY", ""),  # Azure OpenAI API key
)

In [11]:
def generate_prompt(question, prompt_file, metadata_file):
    """
    Build the final prompt text from a template file and a metadata file.

    The template is expected to contain the ``{user_question}`` and
    ``{table_metadata_string}`` placeholders, which are filled in with
    ``str.format``.

    Args:
        question (str): The user's question, substituted for ``{user_question}``.
        prompt_file (str): Path to the template file containing the placeholders.
        metadata_file (str): Path to the file whose full contents are substituted
            for ``{table_metadata_string}``.

    Returns:
        str: The template with both placeholders replaced.
    """
    # Read the template first, then the metadata, each in a single pass.
    file_contents = []
    for path in (prompt_file, metadata_file):
        with open(path, "r") as handle:
            file_contents.append(handle.read())
    template, table_metadata_string = file_contents

    return template.format(
        user_question=question,
        table_metadata_string=table_metadata_string,
    )

In [12]:
def SQL_query_generation(question, prompt_file, metadata_file, temp):
    """
    Generates an SQL query response based on a user question and template prompts.

    The formatted prompt is cut into three chat messages using the
    ``### Input:`` and ``### Response:`` markers: the text before
    ``### Input:`` becomes the system message, the text between the two markers
    becomes the user message, and the text after ``### Response:`` is sent as
    an assistant message.

    Args:
        question (str): The user's question or query.
        prompt_file (str): Path to a file containing template prompts for OpenAI.
        metadata_file (str): Path to a file containing relevant metadata (e.g., table information).
        temp (float): Temperature parameter for controlling response randomness.

    Returns:
        str: The content of the first choice returned by the chat completion.

    Raises:
        ValueError: If the formatted prompt lacks the ``### Input:`` or
            ``### Response:`` marker.
    """
    prompt = generate_prompt(question, prompt_file, metadata_file)

    input_parts = prompt.split("### Input:")
    response_parts = prompt.split("### Response:")
    try:
        sys_prompt = input_parts[0]
        user_prompt = input_parts[1].split("### Response:")[0]
        assistant_prompt = response_parts[1]
    except IndexError as exc:
        # Only a missing marker means the template is wrong; other errors
        # (including KeyboardInterrupt) must propagate untouched.
        raise ValueError("Invalid prompt file. Please use prompt_openai.md") from exc

    messages = [
        {"role": "system", "content": sys_prompt},
        {"role": "user", "content": user_prompt},
        {"role": "assistant", "content": assistant_prompt},
    ]

    response = client.chat.completions.create(
        model="chat",  # presumably the Azure deployment name -- confirm against the Azure resource
        messages=messages,
        temperature=temp,
    )

    return response.model_dump()["choices"][0]["message"]["content"]

In [13]:
# Define file pathways (relative to the working directory set in the first cell).
questions_input = '../GPT_pipeline/questions.txt'
prompt_file = '../GPT_pipeline/postgreSQL_database_assessment/post_prompt_baseline.md'
metadata_file = '../GPT_pipeline/postgreSQL_database_assessment/post_metadata_baseline.sql'

# One question per line. Blank lines (including the empty entry produced by a
# trailing newline) are skipped so they do not trigger API calls and output
# files for an empty question.
with open(questions_input, 'r') as f:
    questions = [line for line in f.read().split('\n') if line.strip()]

In [8]:
# Loop over the questions, generating each 10 times, and create one file per question.

# TEMPERATURE = 0
for question_number, question in enumerate(questions, start=1):
    filename = f"../3T_variance_postgres/T0/q{question_number}_T0.txt" # define output file pathway
    # Create the output folder if needed so open() does not fail on a fresh checkout.
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    with open(filename, "w") as file:
        for i in range(10):
            # `or ""` guards against a completion that comes back without message content.
            response_text = SQL_query_generation(question, prompt_file, metadata_file, temp=0.0) or ""
            # Keep only the text that precedes the first ``` fence.
            SQLcode_to_save = response_text.split("```")[0].strip()
            # endswith() is safe on an empty string, unlike indexing with [-1].
            if not SQLcode_to_save.endswith(";"):
                SQLcode_to_save += ";"
            file.write(f"{i+1}\n{SQLcode_to_save}\n\n")

    print(f"File '{filename}' created with 10 variations of question {question_number}.")

File 'C:/Users/aamt/Documents/openAI/3T_variance_postgres/T0/q1_T0.txt' created with 10 variations of question 1.
File 'C:/Users/aamt/Documents/openAI/3T_variance_postgres/T0/q2_T0.txt' created with 10 variations of question 2.
File 'C:/Users/aamt/Documents/openAI/3T_variance_postgres/T0/q3_T0.txt' created with 10 variations of question 3.
File 'C:/Users/aamt/Documents/openAI/3T_variance_postgres/T0/q4_T0.txt' created with 10 variations of question 4.
File 'C:/Users/aamt/Documents/openAI/3T_variance_postgres/T0/q5_T0.txt' created with 10 variations of question 5.
File 'C:/Users/aamt/Documents/openAI/3T_variance_postgres/T0/q6_T0.txt' created with 10 variations of question 6.
File 'C:/Users/aamt/Documents/openAI/3T_variance_postgres/T0/q7_T0.txt' created with 10 variations of question 7.
File 'C:/Users/aamt/Documents/openAI/3T_variance_postgres/T0/q8_T0.txt' created with 10 variations of question 8.
File 'C:/Users/aamt/Documents/openAI/3T_variance_postgres/T0/q9_T0.txt' created with 10 

In [14]:
# Loop over the questions, generating each 10 times, and create one file per question.

# TEMPERATURE = 0.5
for question_number, question in enumerate(questions, start=1):
    filename = f"../3T_variance_postgres/T05/q{question_number}_T05.txt" # define output file pathway
    # Create the output folder if needed so open() does not fail on a fresh checkout.
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    with open(filename, "w") as file:
        for i in range(10):
            # `or ""` guards against a completion that comes back without message content.
            response_text = SQL_query_generation(question, prompt_file, metadata_file, temp=0.5) or ""
            # Keep only the text that precedes the first ``` fence.
            SQLcode_to_save = response_text.split("```")[0].strip()
            # endswith() is safe on an empty string, unlike indexing with [-1].
            if not SQLcode_to_save.endswith(";"):
                SQLcode_to_save += ";"
            file.write(f"{i+1}\n{SQLcode_to_save}\n\n")

    print(f"File '{filename}' created with 10 variations of question {question_number}.")

File 'C:/Users/aamt/Documents/openAI/3T_variance_postgres/T05/q1_T05.txt' created with 10 variations of question 1.
File 'C:/Users/aamt/Documents/openAI/3T_variance_postgres/T05/q2_T05.txt' created with 10 variations of question 2.
File 'C:/Users/aamt/Documents/openAI/3T_variance_postgres/T05/q3_T05.txt' created with 10 variations of question 3.
File 'C:/Users/aamt/Documents/openAI/3T_variance_postgres/T05/q4_T05.txt' created with 10 variations of question 4.
File 'C:/Users/aamt/Documents/openAI/3T_variance_postgres/T05/q5_T05.txt' created with 10 variations of question 5.
File 'C:/Users/aamt/Documents/openAI/3T_variance_postgres/T05/q6_T05.txt' created with 10 variations of question 6.
File 'C:/Users/aamt/Documents/openAI/3T_variance_postgres/T05/q7_T05.txt' created with 10 variations of question 7.
File 'C:/Users/aamt/Documents/openAI/3T_variance_postgres/T05/q8_T05.txt' created with 10 variations of question 8.
File 'C:/Users/aamt/Documents/openAI/3T_variance_postgres/T05/q9_T05.txt

In [15]:
# Loop over the questions, generating each 10 times, and create one file per question.

# TEMPERATURE = 0.7
for question_number, question in enumerate(questions, start=1):
    filename = f"../3T_variance_postgres/T07/q{question_number}_T07.txt" # define output file pathway
    # Create the output folder if needed so open() does not fail on a fresh checkout.
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    with open(filename, "w") as file:
        for i in range(10):
            # `or ""` guards against a completion that comes back without message content.
            response_text = SQL_query_generation(question, prompt_file, metadata_file, temp=0.7) or ""
            # Keep only the text that precedes the first ``` fence.
            SQLcode_to_save = response_text.split("```")[0].strip()
            # endswith() is safe on an empty string, unlike indexing with [-1].
            if not SQLcode_to_save.endswith(";"):
                SQLcode_to_save += ";"
            file.write(f"{i+1}\n{SQLcode_to_save}\n\n")

    print(f"File '{filename}' created with 10 variations of question {question_number}.")

File 'C:/Users/aamt/Documents/openAI/3T_variance_postgres/T07/q1_T07.txt' created with 10 variations of question 1.
File 'C:/Users/aamt/Documents/openAI/3T_variance_postgres/T07/q2_T07.txt' created with 10 variations of question 2.
File 'C:/Users/aamt/Documents/openAI/3T_variance_postgres/T07/q3_T07.txt' created with 10 variations of question 3.
File 'C:/Users/aamt/Documents/openAI/3T_variance_postgres/T07/q4_T07.txt' created with 10 variations of question 4.
File 'C:/Users/aamt/Documents/openAI/3T_variance_postgres/T07/q5_T07.txt' created with 10 variations of question 5.
File 'C:/Users/aamt/Documents/openAI/3T_variance_postgres/T07/q6_T07.txt' created with 10 variations of question 6.
File 'C:/Users/aamt/Documents/openAI/3T_variance_postgres/T07/q7_T07.txt' created with 10 variations of question 7.
File 'C:/Users/aamt/Documents/openAI/3T_variance_postgres/T07/q8_T07.txt' created with 10 variations of question 8.
File 'C:/Users/aamt/Documents/openAI/3T_variance_postgres/T07/q9_T07.txt

In [16]:
# Loop over the questions, generating each 10 times, and create one file per question.

# TEMPERATURE = 1
for question_number, question in enumerate(questions, start=1):
    filename = f"../3T_variance_postgres/T1/q{question_number}_T1.txt" # define output file pathway
    # Create the output folder if needed so open() does not fail on a fresh checkout.
    os.makedirs(os.path.dirname(filename), exist_ok=True)
    with open(filename, "w") as file:
        for i in range(10):
            # `or ""` guards against a completion that comes back without message content.
            response_text = SQL_query_generation(question, prompt_file, metadata_file, temp=1) or ""
            # Keep only the text that precedes the first ``` fence.
            SQLcode_to_save = response_text.split("```")[0].strip()
            # endswith() is safe on an empty string, unlike indexing with [-1].
            if not SQLcode_to_save.endswith(";"):
                SQLcode_to_save += ";"
            file.write(f"{i+1}\n{SQLcode_to_save}\n\n")

    print(f"File '{filename}' created with 10 variations of question {question_number}.")

File 'C:/Users/aamt/Documents/openAI/3T_variance_postgres/T1/q1_T1.txt' created with 10 variations of question 1.
File 'C:/Users/aamt/Documents/openAI/3T_variance_postgres/T1/q2_T1.txt' created with 10 variations of question 2.
File 'C:/Users/aamt/Documents/openAI/3T_variance_postgres/T1/q3_T1.txt' created with 10 variations of question 3.
File 'C:/Users/aamt/Documents/openAI/3T_variance_postgres/T1/q4_T1.txt' created with 10 variations of question 4.
File 'C:/Users/aamt/Documents/openAI/3T_variance_postgres/T1/q5_T1.txt' created with 10 variations of question 5.
File 'C:/Users/aamt/Documents/openAI/3T_variance_postgres/T1/q6_T1.txt' created with 10 variations of question 6.
File 'C:/Users/aamt/Documents/openAI/3T_variance_postgres/T1/q7_T1.txt' created with 10 variations of question 7.
File 'C:/Users/aamt/Documents/openAI/3T_variance_postgres/T1/q8_T1.txt' created with 10 variations of question 8.
File 'C:/Users/aamt/Documents/openAI/3T_variance_postgres/T1/q9_T1.txt' created with 10 