### Install packages

In [None]:
# Install the notebook's third-party dependencies with the %pip magic.
# openai is pinned to 1.12.0; pycountry is installed without a version pin.
%pip install openai==1.12.0
%pip install pycountry

### Required packages

In [None]:
import json
import os
import random
import re

import openai
import pycountry
from openai import AzureOpenAI

### Set up Azure OpenAI connection
###### Reference: https://community.fabric.microsoft.com/t5/Hack-Together/Fabric-cant-import-AzureOpenAI/m-p/3703267#M66

In [None]:
# Build the Azure OpenAI client used by the completion calls in this notebook.
# The endpoint and key are read from the environment first so that real credentials
# never have to be saved inside the notebook. The original placeholder strings are
# kept as fallbacks; either set the variables or replace the placeholders before running.
client = AzureOpenAI(
    # Include endpoint - for the competition it was https://polite-ground-030dc3103.4.azurestaticapps.net/api/v1
    azure_endpoint=os.environ.get("AZURE_OPENAI_ENDPOINT", "ADD ENDPOINT"),
    api_key=os.environ.get("AZURE_OPENAI_API_KEY", "YOUR API KEY"),  # Add API KEY
    api_version="2023-09-01-preview",
)

### Create prompt

In [None]:
# Prompt is a combination of 3 parts:

# Prompt text
prompt_text = "Can you create the dataframes detailed below:"

# Data_specs_JSON string
data_specs_file_path = r"/lakehouse/default/Files/Data_Specs.txt"
# Decode explicitly as UTF-8 so the specs are read the same way regardless of the
# platform's default locale encoding.
with open(data_specs_file_path, 'r', encoding="utf-8") as file:
    data_specs_content = file.read()
# json.dumps turns the raw file text into a quoted, escaped string literal, so the
# prompt carries a complete `data_specs_details = json.loads("...")` statement.
data_specs = f"data_specs_details = json.loads({json.dumps(data_specs_content)})"

# Additional instructions - adjust as needed
additional_instructions = """
Ensure these instructions are followed:
For the Fact table, use the IDs from the Dim_Product, Dim_Store, and Dim_Customer tables.
All columns within the arrays need to be the same length.
IDs should be unique in the dimension dataframes, but can be repeated in the fact table.
Dates can be repeated in the fact table, so all columns from the array are of the same length.
Only provide the python code in your response, if there are any comments, please wrap them as python comments using the # 
Please create just the script, with no printing steps. The script should end when the dataframes get created 
"""

# Combine the 3 into a single text, separated by blank lines
PROMPT = f"{prompt_text}\n\n{data_specs}\n\n{additional_instructions}"

# Show the final prompt so it can be reviewed before it is sent
print(PROMPT)

### Get AzureOpenAI response

In [None]:
# Send the generation prompt to AzureOpenAI as a single user message.
MESSAGES = [{"role": "user", "content": PROMPT}]
completion = client.chat.completions.create(
    # model and temperature adjusted as suggested here: https://www.reddit.com/r/ChatGPTCoding/comments/12i6k06/best_temperature_for_gpt4_api_to_get_quality/
    model="gpt-35-turbo",
    messages=MESSAGES,
    temperature=0.9,
)
# Keep the text of the first returned choice and show it for review.
code = completion.choices[0].message.content
print(code)

### Enhance response based on Script_Help file

In [None]:
# Enhance code with Script_Help practices:
# build a second prompt that asks the model to rework the first response (`code`)
# using the guidance stored in the Script_Help file.

# Ensure your script file path is correct
script_file_path = "/lakehouse/default/Files/Script_Help.txt"

# Read the script content into a variable.
# Decode explicitly as UTF-8 so the file is read the same way regardless of the
# platform's default locale encoding.
with open(script_file_path, 'r', encoding="utf-8") as file:
    script_content = file.read()

# Part 1: the request and output-format constraints
prompt_1 = """
Can you enhance this code? 
Only provide the python code in your response, if there are any comments, please wrap them as python comments using the # 
Please create just the script, no printing steps. The script should end when the dataframes dictionary with all the dataframes get created 
"""
# Part 2: the code returned by the first completion
prompt_2 = code
# Part 3: connective text introducing the help content
prompt_3 = "using the details below:"
# Part 4: the Script_Help file content
prompt_4 = script_content
# Join the four parts with blank lines between them
ADJUSTED_PROMPT = f"{prompt_1}\n\n{prompt_2}\n\n{prompt_3}\n\n{prompt_4}"

# Show the final prompt so it can be reviewed before it is sent
print(ADJUSTED_PROMPT)

### Get Final Response

In [None]:
# Send the enhancement prompt to AzureOpenAI as a single user message.
MESSAGES = [{"role": "user", "content": ADJUSTED_PROMPT}]
completion = client.chat.completions.create(
    # model and temperature adjusted as suggested here: https://www.reddit.com/r/ChatGPTCoding/comments/12i6k06/best_temperature_for_gpt4_api_to_get_quality/
    model="gpt-35-turbo",
    messages=MESSAGES,
    temperature=0.9,
)
# Keep the text of the first returned choice and show it for review.
adjusted_code = completion.choices[0].message.content
print(adjusted_code)

### Execute code

In [None]:
def _strip_markdown_fences(text):
    """Return only the source inside Markdown code fences, or ``text`` unchanged.

    Chat models often wrap generated code in ```python ... ``` fences, sometimes
    with prose around them; passing that straight to ``exec`` raises SyntaxError.
    When several fenced blocks are present their contents are joined in order.
    If no complete fence is found the text is returned as-is.
    """
    fenced_blocks = re.findall(r"```[\w+-]*[ \t]*\r?\n(.*?)```", text, flags=re.DOTALL)
    return "\n".join(fenced_blocks) if fenced_blocks else text


# SECURITY NOTE: this runs model-generated code inside the notebook session.
# Review the printed `adjusted_code` before executing this cell.
exec(_strip_markdown_fences(adjusted_code))

### Create csv files and load to Lakehouse

In [None]:
# Write each DataFrame in the 'dataframes' dictionary to its own CSV file.
# Assumes 'dataframes' maps a table name to a pandas DataFrame and was created by
# the executed script — TODO confirm the generated code actually defines it.
csv_output_dir = "/lakehouse/default/Files/csv_files"

# to_csv does not create missing parent folders, so make sure the target exists
# (exist_ok=True keeps re-runs from failing when it is already there).
os.makedirs(csv_output_dir, exist_ok=True)

for name, df in dataframes.items():
    # One file per DataFrame, named after its dictionary key
    file_path = f"{csv_output_dir}/{name}.csv"
    # index=False leaves the DataFrame index out of the CSV
    df.to_csv(file_path, index=False)

print("CSV files have been saved or updated.")