In [None]:
import sys
import json
import asyncio
import traceback
import numpy as np
import pandas as pd
from pathlib import Path
from openai import OpenAI
from pimresearch import pimstore
from typing import Sequence, Any
from pydantic import BaseModel, Field
from typing import Union, Tuple, List, Optional
from pprint import pprint
# # Init OpenAI Client
# Shared client used by call_openai_model further down.
# NOTE(review): OPENAI_API_KEY, PIMCO_OPENAI_BASE_URL and default_headers are not
# defined in any visible cell; on a fresh kernel this line raises NameError unless
# they are provided elsewhere — TODO confirm where they come from.
client = OpenAI(api_key=OPENAI_API_KEY, base_url=PIMCO_OPENAI_BASE_URL, default_headers=default_headers)

In [None]:
# # Schema
# ## Chain of Thought Reasoning Schema
# Reasoning step that frames the problem; one of the three member types of the
# `reasonings` union.
# NOTE: pydantic emits the class docstring and each Field description into the
# generated JSON schema, and that schema is pasted into the prompt, so editing
# either one changes what the model is told.
class Background(BaseModel):
    """A setup to the background for the user."""
    # Free-text background; min_length=10 makes validation reject shorter strings.
    background: str = Field(description="Background for the user's question", min_length=10)

# Single reasoning step; one of the three member types of the `reasonings` union.
# NOTE: the docstring and Field descriptions below are emitted into the JSON
# schema that is embedded in the prompt, so they are effectively prompt text.
class Thought(BaseModel):
    """A thought about the user's question."""
    # The content of the thought.
    thought: str = Field(description="Text of the thought.")
    # Boolean flag marking whether this thought helps answer the question.
    helpful: bool = Field(description="Whether the thought is helpful to solving the user's question.")

# Commentary step over the preceding reasoning; one of the three member types of
# the `reasonings` union.
# NOTE: the docstring and Field description below are emitted into the JSON
# schema that is embedded in the prompt, so they are effectively prompt text.
class Observation(BaseModel):
    """An observation on the sequence of thoughts and observations generated so far."""
    # Free-text observation; no length constraint is declared.
    observation: str = Field(description="An insightful observation on the sequence of thoughts and observations generated so far.")

# Container for an ordered list of reasoning steps. Its JSON schema is exported
# as `reasonings_schema_json` and interpolated into the prompt.
class Reasonings(BaseModel):
    """Returns a detailed reasoning to the user's question."""
    # Each element may be a Background, Thought or Observation; the list itself
    # has no declared length or ordering constraint.
    reasonings: list[Union[Background, Thought, Observation]] = Field(description="Reasonings to solve the users questions.")

# Hand-built example used to check that the reasoning models validate and serialize.
# `helpful` is given real booleans: the field is declared `bool`, and passing the
# string "True" only works when pydantic's lax string-to-bool coercion is active.
sample_reasonings = Reasonings(
    reasonings=[
        Background(background="The task is to generate SQL from natural language query."),
        Thought(thought="First thought", helpful=True),
        Thought(thought="Second thought", helpful=True),
        Thought(thought="Third thought", helpful=True),
        Thought(thought="Fourth thought", helpful=True),
        Thought(thought="Fifth thought", helpful=True),
        Observation(observation="Astute observation"),
    ]
)
# Serialization smoke check; the value is discarded because this is not the last
# expression of the cell.
# NOTE(review): `.json()` / `.schema_json()` are the pydantic v1 method names —
# confirm the installed pydantic version before migrating to the v2 equivalents.
sample_reasonings.json()
# JSON schema text that is interpolated into the user prompt.
reasonings_schema_json = Reasonings.schema_json()

# ## SQL Schema
# Shape of the complete model response: the echoed question, the reasoning
# steps, and the generated SQL. Its JSON schema is exported as
# `final_output_schema_json` and interpolated into the prompt.
# NOTE: no class docstring is present; adding one would add a `description` to
# the generated schema and therefore change the prompt.
class FinalQueryOutput(BaseModel):
    # The user's natural-language question, echoed back.
    user_nlp_query: str = Field(description="Returns the exact question that the user asked in natural language which is to be translated into SQL Query.")
    # Same element types and description as `Reasonings.reasonings`, declared inline here.
    reasonings: list[Union[Background, Thought, Observation]] = Field(description="Reasonings to solve the users questions.")
    # The SQL text; the sample run reads this key from the parsed response.
    generated_sql_query: str = Field(description="Returns the SQL Language Query corresponding to the NLP description of the user question.")

# JSON schema text for the full response; interpolated into the user prompt.
final_output_schema_json = FinalQueryOutput.schema_json()

# Hand-built example used to check that the output model validates and serializes.
# `helpful` is given real booleans (the field is declared `bool`), and the sample
# SQL uses the valid `count(*)` form.
sample_output = FinalQueryOutput(
    user_nlp_query="Get count of rows.",
    reasonings=[
        Background(background="Deadline is near"),
        Thought(thought="First thought", helpful=True),
        Thought(thought="Second thought", helpful=True),
        Thought(thought="Third thought", helpful=True),
        Thought(thought="Fourth thought", helpful=True),
        Thought(thought="Fifth thought", helpful=True),
        Observation(observation="Astute observation"),
    ],
    generated_sql_query="Select count(*) from fact_table",
)

# Only the last expression of a cell is rendered; the two before it are evaluated
# (so a serialization failure would still surface) and then discarded.
sample_output
sample_output.json()
final_output_schema_json

In [None]:
# # User Prompt

# ## Background and Table Structure
# Task description followed by the table/column listing, wrapped in a ``` fence.
# Each list item is one line of the final prompt text.
complete_user_prompts = "\n".join([
    "```",
    "Task Overview",
    "The task is to convert the given natural language query to the corresponding SQL query.",
    "You will be provided with the schema of the database and a background on the tables and the columns in them.",
    "SQL Schema for the database:",
    "Customers table contains columns cust_id, cust_name.",
    "Orders table contains columns order_num, order_date, cust_id.",
    "Vendors table contains columns vend_id, vend_name.",
    "Products table contains columns prod_id, vend_id, prod_name, prod_price.",
    "OrderItems table contains columns order_num, prod_id, quantity, item_price.",
    "```",
])

# ## Reasoning Instructions
# What the explanation accompanying the SQL must cover, plus a formatting rule.
reasoning_instructions = "\n".join([
    "```",
    "1. Reasoning you provide should first focus on why a nested query was chosen or why it wasn't chosen.",
    "2. It should give a query plan on how to solve this question - explain the mapping of the columns to the words in the input question.",
    "3. It should explain each of the clauses and why they are structured the way they are structured. For example, if there is a `group_by`, an explanation should be given as to why it exists.",
    "4. If there's any sum() or any other function used it should be explained as to why it was required.",
    "Format the generated sql with proper indentation - the columns in the (`select` statement should have more indentation than keyword `select` and so on for each SQL clause.)",
    "```",
])

# ## Thought Instructions
# How successive Thought entries should relate to one another.
thought_instructions = "\n".join([
    "```",
    "Thought Instructions:",
    "Generate thoughts of increasing complexity. Each thought should build on the previous ones and thoughts should progressively cover the nuances of the problem at hand.",
    "```",
])
# Output-format instructions: embeds the reasoning schema, the thought
# instructions and the overall output schema, then lists the three required
# top-level keys. The key names listed here must match the FinalQueryOutput
# fields (`user_nlp_query`, `reasonings`, `generated_sql_query`), because the
# sample run reads `generated_sql_query` from the parsed response.
reasoning_schema_instructions = f"```\nUse the following JSON Schema as the grammar to create the structure for the step by step reasoning, and then to create the final SQL query.\nSchema for Reasoning:\n{reasonings_schema_json}\nThe instructions on how to structure the reasoning is provided below:\n{thought_instructions}\nSchema for Overall Output:\n(This includes the reasonings schema above as an element)\n{final_output_schema_json}\nThe final response should be a json with `names` as `user_nlp_query`, `reasonings` and `generated_sql_query`:\n1. `user_nlp_query` should be exactly the same as the user query in string format.\n2. `reasonings` should provide the reasoning steps adhering to the Reasonings schema.\n3. `generated_sql_query` should provide the SQL query generated in string format.\n- this is the final answer.\n```"

def get_user_prompt_for_question(input_question):
    """Assemble the full user prompt for one natural-language question.

    The question is placed first, followed by the module-level
    `complete_user_prompts`, `reasoning_instructions` and
    `reasoning_schema_instructions` blocks, each separated by a newline.

    Args:
        input_question: The question to translate into SQL; interpolated as-is.

    Returns:
        The assembled prompt string.
    """
    # One entry per newline-separated segment of the final prompt, in order.
    segments = (
        "```",
        "Here's the question that the user entered:",
        "```",
        f"{input_question}",
        "Generate a SQL query corresponding to the given input question and the description of the table provided below.",
        f"{complete_user_prompts}",
        "Reasoning as to why the query is correct:",
        f"{reasoning_instructions}",
        f"{reasoning_schema_instructions}",
        "Response for SQL Generation:",
        "```",
    )
    return "\n".join(segments)


In [None]:
# # System Prompt
# Four independently fenced snippets, concatenated in numeric order with a
# newline between them. Each list item is one line of the snippet text.
system_prompt_snippet_001 = "\n".join([
    "```",
    "You are the most intelligent person in the world.",
    "```",
])
system_prompt_snippet_002 = "\n".join([
    "```",
    "You will receive a $500 tip if you follow ALL the instructions specified.",
    "```",
])
system_prompt_snippet_003 = "\n".join([
    "```",
    "Instructions",
    "Provide an explanation of why the given sql query is correct based on the input request and the description of the columns.",
    "Use step by step reasoning and at each step generate thoughts of increasing complexity.",
    "```",
])
system_prompt_snippet_004 = "\n".join([
    "```",
    "Getting this answer right is important for my career. Please do your best.",
    "```",
])
system_prompt = "\n".join((
    system_prompt_snippet_001,
    system_prompt_snippet_002,
    system_prompt_snippet_003,
    system_prompt_snippet_004,
))
print(system_prompt)

In [None]:
# # GPT Call Function
def call_openai_model(system_prompt, user_prompt, model_name):
    """Send one system + user message pair to the chat completions API.

    Uses the module-level `client` and `extra_headers`.
    NOTE(review): `extra_headers` is not defined in any visible cell — TODO
    confirm where it comes from.

    Args:
        system_prompt: Text for the ``system`` message.
        user_prompt: Text for the ``user`` message.
        model_name: Model identifier passed as ``model``.

    Returns:
        On success, the ``content`` of the first choice's message; the request
        asks for the ``json_object`` response format. If anything raises, the
        exception is printed and a dict with a single ``"content"`` key holding
        a retry message is returned instead, so callers must check the type
        before parsing.
    """
    chat_history = [{'role': 'system', 'content': system_prompt}, {'role': 'user', 'content': user_prompt}]
    try:
        response = client.chat.completions.create(model=model_name, messages=chat_history, extra_headers=extra_headers, response_format={"type": "json_object"})
        return response.choices[0].message.content
    except Exception as exc:
        # Label the failure with this function's own name. Referencing any
        # undefined name here would raise from inside the handler, hide the real
        # error, and prevent the fallback payload below from being returned.
        msg = f"Caught an exception {type(exc)} in 'call_openai_model': {exc}"
        print(msg)
        traceback.print_tb(exc.__traceback__)
        return {"content": "An error occured. Please retry your chat. If you keep getting this error, you may be out of OpenAI completion tokens. Contact #help-ai on slack for assistance."}

# ## Test Prompt on Sample Question
input_question = "Return a list of customers with a count of orders for each."
user_prompt_01 = get_user_prompt_for_question(input_question)
# Rough prompt-size estimate (presumably ~4 characters per token — TODO confirm).
# Printed explicitly, because a bare expression in the middle of a cell is discarded.
approx_prompt_tokens = len(user_prompt_01) / 4
print(f"Approximate prompt tokens: {approx_prompt_tokens:.0f}")
# Reuse the prompt built above instead of constructing it a second time.
response = call_openai_model(system_prompt=system_prompt, user_prompt=user_prompt_01, model_name='gpt-4o')
if isinstance(response, str):
    response_parsed = json.loads(response)
    print(response)
    print(response_parsed["generated_sql_query"])
else:
    # call_openai_model returns a dict error payload on failure (and the message
    # content may be absent); json.loads would raise TypeError on either.
    print(response)