In [1]:
from langchain_openai import ChatOpenAI
from urllib.parse import quote  

from dotenv import load_dotenv
import time
import os
import sys
import json
load_dotenv(dotenv_path=os.path.join("..","..",".env"))
#load_dotenv(dotenv_path="../../.env") This is equivalent, but hardcoded

#To test if the API key is working and able to connect to OpenAI
#print(os.getenv('OPENAI_API_KEY'))
# llm  = ChatOpenAI()
# respo = llm.invoke("Say hi bro")
# print(respo.content)

from c3_clear_prompting import generate_clear_prompting
from c3_calibration_with_hints import generate_calibration_with_hints
from c3_generate_sql import generate_sql

# Absolute path of the directory the notebook kernel is running in.
current_dir = os.path.abspath(os.curdir)
# The shared helper modules (e.g. sqldatabase_langchain_utils.py) live in the
# "functions" directory two levels above this notebook.
functions_path = os.path.abspath(
    os.path.join(current_dir, os.pardir, os.pardir, 'functions')
)
# Register that directory for imports, but only once, so re-running the cell
# does not keep growing sys.path.
already_registered = functions_path in sys.path
if not already_registered:
    sys.path.append(functions_path)


from sqldatabase_langchain_utils import SQLDatabaseLangchainUtils
from oracle_connection import get_oracle_connection

In [2]:
# Name of the database schema used by this notebook.
SCHEMA = 'SHIPMENT'
# Lower-case counterpart of the schema name.
# NOTE(review): not referenced in the visible cells - presumably used further down; confirm.
PREFIX = 'shipment'
# JSON file that save_queries() writes and read_queries() reads
# (relative to the notebook's working directory).
FILE_NAME_RESULT = "./results.json"

In [3]:
def save_queries(queries):
    """Write ``queries`` to ``FILE_NAME_RESULT`` as indented JSON.

    The data is stored under a single top-level ``"queries"`` key, which is the
    layout ``read_queries()`` expects. An existing file is overwritten.

    Args:
        queries: JSON-serialisable object (the notebook passes a list of
            per-question result dicts).

    Raises:
        TypeError: if ``queries`` contains values ``json.dump`` cannot serialise.
        OSError: if the file cannot be opened for writing.
    """
    payload = {"queries": queries}
    # Explicit UTF-8 so the file is written with the same encoding that
    # read_queries() uses to open it, independent of the platform default.
    with open(FILE_NAME_RESULT, "w", encoding="utf-8") as json_file:
        json.dump(payload, json_file, indent=4)
        
def read_queries():
    """Return the value stored under ``"queries"`` in ``FILE_NAME_RESULT``.

    The file is decoded as UTF-8 with undecodable bytes dropped, and parsed
    with ``strict=False`` so control characters inside JSON strings are
    accepted.

    Raises:
        OSError: if the file cannot be opened.
        json.JSONDecodeError: if the content is not valid JSON.
        KeyError: if the document has no ``"queries"`` key.
    """
    with open(FILE_NAME_RESULT, encoding='utf-8', errors='ignore') as results_file:
        raw_text = results_file.read()
    document = json.loads(raw_text, strict=False)
    return document["queries"]


In [4]:
# Initialise a connection to the Oracle SHIPMENT database and load all table names.
#
# The user name and password can be supplied through the environment (for
# example the .env file loaded in the first cell) so that credentials do not
# have to live in the notebook. The literals are fallbacks that keep the
# previous behaviour when the variables are not set.
# NOTE(review): drop the password fallback once the .env file provides
# SHIPMENT_DB_PASS - credentials should not be committed with the notebook.
db_connection = {
    "DB_USER_NAME": os.getenv("SHIPMENT_DB_USER_NAME", "SHIPMENT"),
    "DB_PASS": os.getenv("SHIPMENT_DB_PASS", "oraclee"),
    "DB_HOST": "localhost",
    "DB_PORT": 1521,
    "DB_NAME": "XE",  # optional
    "SQL_DRIVER": "oracle+oracledb",
    "SERVICE_NAME": "XE"
}

# Use the SCHEMA constant from the configuration cell instead of repeating the literal.
db = SQLDatabaseLangchainUtils(db_connection=db_connection, schema=SCHEMA)

# Table names reported by the database wrapper.
include_tables = db.get_table_names()

In [5]:
def run_c3(question, db, model='gpt-3.5-turbo', add_fk = True, callback= None):
    """Run the C3 pipeline for one question: clear prompting -> hints -> SQL.

    Args:
        question: natural-language question to translate.
        db: database wrapper handed to the prompting and SQL-generation steps.
        model: chat model name used for both LLM clients.
        add_fk: forwarded to ``generate_clear_prompting``.
        callback: forwarded to ``generate_clear_prompting`` and ``generate_sql``
            (the notebook passes ``tracking_token`` here).

    Returns:
        Whatever ``generate_sql`` returns for the question.
    """
    # Step 1: build the clear prompt with a client running at temperature 0.7.
    prompting_llm = ChatOpenAI(model_name=model, temperature=0.7, n=1)
    clear_prompt = generate_clear_prompting(
        question, db, prompting_llm, add_fk=add_fk, callback=callback
    )
    print(clear_prompt)

    # Step 2: wrap the prompt in the calibration-with-hints messages.
    hinted_messages = generate_calibration_with_hints(clear_prompt)

    # Step 3: generate the SQL with a second client that leaves the
    # temperature at the library default.
    sql_llm = ChatOpenAI(model_name=model, n=1)
    return generate_sql(hinted_messages, sql_llm, db, question, callback=callback)

# The actual C3 method, Prompt -> Hints -> SQL

In [6]:
# Load the evaluation questions into the `queries` variable.
json_file_path = "./questionsLab3.json"
# Decode as UTF-8, dropping undecodable bytes; strict=False accepts control
# characters inside JSON strings.
with open(json_file_path, encoding='utf-8', errors='ignore') as json_data:
    questions_document = json.load(json_data, strict=False)
# The file wraps the list of questions in a top-level "queries" key.
queries = questions_document['queries']

print(f"Number of queries: {len(queries)}")

Number of queries: 10


In [7]:
# Usage records collected through tracking_token() since the last reset.
track_token = []


def tracking_token(cb =None, reset = False):
    """Record one usage entry in the module-level ``track_token`` log, or reset it.

    Args:
        cb: entry to append. ``convert_to_dict_tracking_token`` iterates over
            each entry's keys, so entries are expected to be mappings. ``None``
            (the default) means "nothing to record" and is not stored, so the
            log never contains placeholders that would break that conversion.
        reset: when true, replace the log with a new empty list. As before, an
            entry passed together with ``reset=True`` is not kept.
    """
    global track_token
    if reset:
        track_token = []
        return
    if cb is not None:
        track_token.append(cb)

In [8]:
def convert_to_dict_tracking_token():
    """Flatten the ``track_token`` log into a plain, JSON-friendly dict.

    Each log entry is a mapping from a label to a usage object exposing
    ``total_tokens``, ``total_cost``, ``prompt_tokens`` and
    ``completion_tokens``. If the same label occurs in several entries, the
    last one wins.

    Returns:
        dict mapping each label to a dict with those four fields.
    """
    token_usage = {}
    for entry in track_token:
        if entry is None:
            # tracking_token() defaults cb to None, so the log may hold
            # placeholders that carry no usage data; skip them instead of
            # failing on None.keys().
            continue
        for key in entry.keys():
            usage = entry[key]
            token_usage[key] = {
                'total_tokens': usage.total_tokens,
                'total_cost': usage.total_cost,
                'prompt_tokens': usage.prompt_tokens,
                'completion_tokens': usage.completion_tokens,
            }
    return token_usage

In [9]:

# Run the C3 pipeline on every question and record, per question, the
# generated SQL, the elapsed time in seconds and the token usage collected
# through the tracking_token callback. Everything is written to disk once the
# loop has finished.
results = []
for i, question in enumerate(queries):
    print(f"\n--- Question {i+1} ---")
    print(question)
    tracking_token(reset=True)  # Reset tracking for each question
    # perf_counter() is a monotonic clock, so the measured duration cannot be
    # distorted by system clock adjustments the way time.time() can.
    start_time = time.perf_counter()
    sql = run_c3(question, db, model='gpt-4', callback=tracking_token)
    end_time = time.perf_counter()
    results.append({
        'question': question,
        'sql': sql,
        'time': end_time - start_time,
        'token-usage': convert_to_dict_tracking_token(),
    })

save_queries(results)


--- Question 1 ---
Return the customer name and address as one field called fullAddress that consists of the address, city, state. Only show customers in 'IA' or 'BC' that have a valid address. Order by customer name ascending.
Column recall attempt: 1

### Complete oracle SQL query only and with no explanation, and do not select extra columns that are not explicitly requested in the query. 
### Oracle SQL tables, with their properties: 
#
# customer (cname, address, city, state)
# product ()
# shipment ()
# shippedproduct ()
# shipment.cid=customer.cid
# shipment.cid=customer.cid
# shippedproduct.sid=shipment.sid
# shippedproduct.pid=product.pid

#
### Return the customer name and address as one field called fullAddress that consists of the address, city, state. Only show customers in 'IA' or 'BC' that have a valid address. Order by customer name ascending.
SELECT

--- Question 2 ---
Return the shipment id, shipment date, product id, and amount for all shipments in 2022 where there w