# Librerías necesarias

In [1]:
# Librerias externas
import json
import openai
import pandas as pd

from galois.chatgpt_galois import *
from galois.QueryTree import *
from galois.utils import *

# Funciones necesarias

In [2]:
def obtener_sql_plan(bd:str, verbose:bool, query:str) -> Node:
    """Ask the database for the EXPLAIN plan of ``query`` and parse it into a tree.

    Args:
        bd: Value handed to ``run_db`` to obtain the connection
            (presumably a database file path -- TODO confirm against ``run_db``).
        verbose: When true, print the raw plan text before parsing it.
        query: SQL statement to explain. Every double quote is replaced by a
            single quote before the statement is sent to the database.

    Returns:
        Whatever ``parse_query_tree`` builds from the plan lines (annotated as
        the root ``Node`` of the plan).

    Raises:
        IndexError: If the EXPLAIN statement yields no rows.
    """
    connection = run_db(bd)

    # Configure how the engine renders plans before asking for one.
    for pragma in ("PRAGMA enable_profiling='query_tree';",
                   "PRAGMA explain_output='ALL';"):
        connection.execute(pragma)

    explain_statement = "EXPLAIN " + query.replace('"', "'")
    connection.execute(explain_statement)

    # The plan text is read from the second column of the first result row.
    first_row = connection.fetchall()[0]
    plan_lines = first_row[1].split('\n')

    if verbose:
        print("\n".join(plan_lines))
        print('\n')

    return parse_query_tree(plan_lines)

# Diccionario de queries para probar

In [3]:
# Hand-written plan: a PROJECTION node on top of a single scan leaf.
# NOTE(review): 'nose' looks like a placeholder table name -- confirm.
c0 = Node()
c0.op = 'SEQ_SCAN'
c0.args = ['nose']
c0.text = ['SEQ_SCAN', 'nose']

c1 = Node()
c1.op = 'PROJECTION'
c1.args = ['capital']
c1.text = ['PROJECTION', 'capital']
c1.filled_question = 'What is the capital of state of !!x!!?'
c1.l = c0  # the scan leaf is the projection's left child

# Register the plan under the exact SQL text it stands for.
join_query_trees = {
    'SELECT t2.capital FROM state AS t2 JOIN city AS t1 ON t2.state_name = t1.state_name WHERE t1.city_name = "durham";': c1,
}


# See query #1 in the plans file
# join_query_trees['SELECT name FROM airports WHERE elevation BETWEEN -50 AND 50'] = obtener_sql_plan("data/spider_files/spider/database/flight_4/flight_4.sqlite", 
#                                                                                                      False, 
#                                                                                                      "SELECT name FROM airports WHERE elevation BETWEEN -50 AND 50")

In [9]:
# Natural-language intent: what is the capital of states that have cities named durham
q = 'SELECT t2.capital FROM state AS t2 JOIN city AS t1 ON t2.state_name = t1.state_name WHERE t1.city_name = "durham";'

# Reference plan diagram kept from the original cell.
# NOTE(review): the diagram places FILTER below the join, whereas the nodes
# built underneath link FILTER *above* JOIN -- confirm which shape is intended.
# ┌───────────────────────────┐
# │         PROJECTION        │
# │   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │
# │          capital          │
# └─────────────┬─────────────┘
#               |
# ┌─────────────┴─────────────┐
# │      COMPARISON_JOIN      │
# │   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │
# │           INNER           ├────────────────┐
# │ (state_name = state_name) │                │
# └─────────────┬─────────────┘                │
# ┌─────────────┴─────────────┐  ┌─────────────┴─────────────┐
# │           FILTER          │  │          SEQ_SCAN         │
# │   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │  │   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │
# │   (city_name = 'durham')  │  │            city           │
# └─────────────┬─────────────┘  └───────────────────────────┘
# ┌─────────────┴─────────────┐
# │          SEQ_SCAN         │
# │   ─ ─ ─ ─ ─ ─ ─ ─ ─ ─ ─   │
# │           state           │
# └───────────────────────────┘

# --- Leaves: one sequential scan per table ---------------------------------
c1 = Node()
c1.op = 'SEQ_SCAN'
c1.args = ['state']
c1.text = ['SEQ_SCAN', 'state']

c2 = Node()
c2.op = 'SEQ_SCAN'
c2.args = ['city']
c2.text = ['SEQ_SCAN', 'city']

# --- Join: state scan on the left, city scan on the right ------------------
c3 = Node()
c3.op = 'JOIN'
c3.text = ['JOIN', 'city']
c3.key_left = 'What is the state name of !!x!!? Answer briefly.'
c3.key_right = 'What is the state name of !!x!!? Answer briefly.'
c3.filter_key = 'right'
c3.l = c1
c3.r = c2

# --- Filter applied to the join's output -----------------------------------
c4 = Node()
c4.op = 'FILTER'
c4.args = ["(city_name = 'durham')"]
c4.text = ['FILTER', "(city_name = 'durham')"]
c4.filled_question = 'Is !!x!! the same as Durham? Answer with Yes or No only.'
c4.l = c3

# Disabled alternative for c4 (placeholder scan), kept from the original cell:
# c4 = Node()
# c4.text = ['SEQ_SCAN','nose']
# c4.op = 'SEQ_SCAN'
# c4.args=['nose']

# --- Root: projection of the capital ---------------------------------------
c5 = Node()
c5.op = 'PROJECTION'
c5.args = ['capital']
c5.text = ['PROJECTION', 'capital']
c5.filled_question = 'What is the capital of state of !!x!!?'
c5.l = c4

# Fresh dictionary holding only this plan, keyed by its SQL text.
join_query_trees = {q: c5}

# Traducción de los operadores de SQL a NL

In [4]:
# Load the SQL-operator -> natural-language question templates.
# The file is opened in a `with` block so the handle is closed as soon as the
# JSON has been read (the bare `json.load(open(...))` form left it open).
with open('data/question_maps.json', 'r') as question_maps_file:
    question_maps = json.load(question_maps_file)

# Derive the augmented mapping that is later passed to GPT_SPWJ.
augmented_question_maps = augment_questions(question_maps)

# Configuración para chatgpt

In [5]:
# System instructions for ChatGPT.
# NOTE(review): "you will give you the answer" reads like a typo for
# "you will give me the answer"; left untouched because it is the runtime prompt.
inst_chatgpt = (
    "You are a highly intelligent question answering bot. "
    "If I ask you a question that is rooted in truth, you will give you the answer. "
    "If I ask you a question that is nonsense, trickery, or has no clear answer, "
    "you will respond with 'Unknown'. "
    "You will answer concisely."
)

In [6]:
# Few-shot examples for ChatGPT: each entry is a [question, answer] pair.
# NOTE(review): the "List me 5 fruits" answer lists six fruits -- confirm
# whether that is intentional before relying on it as a counting example.
fewshot_chatgpt = [
    [question, answer]
    for question, answer in (
        ('What is human life expectancy in the United States?', '78.'),
        ('Who was president of the United States in 1955?', 'Dwight D. Eisenhower.'),
        ('Which party was founded by Gramsci?', 'Comunista.'),
        ('What is the capital of France?', 'Paris.'),
        ('What is a continent starting with letter O?', 'Oceania.'),
        ('Where were the 1992 Olympics held?', 'Barcelona.'),
        ('How many squigs are in a bonk?', 'Unknown'),
        ('List me 5 fruits', 'apple, banana, peach, grape, grapefruit, strawberry'),
        ('List me the first 5 prime numbers', '2, 3, 5, 7, 11'),
        ('List me 3 presidents of america', 'George Washington, Abraham Lincoln, Franklin D. Roosevelt'),
    )
]

# Ejecutar galois

In [10]:
# Run Galois once per registered query, labelling each run "experimento#<k>".
nombre = "experimento#"

# enumerate() hands out a fresh 1-based index for every query. The previous
# hand-kept counter was never advanced, so every run shared the label
# "experimento#1". Only the SQL text is needed here: the whole plan dictionary
# is passed through `query_plan_dict`.
for contador, query in enumerate(join_query_trees, start=1):
    GPT_SPWJ(model_arch='gpt-3.5-turbo',
             query=query,
             instr=inst_chatgpt,
             few_shots=fewshot_chatgpt,
             inst_funct=1,
             label=f'{nombre}{contador}',
             augmented_question_maps=augmented_question_maps,
             query_plan_dict=join_query_trees,
             verbose=True)

SELECT t2.capital FROM state AS t2 JOIN city AS t1 ON t2.state_name = t1.state_name WHERE t1.city_name = "durham";


Query in dict
Mode:  r
['SEQ_SCAN', 'state']
Tree Nodes:  ['SEQ_SCAN_state']
SEQ_SCAN_state
OP:  SEQ_SCAN_state
Q:  List me some american states. Separate them by a comma. List as much as you can.
NOT IN CACHE
RUNNING SEQUENTIAL SCANS...
[{'role': 'system', 'content': "You are a highly intelligent question answering bot. If I ask you a question that is rooted in truth, you will give you the answer. If I ask you a question that is nonsense, trickery, or has no clear answer, you will respond with 'Unknown'. You will answer concisely."}, {'role': 'user', 'content': 'What is human life expectancy in the United States?'}, {'role': 'assistant', 'content': '78.'}, {'role': 'user', 'content': 'Who was president of the United States in 1955?'}, {'role': 'assistant', 'content': 'Dwight D. Eisenhower.'}, {'role': 'user', 'content': 'Which party was founded by Gramsci?'}, {'role': '