In [1]:
import pandas as pd
from sqlalchemy import create_engine
from llama_index.llms.openai import OpenAI
from llama_index.core.query_engine import NLSQLTableQueryEngine
from llama_index.core import SQLDatabase
from llama_index.embeddings.openai import OpenAIEmbedding

from llama_index.core.prompts import BasePromptTemplate, PromptTemplate


In [2]:
file_path = "data/out/recipes_flat.json"
df = pd.read_json(file_path)
llm_gpt = OpenAI(model="gpt-4o-mini", temperature=0.5, max_tokens=100)
# embed_gpt = OpenAIEmbedding(model='text-embedding-3-large')


In [3]:
engine = create_engine("sqlite:///data/out/recipes.db")

sql_database = SQLDatabase(
    engine=engine, include_tables=["recipes"], sample_rows_in_table_info=2
)


We need to tune the model because the LLM can return aswers that are not related to the question. We can use a simple heuristic to filter out these answers. We can check if the answer contains any of the words in the question. If it does, we can discard the answer. This heuristic is not perfect, but it can help to improve the performance of the model. we can modify the model for the query search and the response too


In [4]:
PROMPT_MODEL = """
'Given an input question, synthesize a response from the query results.

If the question is about a recipe, provide the recipe details. If the answer is not in the database, respond with "I don't have that information". If the question is not about a recipe, respond with "I just can help you with recipes".


Query: {query_str}
SQL: {sql_query}
SQL Response: {context_str}
Response: '
"""


response_synthesis_prompt = PromptTemplate(
    template=PROMPT_MODEL,
)


In [5]:
query_engine = NLSQLTableQueryEngine(
    sql_database=sql_database,
    tables=["recipes"],
    llm=llm_gpt,
    response_synthesis_prompt=response_synthesis_prompt,
)


In [6]:
query_engine.get_prompts()["response_synthesis_prompt"]


PromptTemplate(metadata={'prompt_type': <PromptType.CUSTOM: 'custom'>}, template_vars=['query_str', 'sql_query', 'context_str'], kwargs={}, output_parser=None, template_var_mappings=None, function_mappings=None, template='\n\'Given an input question, synthesize a response from the query results.\n\nIf the question is about a recipe, provide the recipe details. If the answer is not in the database, respond with "I don\'t have that information". If the question is not about a recipe, respond with "I just can help you with recipes".\n\n\nQuery: {query_str}\nSQL: {sql_query}\nSQL Response: {context_str}\nResponse: \'\n')

In [7]:
query_str = "Quien es José Smith"
response = query_engine.query(query_str)
response.response


'I just can help you with recipes.'

In [8]:
print(response.metadata["sql_query"])


SELECT * FROM recipes WHERE ingredients LIKE '%José Smith%'


In [9]:
print(response.metadata["result"])


[]
