In [29]:
from langchain.chat_models import ChatOpenAI
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.sql_database import SQLDatabase
import re

def clean_sql_query(query):
    """Removes code block markers and extra formatting from SQL queries."""
    return re.sub(r"```[a-z]*", "", query).strip()

def generate_and_execute_query(user_query, db_file, max_retries=3):
    # Connect to the local SQLite database file
    db = SQLDatabase.from_uri(f"sqlite:///{db_file}")
    # Dynamically extract the schema information from the database
    schema_info = db.get_table_info()
    
    # Chain-of-thought prompt template: asks the model to reason step-by-step and then produce a final query
    cot_prompt = PromptTemplate(
        input_variables=["user_query", "schema_information"],
        template="""
You are an expert SQL query planner. I will provide a user query and the database schema. 
You should:
1. Reason step-by-step about how to solve the user's request using the given schema.
2. Then produce a final SQL query that can be executed to answer the user's question.
3. Show your reasoning as a chain-of-thought. At the end, write "EXECUTE:" followed by only the final query.

User query: {user_query}

Schema Information:
{schema_information}

First, reason step-by-step. Then output the final query prefixed by 'EXECUTE:'.
"""
    )

    # Use ChatOpenAI for chat-based models (e.g., GPT-4 or GPT-3.5)
    llm = ChatOpenAI(temperature=0, model="gpt-4")
    reasoning_chain = LLMChain(llm=llm, prompt=cot_prompt)

    attempt = 0
    reasoning_output = ""
    final_query = ""

    while attempt <= max_retries:
        if attempt == 0:
            # First attempt: produce reasoning and query
            reasoning_output = reasoning_chain.run(user_query=user_query, schema_information=schema_info)
        else:
            # On retries, refine based on the last error
            refinement_chain = LLMChain(llm=llm, prompt=PromptTemplate(
                input_variables=["refinement"],
                template="{refinement}"
            ))
            refinement_prompt = f"""
Your previous query produced an error:
{error_msg}

Please refine your chain-of-thought and produce a corrected SQL query.

{reasoning_output}

Remember to output 'EXECUTE:' followed by the corrected query.
"""
            reasoning_output = refinement_chain.run(refinement=refinement_prompt)

        # Parse the final query from the LLM output
        if "EXECUTE:" not in reasoning_output:
            error_msg = "No 'EXECUTE:' found in the model output."
            attempt += 1
            if attempt > max_retries:
                raise ValueError("Exceeded maximum retries without a valid query.")
            continue

        # Clean the query to remove formatting issues
        final_query = clean_sql_query(reasoning_output.split("EXECUTE:")[-1].strip())

        # Try executing the query
        try:
            results = db.run(final_query)
            return final_query, results
        except Exception as e:
            error_msg = str(e)
            attempt += 1
            if attempt > max_retries:
                raise RuntimeError(f"Failed to produce a working query after {max_retries} attempts.\nLast error: {error_msg}")

    raise RuntimeError("Exited the loop without success or error.")

if __name__ == "__main__":
    DB_FILE = "which employee has the longest name?"
    final_sql, results = generate_and_execute_query(USER_QUERY, DB_FILE, max_retries=3)
    print("Final SQL Query:", final_sql)
    print("Results:")
    display(results_df)
    


ValueError: Exceeded maximum retries without a valid query.