### **Test the sqldb**

In [60]:
from langchain_community.utilities import SQLDatabase
from pyprojroot import here
import warnings
warnings.filterwarnings("ignore")

**Connecting to the sqldb**

In [61]:
db_path = str(here("data")) + "/data_cars_price.db"
db = SQLDatabase.from_uri(f"sqlite:///{db_path}")

In [62]:
db

<langchain_community.utilities.sql_database.SQLDatabase at 0x209b0a9bf70>

In [63]:
# validate the connection to the vectordb
print(db.dialect)
print(db.get_usable_table_names())
db.run("SELECT * FROM cars LIMIT 10;")

sqlite
['cars']


"[('BMW', '3 Series', 2018, '330i xDrive', '330i xDrive 4dr Sedan AWD (2.0L 4cyl Turbo 8A)', 'Sedan', 'I4', 'all wheel drive', 'gas', '363.4/521.4 mi.', '$39,795'), ('BMW', '3 Series', 2018, '340i', '340i 4dr Sedan (3.0L 6cyl Turbo 8A)', 'Sedan', 'I6', 'rear wheel drive', 'gas', '331.8/505.6 mi.', '$46,025'), ('BMW', '3 Series', 2018, '340i xDrive', '340i xDrive 4dr Sedan AWD (3.0L 6cyl Turbo 8A)', 'Sedan', 'I6', 'all wheel drive', 'gas', '331.8/489.8 mi.', '$47,885'), ('BMW', '3 Series', 2018, '328d', '328d 4dr Sedan (2.0L 4cyl Turbodiesel 8A)', 'Sedan', 'I4', 'rear wheel drive', 'diesel', '465.0/645.0 mi.', '$39,330'), ('BMW', '3 Series', 2018, '328d xDrive', '328d xDrive 4dr Sedan AWD (2.0L 4cyl Turbodiesel 8A)', 'Sedan', 'I4', 'all wheel drive', 'diesel', '450.0/600.0 mi.', '$41,190'), ('BMW', '3 Series', 2018, '328d xDrive', '328d xDrive 4dr Wagon AWD (2.0L 4cyl Turbodiesel 8A)', 'Wagon', 'I4', 'all wheel drive', 'diesel', '450.0/600.0 mi.', '$43,420'), ('BMW', '3 Series', 2018, '

### **Test the access to the environment variables**

In [64]:
from openai import AzureOpenAI
from dotenv import load_dotenv
import os
print("Environment variables are loaded:", load_dotenv())

Environment variables are loaded: True


### **Test my GPT model**

In [65]:
client = AzureOpenAI(
    api_key=os.getenv("AZURE_OPENAI_GPT_API_KEY"),  
    api_version="2024-07-01-preview",
    azure_endpoint=os.getenv("AZURE_OPENAI_GPT_ENDPOINT")
)

In [66]:
messages = [
    {"role": "system", "content": str(
        "You are a helpful assistant"
    )},
    {"role": "user", "content": str("hello I'm Ibrahim")}
]


response = client.chat.completions.create(
    model=os.getenv("MODEL_GPT_ID"),
    messages=messages
)

print(response.choices[0].message.content)

Hello, Ibrahim! 😊 How can I assist you today?


In [67]:
# Load the LLM
from langchain.chat_models import AzureChatOpenAI

model_name = os.getenv("MODEL_GPT_ID")
azure_openai_api_key = os.getenv("AZURE_OPENAI_GPT_API_KEY")
azure_openai_endpoint = os.getenv("AZURE_OPENAI_GPT_ENDPOINT")
os.environ["AZURE_OPENAI_API_KEY"]=os.getenv("AZURE_OPENAI_GPT_API_KEY") 
os.environ["AZURE_OPENAI_ENDPOINT"]=os.getenv("AZURE_OPENAI_GPT_ENDPOINT") 



llm = AzureChatOpenAI(
    openai_api_version="2024-07-01-preview",
    azure_deployment=model_name,
    model_name=model_name,
    temperature=0.0)

### **1. SQL query chain**

In [68]:
from langchain.chains import create_sql_query_chain
import re

chain = create_sql_query_chain(llm, db)
response = chain.invoke({"question": "How many cars are there"})


if isinstance(response, dict):
    sql_query = response.get("query") or response.get("sql") or ""
else:
    sql_query = response

# Remove Markdown code block formatting (e.g., ```sql ... ```)
sql_query_clean = re.sub(r"```(?:\w+)?\n", "", sql_query).strip()
sql_query_clean = sql_query_clean.rstrip("```").strip()

print( sql_query_clean)


SELECT COUNT(*) AS "Total_Cars" FROM "cars";


In [69]:
db.run(sql_query_clean)

'[(214,)]'

In [70]:
chain.get_prompts()[0].pretty_print()

You are a SQLite expert. Given an input question, first create a syntactically correct SQLite query to run, then look at the results of the query and return the answer to the input question.
Unless the user specifies in the question a specific number of examples to obtain, query for at most 5 results using the LIMIT clause as per SQLite. You can order the results to return the most informative data in the database.
Never query for all columns from a table. You must query only the columns that are needed to answer the question. Wrap each column name in double quotes (") to denote them as delimited identifiers.
Pay attention to use only the column names you can see in the tables below. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.
Pay attention to use date('now') function to get the current date, if the question involves "today".

Use the following format:

Question: Question here
SQLQuery: SQL Query to run
SQLResult: Result

### **Add QuerySQLDataBaseTool to the chain**
Execute SQL query

**This is the most dangerous part of creating a SQL chain.** Consider carefully if it is OK to run automated queries over your data. Minimize the database connection permissions as much as possible. Consider adding a human approval step to you chains before query execution (see below).

We can use the QuerySQLDatabaseTool to easily add query execution to our chain:

In [71]:
import re
from langchain_community.tools.sql_database.tool import QuerySQLDataBaseTool
from langchain.chains import create_sql_query_chain

# Define a function to remove Markdown formatting from the SQL query.
def remove_markdown(sql_text: str) -> str:
    # Remove the starting triple backticks and any language specifier (like "sql")
    cleaned = re.sub(r"```(?:sql)?\n", "", sql_text)
    # Remove any trailing triple backticks
    cleaned = re.sub(r"```", "", cleaned)
    return cleaned.strip()

# Create the query-writing chain.
write_query = create_sql_query_chain(llm, db)

# Invoke the chain to generate the SQL query.
raw_response = write_query.invoke({"question": "How many cars are there"})

# Depending on the chain's output format, extract the SQL query.
if isinstance(raw_response, str):
    raw_query = raw_response
elif isinstance(raw_response, dict):
    # Adjust the key name as needed.
    raw_query = raw_response.get("query", "")
else:
    raw_query = str(raw_response)

# Clean the SQL query by removing Markdown formatting.
clean_query = remove_markdown(raw_query)

# Now execute the clean query.
execute_query = QuerySQLDataBaseTool(db=db)
result = execute_query.invoke({"query": clean_query})

print("Generated SQL Query:")
print(clean_query)
print("Execution Result:")
print(result)


Generated SQL Query:
SELECT COUNT(*) AS "Total_Cars" FROM cars;
Execution Result:
[(214,)]


### **Answer the question in a user friendly manner**

In [72]:
import re
from operator import itemgetter
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableLambda

# Define a function to remove markdown formatting from SQL query text.
def remove_markdown(sql_text: str) -> str:
    # Remove starting triple backticks and optional "sql" language tag.
    cleaned = re.sub(r"```(?:sql)?\n", "", sql_text)
    # Remove any trailing triple backticks.
    cleaned = re.sub(r"```", "", cleaned)
    return cleaned.strip()

# Create a runnable version of the markdown removal function.
remove_markdown_runnable = RunnableLambda(remove_markdown)

# Your answer prompt remains the same.
answer_prompt = PromptTemplate.from_template(
"""Given the following user question, corresponding SQL query, and SQL result, answer the user question.

Question: {question}
SQL Query: {query}
SQL Result: {result}
Answer: """
)

# Build the answer chain.
answer = answer_prompt | llm | StrOutputParser()

chain = (
    RunnablePassthrough.assign(query=write_query | remove_markdown_runnable)
    .assign(result=itemgetter("query") | execute_query)
    | answer
)

# Invoke the chain.
final_response = chain.invoke({"question": "How many cars are there"})
print(final_response)


There are 214 cars.


### **2. Agents**

Agent which provides a more flexible way of interacting with SQL databases. The main advantages of using the SQL Agent are:

- It can answer questions based on the databases’ schema as well as on the databases’ content (like describing a specific table).
- It can recover from errors by running a generated query, catching the traceback and regenerating it correctly.
- It can answer questions that require multiple dependent queries.
- It will save tokens by only considering the schema from relevant tables.

To initialize the agent, we use create_sql_agent function. This agent contains the SQLDatabaseToolkit which contains tools to:

- Create and execute queries
- Check query syntax
- Retrieve table descriptions
- …

In [41]:
from langchain_community.agent_toolkits import create_sql_agent
agent_executor = create_sql_agent(llm, db=db, agent_type="openai-tools", verbose=True)

agent_executor.invoke(
    {
        "input": " Which car has high price ?"
    }
)



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3m
Invoking: `sql_db_list_tables` with `{}`


[0m[38;5;200m[1;3mcars[0m[32;1m[1;3m
Invoking: `sql_db_schema` with `{'table_names': 'cars'}`


[0m[33;1m[1;3m
CREATE TABLE cars (
	"Make" TEXT, 
	"Model" TEXT, 
	"Year" INTEGER, 
	"Trim" TEXT, 
	"Trim_(description)" TEXT, 
	"Body_type" TEXT, 
	"Cylinders" TEXT, 
	"Drive_type" TEXT, 
	"Engine_type" TEXT, 
	"Range_in_miles_(city/hwy)" TEXT, 
	"Base_Invoice" TEXT
)

/*
3 rows from cars table:
Make	Model	Year	Trim	Trim_(description)	Body_type	Cylinders	Drive_type	Engine_type	Range_in_miles_(city/hwy)	Base_Invoice
BMW	3 Series	2018	330i xDrive	330i xDrive 4dr Sedan AWD (2.0L 4cyl Turbo 8A)	Sedan	I4	all wheel drive	gas	363.4/521.4 mi.	$39,795
BMW	3 Series	2018	340i	340i 4dr Sedan (3.0L 6cyl Turbo 8A)	Sedan	I6	rear wheel drive	gas	331.8/505.6 mi.	$46,025
BMW	3 Series	2018	340i xDrive	340i xDrive 4dr Sedan AWD (3.0L 6cyl Turbo 8A)	Sedan	I6	all wheel drive	gas	331.8/489.8 mi.	

{'input': ' Which car has high price ?',
 'output': 'The cars with the highest prices are:\n\n1. **BMW 3 Series (2018, M3)** - $91,875\n2. **Ford F-150 (2024, Raptor)** - $73,239\n3. **Ford F-150 (2024, King Ranch)** - $69,228\n4. **Ford F-150 (2024, Platinum)** - $69,228\n5. **Ford F-150 (2024, King Ranch)** - $68,943\n6. **Ford F-150 (2024, Platinum)** - $68,943\n7. **Ford F-150 (2020, Limited)** - $66,462\n8. **BMW 3 Series (2012, M3)** - $63,250\n9. **Ford F-150 (2020, Limited)** - $63,312\n10. **BMW 3 Series (2018, M3)** - $62,345'}

In [42]:
agent_executor.invoke({"input": "Describe the cars table"})



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3m
Invoking: `sql_db_list_tables` with `{}`


[0m[38;5;200m[1;3mcars[0m[32;1m[1;3m
Invoking: `sql_db_schema` with `{'table_names': 'cars'}`


[0m[33;1m[1;3m
CREATE TABLE cars (
	"Make" TEXT, 
	"Model" TEXT, 
	"Year" INTEGER, 
	"Trim" TEXT, 
	"Trim_(description)" TEXT, 
	"Body_type" TEXT, 
	"Cylinders" TEXT, 
	"Drive_type" TEXT, 
	"Engine_type" TEXT, 
	"Range_in_miles_(city/hwy)" TEXT, 
	"Base_Invoice" TEXT
)

/*
3 rows from cars table:
Make	Model	Year	Trim	Trim_(description)	Body_type	Cylinders	Drive_type	Engine_type	Range_in_miles_(city/hwy)	Base_Invoice
BMW	3 Series	2018	330i xDrive	330i xDrive 4dr Sedan AWD (2.0L 4cyl Turbo 8A)	Sedan	I4	all wheel drive	gas	363.4/521.4 mi.	$39,795
BMW	3 Series	2018	340i	340i 4dr Sedan (3.0L 6cyl Turbo 8A)	Sedan	I6	rear wheel drive	gas	331.8/505.6 mi.	$46,025
BMW	3 Series	2018	340i xDrive	340i xDrive 4dr Sedan AWD (3.0L 6cyl Turbo 8A)	Sedan	I6	all wheel drive	gas	331.8/489.8 mi.	

{'input': 'Describe the cars table',
 'output': 'The `cars` table contains the following columns:\n\n1. **Make**: The manufacturer of the car (e.g., BMW).\n2. **Model**: The model name of the car (e.g., 3 Series).\n3. **Year**: The year the car was manufactured.\n4. **Trim**: The trim level of the car.\n5. **Trim_(description)**: A detailed description of the trim level.\n6. **Body_type**: The type of car body (e.g., Sedan).\n7. **Cylinders**: The number of cylinders in the engine (e.g., I4, I6).\n8. **Drive_type**: The type of drivetrain (e.g., all-wheel drive, rear-wheel drive).\n9. **Engine_type**: The type of engine (e.g., gas).\n10. **Range_in_miles_(city/hwy)**: The range of the car in miles for city and highway driving.\n11. **Base_Invoice**: The base invoice price of the car.\n\nExample rows from the table:\n- BMW 3 Series (2018), Trim: 330i xDrive, Body Type: Sedan, Cylinders: I4, Drive Type: all-wheel drive, Engine Type: gas, Range: 363.4/521.4 mi., Base Invoice: $39,795.\n- 

In [None]:
import os
from typing import List, Tuple
from utils.load_config import LoadConfig
from langchain_community.utilities import SQLDatabase
from langchain.chains import create_sql_query_chain
from langchain_community.tools.sql_database.tool import QuerySQLDataBaseTool
from langchain_core.prompts import PromptTemplate
from langchain_core.output_parsers import StrOutputParser
from langchain_core.runnables import RunnablePassthrough
from operator import itemgetter
from sqlalchemy import create_engine
from langchain_community.agent_toolkits import create_sql_agent
import langchain
langchain.debug = True

APPCFG = LoadConfig()


class ChatBot:
    """
    A ChatBot class capable of responding to messages using different modes of operation.
    It can interact with SQL databases, leverage language chain agents for Q&A,
    and use embeddings for Retrieval-Augmented Generation (RAG) with ChromaDB.
    """
    @staticmethod
    def respond(chatbot: List, message: str, chat_type: str, app_functionality: str) -> Tuple:
        """
        Respond to a message based on the given chat and application functionality types.

        Args:
            chatbot (List): A list representing the chatbot's conversation history.
            message (str): The user's input message to the chatbot.
            chat_type (str): Describes the type of the chat (interaction with SQL DB or RAG).
            app_functionality (str): Identifies the functionality for which the chatbot is being used (e.g., 'Chat').

        Returns:
            Tuple[str, List, Optional[Any]]: A tuple containing an empty string, the updated chatbot conversation list,
                                             and an optional 'None' value. The empty string and 'None' are placeholder
                                             values to match the required return type and may be updated for further functionality.
                                             Currently, the function primarily updates the chatbot conversation list.
        """
        if app_functionality == "Chat":
            # If we want to use langchain agents for Q&A with our SQL DBs that was created from .sql files.
            if chat_type == "Q&A with stored SQL-DB":
                # directories
                if os.path.exists(APPCFG.sqldb_directory):
                    db = SQLDatabase.from_uri(
                        f"sqlite:///{APPCFG.sqldb_directory}")
                    execute_query = QuerySQLDataBaseTool(db=db)
                    write_query = create_sql_query_chain(
                        APPCFG.langchain_llm, db)
                    answer_prompt = PromptTemplate.from_template(
                        APPCFG.agent_llm_system_role)
                    answer = answer_prompt | APPCFG.langchain_llm | StrOutputParser()
                    chain = (
                        RunnablePassthrough.assign(query=write_query).assign(
                            result=itemgetter("query") | execute_query
                        )
                        | answer
                    )
                    response = chain.invoke({"question": message})

                else:
                    chatbot.append(
                        (message, f"SQL DB does not exist. Please first create the 'sqldb.db'."))
                    return "", chatbot, None
            # If we want to use langchain agents for Q&A with our SQL DBs that were created from CSV/XLSX files.
            elif chat_type == "Q&A with Uploaded CSV/XLSX SQL-DB" or chat_type == "Q&A with stored CSV/XLSX SQL-DB":
                if chat_type == "Q&A with Uploaded CSV/XLSX SQL-DB":
                    if os.path.exists(APPCFG.uploaded_files_sqldb_directory):
                        engine = create_engine(
                            f"sqlite:///{APPCFG.uploaded_files_sqldb_directory}")
                        db = SQLDatabase(engine=engine)
                        print(db.dialect)
                    else:
                        chatbot.append(
                            (message, f"SQL DB from the uploaded csv/xlsx files does not exist. Please first upload the csv files from the chatbot."))
                        return "", chatbot, None
                elif chat_type == "Q&A with stored CSV/XLSX SQL-DB":
                    if os.path.exists(APPCFG.stored_csv_xlsx_sqldb_directory):
                        engine = create_engine(
                            f"sqlite:///{APPCFG.stored_csv_xlsx_sqldb_directory}")
                        db = SQLDatabase(engine=engine)
                    else:
                        chatbot.append(
                            (message, f"SQL DB from the stored csv/xlsx files does not exist. Please first execute `src/prepare_csv_xlsx_sqlitedb.py` module."))
                        return "", chatbot, None
                print(db.dialect)
                print(db.get_usable_table_names())
                agent_executor = create_sql_agent(
                    APPCFG.langchain_llm, db=db, agent_type="openai-tools", verbose=True)
                response = agent_executor.invoke({"input": message})
                response = response["output"]

            elif chat_type == "RAG with stored CSV/XLSX ChromaDB":
                response = APPCFG.azure_openai_client.embeddings.create(
                    input=message,
                    model=APPCFG.embedding_model_name
                )
                query_embeddings = response.data[0].embedding
                vectordb = APPCFG.chroma_client.get_collection(
                    name=APPCFG.collection_name)
                results = vectordb.query(
                    query_embeddings=query_embeddings,
                    n_results=APPCFG.top_k
                )
                prompt = f"User's question: {message} \n\n Search results:\n {results}"

                messages = [
                    {"role": "system", "content": str(
                        APPCFG.rag_llm_system_role
                    )},
                    {"role": "user", "content": prompt}
                ]
                llm_response = APPCFG.azure_openai_client.chat.completions.create(
                    model=APPCFG.model_name,
                    messages=messages
                )
                response = llm_response.choices[0].message.content

            # Get the `response` variable from any of the selected scenarios and pass it to the user.
            chatbot.append(
                (message, response))
            return "", chatbot
        else:
            pass


In [57]:
role = """ Given the following user question, corresponding SQL query, and SQL result, answer the user question.\n
Question: {question}\n
SQL Query: {query}\n
SQL Result: {result}\n
Answer: """

def remove_markdown(sql_text: str) -> str:
    # Remove the starting triple backticks and any language specifier (like "sql")
    cleaned = re.sub(r"```(?:sql)?\n", "", sql_text)
    # Remove any trailing triple backticks
    cleaned = re.sub(r"```", "", cleaned)
    return cleaned.strip()

In [None]:
# Load the LLM
from langchain.chat_models import AzureChatOpenAI
import re
from operator import itemgetter
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableLambda

model_name = os.getenv("MODEL_GPT_ID")
azure_openai_api_key = os.getenv("AZURE_OPENAI_GPT_API_KEY")
azure_openai_endpoint = os.getenv("AZURE_OPENAI_GPT_ENDPOINT")
os.environ["AZURE_OPENAI_API_KEY"]=os.getenv("AZURE_OPENAI_GPT_API_KEY") 
os.environ["AZURE_OPENAI_ENDPOINT"]=os.getenv("AZURE_OPENAI_GPT_ENDPOINT") 



llm = AzureChatOpenAI(
    openai_api_version="2024-07-01-preview",
    azure_deployment=model_name,
    model_name=model_name,
    temperature=0.0)



db_path = str(here("data")) + "/data_cars_price.db"
db = SQLDatabase.from_uri(f"sqlite:///{db_path}")    


remove_markdown_runnable = RunnableLambda(remove_markdown)
answer_prompt = PromptTemplate.from_template(role)
execute_query = QuerySQLDataBaseTool(db=db)
write_query = create_sql_query_chain(llm, db)

answer = answer_prompt | llm | StrOutputParser()
chain = (
    RunnablePassthrough.assign(query=write_query).assign(
        result=itemgetter("query") | execute_query
    )
    | answer
)
response = chain.invoke({"question": " Which car has high price ?"})




In [54]:
chat_sql(db , llm ,  ,role)

'The SQL query provided contains syntax errors, which caused it to fail. Specifically, the use of triple backticks (```sql) is not valid SQL syntax and should be removed. Additionally, the query attempts to manipulate the "Base_Invoice" column, but the exact structure of the column (e.g., its data type and format) is not clear from the context.\n\nTo fix the query, we need to ensure proper syntax and confirm the format of the "Base_Invoice" column. Assuming "Base_Invoice" is a string that starts with a currency symbol (e.g., "$") and may contain commas, the corrected query would look like this:\n\n```sql\nSELECT "Make", "Model", "Year", "Trim", "Base_Invoice"\nFROM cars\nORDER BY CAST(REPLACE(SUBSTR("Base_Invoice", 2), \',\', \'\') AS INTEGER) DESC\nLIMIT 1;\n```\n\nIf the corrected query is executed successfully, it will return the car with the highest price based on the "Base_Invoice" column. However, since the original query failed, we cannot provide the answer to the user\'s questi

In [78]:
import re
from operator import itemgetter
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableLambda




llm = AzureChatOpenAI(
    openai_api_version="2024-07-01-preview",
    azure_deployment=model_name,
    model_name=model_name,
    temperature=0.0)



db_path = str(here("data")) + "/data_cars_price.db"
db = SQLDatabase.from_uri(f"sqlite:///{db_path}")   




# Define a function to remove markdown formatting from SQL query text.
def remove_markdown(sql_text: str) -> str:
    # Remove starting triple backticks and optional "sql" language tag.
    cleaned = re.sub(r"```(?:sql)?\n", "", sql_text)
    # Remove any trailing triple backticks.
    cleaned = re.sub(r"```", "", cleaned)
    return cleaned.strip()

# Create a runnable version of the markdown removal function.
remove_markdown_runnable = RunnableLambda(remove_markdown)


def chat_sql(llm , db ,  message):
    
    remove_markdown_runnable = RunnableLambda(remove_markdown)
    # Your answer prompt remains the same.
    answer_prompt = PromptTemplate.from_template(
        """Given the following user question, corresponding SQL query, and SQL result, answer the user question.
    
    Question: {question}
    SQL Query: {query}
    SQL Result: {result}
    Answer: """
    )
    
    # Build the answer chain.
    answer = answer_prompt | llm | StrOutputParser()
    
    chain = (
        RunnablePassthrough.assign(query=write_query | remove_markdown_runnable)
        .assign(result=itemgetter("query") | execute_query)
        | answer
    )
    
    # Invoke the chain.
    final_response = chain.invoke({"question": message})
    print(final_response)


In [79]:
message =" Which car has high price ?"

In [80]:
chat_sql(llm , db ,  message)

The car with the highest price is the **BMW 3 Series (2018, M3 trim)**, with a base invoice price of **$91,875**.


In [None]:

prompt = f"User's question: {message} \n\n Search results:\n {results}"

messages = [
    {"role": "system", "content": str(
        APPCFG.rag_llm_system_role
    )},
    {"role": "user", "content": prompt}
]
llm_response = APPCFG.azure_openai_client.chat.completions.create(
    model=APPCFG.model_name,
    messages=messages
)
response = llm_response.choices[0].message.content

Environment variables are loaded: True


In [2]:
import os
from dotenv import load_dotenv

print("Environment variables are loaded:", load_dotenv())
model_name = os.getenv("GENERATION_MODEL_ID")
os.environ["GROQ_API_KEY"] = os.getenv("GROQ_API_KEY")
model_name

Environment variables are loaded: True


'llama-3.3-70b-specdec'

In [19]:
from langchain_groq import ChatGroq
import logging
from operator import itemgetter
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain_community.utilities import SQLDatabase
from langchain_community.tools.sql_database.tool import QuerySQLDataBaseTool
from langchain.chains import create_sql_query_chain
import re
import os
from langchain_groq import ChatGroq


# Define a function to remove Markdown formatting from the SQL query.
def remove_markdown(sql_text: str) -> str:

    cleaned =  sql_text.strip()
    # Split the string into lines
    lines = cleaned.split('\n')
    # Create a dictionary
    data_dict = {}
    for line in lines:
        key, value = line.split(": ", 1)
        data_dict[key] = value
    return data_dict

def str_to_dict(text):
    # Split the string into lines
    lines = text.split('\n')
    # Create a dictionary
    data_dict = {}
    for line in lines:
        key, value = line.split(": ", 1)
        data_dict[key] = value
    return data_dict["query"]



llm = ChatGroq(
    model= os.getenv("GENERATION_MODEL_ID"),
    temperature=0,
)
database_sql_path = "data_cars_price.db"
db = SQLDatabase.from_uri(f"sqlite:///{database_sql_path}")


def chat_agent_with_sql(message:str )-> str:
     
    remove_markdown_runnable = RunnableLambda(remove_markdown)
 


    prompt = """Given the following user question, corresponding SQL query, and SQL result, answer the user question.
    
                Question: {question}
                SQL Query: {query}
                SQL Result: {result}
                Answer: """
    
    answer_prompt = PromptTemplate.from_template(prompt)
    
        
    
    
    execute_query = QuerySQLDataBaseTool(db=  db)
    
    write_query = create_sql_query_chain(
                  llm, 
                  db
                )
    
    answer =   answer_prompt |   llm  | StrOutputParser()
    
    chain = (
    RunnablePassthrough.assign(query=write_query |   remove_markdown_runnable  ) 
            .assign(result=itemgetter("query") |   execute_query)
            |   answer
        )
    response =   chain.invoke({"question": message})
    
    return   response         


In [20]:
chat_agent_with_sql( " Which car has high price ?")

INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


ValidationError: 1 validation error for _QuerySQLDataBaseToolInput
query
  field required (type=value_error.missing)

In [11]:
import re
import os
import logging
from operator import itemgetter

# استيراد المكونات من LangChain
from langchain_groq import ChatGroq
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain_community.utilities import SQLDatabase
from langchain_community.tools.sql_database.tool import QuerySQLDataBaseTool
from langchain.chains import create_sql_query_chain

# ضبط مستوى السجل (Logging) إن أردت تتبع التنفيذ
logging.basicConfig(level=logging.INFO)


def remove_markdown(sql_text: str) -> str:
    """
    تزيل دالة remove_markdown تنسيق Markdown من نص استعلام SQL.
    يتم إزالة العلامات الثلاثية (```)، وأي محدد لغة (مثل "sql") في البداية والنهاية.
    """
    # إزالة العلامات الثلاثية الافتتاحية مع محدد اللغة إن وجد
    cleaned = re.sub(r"```(?:sql)?\n", "", sql_text)
    # إزالة العلامات الثلاثية الختامية
    cleaned = re.sub(r"```", "", cleaned)
    return cleaned.strip()


def str_to_dict(text: str) -> dict:
    """
    تحول هذه الدالة النص إلى قاموس (dict) عن طريق تقسيمه على الأسطر،
    ومن ثم تقسيم كل سطر عند أول ظهور لـ ": " للحصول على المفتاح والقيمة.
    في حال لم يحتوي السطر على ": "، يتم تجاهله.
    """
    data_dict = {}
    for line in text.splitlines():
        # تجاهل السطور الفارغة
        if not line.strip():
            continue
        parts = line.split(": ", 1)
        if len(parts) == 2:
            key, value = parts
            data_dict[key.strip()] = value.strip()
        else:
            logging.warning("Line '%s' does not contain key-value separator.", line)
    return data_dict


# تهيئة النموذج اللغوي باستخدام ChatGroq
llm = ChatGroq(
    model=os.getenv("GENERATION_MODEL_ID"),
    temperature=0,
)

# إعداد قاعدة البيانات باستخدام SQLDatabase (الاتصال بقاعدة بيانات SQLite هنا)
database_sql_path = "data_cars_price.db"
db = SQLDatabase.from_uri(f"sqlite:///{database_sql_path}")


def chat_agent_with_sql(message: str) -> str:
    """
    دالة chat_agent_with_sql:
      1. تولد استعلام SQL من سؤال المستخدم باستخدام سلسلة write_query.
      2. تقوم بتنظيف الاستعلام من تنسيق Markdown.
      3. تحويل النتيجة إلى قاموس باستخدام str_to_dict.
      4. تنفيذ الاستعلام على قاعدة البيانات.
      5. استخدام قالب (PromptTemplate) ونموذج لغوي لتوليد الإجابة النهائية.
    """
    # تغليف الدوال remove_markdown و str_to_dict لتصبح Runnable باستخدام RunnableLambda.
    remove_markdown_runnable = RunnableLambda(remove_markdown)
    str_to_dict_runnable = RunnableLambda(str_to_dict)

    # إنشاء قالب الطلب (Prompt) لتوليد الإجابة النهائية.
    prompt_text = (
        "Given the following user question, corresponding SQL query, and SQL result, "
        "answer the user question.\n\n"
        "Question: {question}\n"
        "SQL Query: {query}\n"
        "SQL Result: {result}\n"
        "Answer: "
    )
    answer_prompt = PromptTemplate.from_template(prompt_text)

    # أداة تنفيذ استعلامات SQL على قاعدة البيانات.
    execute_query = QuerySQLDataBaseTool(db=db)

    # إنشاء سلسلة توليد استعلام SQL من سؤال المستخدم.
    write_query = create_sql_query_chain(llm, db)

    # إنشاء سلسلة الإجابة باستخدام نموذج لغوي وقالب الطلب.
    answer_chain = answer_prompt | llm | StrOutputParser()

    # تجميع السلسلة النهائية:
    # 1. نستخدم RunnablePassthrough لإضافة حقل "query" عبر:
    #    - توليد الاستعلام باستخدام write_query.
    #    - تنظيف الاستعلام باستخدام remove_markdown_runnable.
    #    - تحويل النص إلى قاموس باستخدام str_to_dict_runnable.
    # 2. نستخدم assign لإضافة حقل "result" عبر استخراج "query" وتنفيذها على قاعدة البيانات.
    # 3. أخيرًا، تمرير كل هذه المعلومات إلى سلسلة الإجابة لتوليد النتيجة النهائية.
    chain = (
        RunnablePassthrough
        .assign(query=write_query | remove_markdown_runnable | str_to_dict_runnable)
        .assign(result=itemgetter("query") | execute_query)
        | answer_chain
    )

    # استدعاء السلسلة النهائية مع تمرير سؤال المستخدم.
    response = chain.invoke({"question": message})
    return response


# اختبار الدالة وطباعة النتيجة.
if __name__ == "__main__":
    test_question = "How many cars are there"
    final_response = chat_agent_with_sql(test_question)
    print("Final Response:")
    print(final_response)


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


ValidationError: 1 validation error for _QuerySQLDataBaseToolInput
query
  field required (type=value_error.missing)

In [12]:
import re
import os
import logging
from operator import itemgetter
from langchain_groq import ChatGroq
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain_community.utilities import SQLDatabase
from langchain_community.tools.sql_database.tool import QuerySQLDataBaseTool
from langchain.chains import create_sql_query_chain

# ضبط مستوى السجل (اختياري)
logging.basicConfig(level=logging.INFO)

# تعريف دالة إزالة تنسيق Markdown من نص استعلام SQL.
def remove_markdown(sql_text: str) -> str:
    # إزالة العلامات الثلاثية الافتتاحية مع محدد اللغة (مثل "sql")
    cleaned = re.sub(r"```(?:sql)?\n", "", sql_text)
    # إزالة العلامات الثلاثية الختامية
    cleaned = re.sub(r"```", "", cleaned)
    return cleaned.strip()

# دالة لتحويل نص إلى قاموس بناءً على تقسيمه على الأسطر والمفاتيح.
def str_to_dict(text: str) -> dict:
    data_dict = {}
    for line in text.splitlines():
        if not line.strip():
            continue
        parts = line.split(": ", 1)
        if len(parts) == 2:
            key, value = parts
            data_dict[key.strip()] = value.strip()
        else:
            logging.warning("Line '%s' does not contain key-value separator.", line)
    return data_dict

# تغليف الدوال remove_markdown و str_to_dict لتصبح Runnable.
remove_markdown_runnable = RunnableLambda(remove_markdown)
str_to_dict_runnable = RunnableLambda(str_to_dict)

# تهيئة النموذج اللغوي باستخدام ChatGroq
llm = ChatGroq(
    model=os.getenv("GENERATION_MODEL_ID"),
    temperature=0,
)

# إعداد قاعدة البيانات (قاعدة بيانات SQLite في هذه الحالة)
database_sql_path = "data_cars_price.db"
db = SQLDatabase.from_uri(f"sqlite:///{database_sql_path}")

# دالة لتغليف القيمة في قاموس يحتوي على المفتاح "query"
wrap_query = RunnableLambda(lambda q: {"query": q})

def chat_agent_with_sql(message: str) -> str:
    """
    دالة تقوم بتوليد استعلام SQL من سؤال المستخدم، تنظيف الاستعلام، وتنفيذه على قاعدة البيانات،
    ثم توليد إجابة باستخدام قالب ونموذج لغوي.
    """
    # إنشاء قالب الإجابة
    prompt_text = (
        "Given the following user question, corresponding SQL query, and SQL result, answer the user question.\n\n"
        "Question: {question}\n"
        "SQL Query: {query}\n"
        "SQL Result: {result}\n"
        "Answer: "
    )
    answer_prompt = PromptTemplate.from_template(prompt_text)
    
    # أداة تنفيذ استعلامات SQL على قاعدة البيانات.
    execute_query = QuerySQLDataBaseTool(db=db)
    
    # إنشاء سلسلة توليد استعلام SQL من سؤال المستخدم.
    write_query = create_sql_query_chain(llm, db)
    
    # إنشاء سلسلة الإجابة.
    answer_chain = answer_prompt | llm | StrOutputParser()
    
    # تجميع السلسلة النهائية:
    # - يتم توليد الاستعلام باستخدام write_query،
    # - تنظيفه عبر remove_markdown_runnable،
    # - تحويله إلى قاموس باستخدام str_to_dict_runnable،
    # - ثم استخراج قيمة "query" وتمريرها عبر wrap_query إلى execute_query،
    # - وأخيراً يتم تمرير كل هذه المعلومات إلى سلسلة الإجابة.
    chain = (
        RunnablePassthrough
        .assign(query=write_query | remove_markdown_runnable | str_to_dict_runnable)
        .assign(result=itemgetter("query") | wrap_query | execute_query)
        | answer_chain
    )
    
    response = chain.invoke({"question": message})
    return response

if __name__ == "__main__":
    test_question = "How many cars are there"
    final_response = chat_agent_with_sql(test_question)
    print("Final Response:")
    print(final_response)


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


ValidationError: 1 validation error for _QuerySQLDataBaseToolInput
query
  str type expected (type=type_error.str)

In [21]:
import re
import os
import logging
from operator import itemgetter

# استيراد المكونات المطلوبة من LangChain
from langchain_groq import ChatGroq
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain_community.utilities import SQLDatabase
from langchain_community.tools.sql_database.tool import QuerySQLDataBaseTool
from langchain.chains import create_sql_query_chain

# ضبط إعدادات التسجيل (Logging)
logging.basicConfig(level=logging.INFO)

def parse_markdown_to_dict(sql_text: str) -> dict:
    """
    تقوم هذه الدالة بإزالة تنسيق Markdown من نص استعلام SQL
    وتحويله إلى قاموس (dict) عن طريق تقسيم السطور حسب الفاصل ": ".
    
    يُفترض أن يكون كل سطر في النص على الشكل:
       key: value

    إذا حدث خطأ أثناء التقسيم، يتم تسجيل تحذير ويتم تجاهل السطر.
    """
    cleaned = sql_text.strip()
    data_dict = {}
    for line in cleaned.splitlines():
        # تجاهل السطور الفارغة
        if not line.strip():
            continue
        try:
            key, value = line.split(": ", 1)
            data_dict[key.strip()] = value.strip()
        except ValueError:
            logging.warning("Cannot parse line: '%s'. Expected format 'key: value'.", line)
    return data_dict

# تغليف الدالة السابقة لتصبح Runnable باستخدام RunnableLambda
parse_markdown_runnable = RunnableLambda(parse_markdown_to_dict)

# تهيئة النموذج اللغوي باستخدام ChatGroq مع قراءة معرّف النموذج من متغيرات البيئة
llm = ChatGroq(
    model=os.getenv("GENERATION_MODEL_ID"),
    temperature=0,
)

# إعداد قاعدة البيانات باستخدام SQLDatabase (الاتصال بقاعدة بيانات SQLite هنا)
database_sql_path = "data_cars_price.db"
db = SQLDatabase.from_uri(f"sqlite:///{database_sql_path}")

def chat_agent_with_sql(message: str) -> str:
    """
    تقوم هذه الدالة بما يلي:
      1. توليد استعلام SQL من سؤال المستخدم باستخدام سلسلة write_query.
      2. تحويل الاستعلام الناتج (الذي قد يكون بتنسيق Markdown) إلى قاموس.
      3. استخراج مفتاح "query" من القاموس وتمريره إلى أداة تنفيذ الاستعلامات.
      4. استخدام قالب (PromptTemplate) ونموذج لغوي لتوليد الإجابة النهائية.
    """
    # إعداد قالب الإجابة النهائي
    prompt_text = (
        "Given the following user question, corresponding SQL query, and SQL result, answer the user question.\n\n"
        "Question: {question}\n"
        "SQL Query: {query}\n"
        "SQL Result: {result}\n"
        "Answer: "
    )
    answer_prompt = PromptTemplate.from_template(prompt_text)
    
    # أداة تنفيذ استعلامات SQL على قاعدة البيانات.
    execute_query = QuerySQLDataBaseTool(db=db)
    
    # إنشاء سلسلة لتوليد استعلام SQL من سؤال المستخدم.
    write_query = create_sql_query_chain(llm, db)
    
    # إنشاء سلسلة الإجابة النهائية: 
    # يتم تمرير الناتج عبر:
    # - RunnablePassthrough.assign لإضافة مفتاح "query" يتم توليده باستخدام write_query
    #   ويتم تنظيفه وتحويله إلى قاموس باستخدام parse_markdown_runnable.
    # - ثم استخراج المفتاح "query" (عن طريق itemgetter) وتغليفه في قاموس (باستخدام RunnableLambda)
    #   ليتم تمريره إلى execute_query، والتي تنفذ الاستعلام على قاعدة البيانات.
    # - وأخيرًا، يتم تمرير كل هذه المعلومات إلى سلسلة الإجابة (answer_prompt | llm | StrOutputParser).
    chain = (
        RunnablePassthrough
        .assign(query=write_query | parse_markdown_runnable)
        .assign(result=itemgetter("query") | RunnableLambda(lambda q: {"query": q}) | execute_query)
        | (answer_prompt | llm | StrOutputParser())
    )
    
    response = chain.invoke({"question": message})
    return response

if __name__ == "__main__":
    test_question = "How many cars are there"
    final_response = chat_agent_with_sql(test_question)
    print("Final Response:")
    print(final_response)


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


ValidationError: 1 validation error for _QuerySQLDataBaseToolInput
query
  str type expected (type=type_error.str)

In [22]:
import re
import os
import logging
from operator import itemgetter

from langchain_groq import ChatGroq
from langchain_core.output_parsers import StrOutputParser
from langchain_core.prompts import PromptTemplate
from langchain_core.runnables import RunnablePassthrough, RunnableLambda
from langchain_community.utilities import SQLDatabase
from langchain_community.tools.sql_database.tool import QuerySQLDataBaseTool
from langchain.chains import create_sql_query_chain

logging.basicConfig(level=logging.INFO)

# دالة إزالة تنسيق Markdown من استعلام SQL وتحويله إلى قاموس.
def remove_markdown(sql_text: str) -> dict:
    cleaned = sql_text.strip()
    lines = cleaned.split('\n')
    data_dict = {}
    for line in lines:
        # يُفترض أن يكون كل سطر على الشكل "key: value"
        try:
            key, value = line.split(": ", 1)
            data_dict[key.strip()] = value.strip()
        except ValueError:
            logging.warning("Line '%s' cannot be parsed. Skipping.", line)
    return data_dict

# دالة استخراج قيمة "query" من القاموس وتحويلها إلى string.
def extract_query(data: dict) -> str:
    return data.get("query", "")

# تغليف الدوال لتصبح Runnable.
remove_markdown_runnable = RunnableLambda(remove_markdown)
extract_query_runnable = RunnableLambda(extract_query)

# إعداد قالب الإجابة النهائي.
prompt_text = (
    "Given the following user question, corresponding SQL query, and SQL result, answer the user question.\n\n"
    "Question: {question}\n"
    "SQL Query: {query}\n"
    "SQL Result: {result}\n"
    "Answer: "
)
answer_prompt = PromptTemplate.from_template(prompt_text)

# تهيئة أداة تنفيذ استعلامات SQL.
database_sql_path = "data_cars_price.db"
db = SQLDatabase.from_uri(f"sqlite:///{database_sql_path}")
execute_query = QuerySQLDataBaseTool(db=db)

# إنشاء نموذج لغوي باستخدام ChatGroq.
llm = ChatGroq(
    model=os.getenv("GENERATION_MODEL_ID"),
    temperature=0,
)

# إنشاء سلسلة توليد استعلام SQL.
write_query = create_sql_query_chain(llm, db)

# إنشاء سلسلة الإجابة النهائية.
answer_chain = answer_prompt | llm | StrOutputParser()

# تجميع السلسلة النهائية:
# 1. توليد الاستعلام باستخدام write_query.
# 2. تنظيف الاستعلام من تنسيق Markdown باستخدام remove_markdown_runnable.
# 3. استخراج قيمة "query" من القاموس باستخدام extract_query_runnable.
# 4. ثم استخراج المفتاح "query" من المدخلات وتطبيق execute_query على النص.
# 5. أخيرًا، تمرير كل هذه البيانات إلى سلسلة الإجابة لتوليد الإجابة النهائية.
chain = (
    RunnablePassthrough.assign(query=write_query | remove_markdown_runnable | extract_query_runnable)
    .assign(result=itemgetter("query") | execute_query)
    | answer_chain
)

def chat_agent_with_sql(message: str) -> str:
    response = chain.invoke({"question": message})
    return response

if __name__ == "__main__":
    test_question = "How many cars are there"
    final_response = chat_agent_with_sql(test_question)
    print("Final Response:")
    print(final_response)


INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


Final Response:
To provide an accurate answer, I would need the SQL query and the SQL result. However, since they are not provided, I'll guide you through a general approach to answering the question "How many cars are there" based on a hypothetical scenario.

If the SQL query was designed to count the number of cars, it might look something like this:

```sql
SELECT COUNT(*) AS number_of_cars
FROM cars;
```

Assuming the SQL result from this query is a table with one row and one column, like so:

| number_of_cars |
|----------------|
| 100            |

The answer to the user's question "How many cars are there" would be based on the result of the SQL query.

Answer: There are 100 cars.


In [24]:
from langchain_community.agent_toolkits import create_sql_agent

agent_executor = create_sql_agent(llm, db=db, agent_type="openai-tools", verbose=False)
st =agent_executor.invoke(
    {
        "input":  "How many car are there"
    }
)

INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"
INFO:httpx:HTTP Request: POST https://api.groq.com/openai/v1/chat/completions "HTTP/1.1 200 OK"


In [25]:
type(st)

dict

In [28]:
st.get('output')

'There are 214 cars in the database.'