In [10]:

from autogen import GroupChat, GroupChatManager, register_function
from typing import Annotated
import sqlite3
from typing import Annotated
import autogen
import json
import gradio as gr
import warnings
warnings.filterwarnings("ignore")
import pandas as pd
import re


In [11]:
import os

# Shared LLM settings passed as ``llm_config`` to every agent in this notebook.
# The API key is taken from the GROQ_API_KEY environment variable so that a real
# secret never has to be written into (or committed with) the notebook; the
# masked placeholder is kept as the fallback so the cell behaves as before when
# the variable is not set.
# (A ``global`` statement at module level has no effect, so it was dropped.)
config_list = {
    "model": "llama-3.3-70b-versatile",
    "api_key": os.environ.get("GROQ_API_KEY", "#######"),
    "api_type": "groq",
}

In [12]:
def fetch_schema(DB_PATH: Annotated[str, "Path to the Database file"]) -> Annotated[str, "Resulting Schema from the DB"]:
    """Describe every table of a SQLite database as plain text.

    For each table listed in ``sqlite_master`` the column names and declared
    types are read with ``PRAGMA table_info`` and up to three rows are fetched
    to provide sample values.

    Args:
        DB_PATH: Path to the SQLite database file.

    Returns:
        One text block per table, joined by newlines, in the form::

            Table: <name>
            Columns:
              - <column> (<type>) | Samples: <v1>, <v2>, <v3>

        A table without rows reports ``No data`` as its samples.

    Raises:
        sqlite3.Error: If the database cannot be opened or queried.
    """

    def quote_identifier(identifier: str) -> str:
        # Table names cannot be bound as parameters, so quote them as SQL
        # identifiers; this keeps names with spaces or keywords working.
        return '"' + identifier.replace('"', '""') + '"'

    schema = {}
    conn = sqlite3.connect(DB_PATH)
    try:
        cursor = conn.cursor()

        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        table_names = [row[0] for row in cursor.fetchall()]

        for table_name in table_names:
            quoted_name = quote_identifier(table_name)

            cursor.execute(f"PRAGMA table_info({quoted_name});")
            columns = cursor.fetchall()

            cursor.execute(f"SELECT * FROM {quoted_name} LIMIT 3;")
            sample_rows = cursor.fetchall()

            # table_info rows are (cid, name, type, ...); the position of a
            # column in that listing is also its position in a SELECT * row.
            schema[table_name] = [
                {
                    "name": column[1],
                    "type": column[2],
                    "samples": [row[position] for row in sample_rows] if sample_rows else ["No data"],
                }
                for position, column in enumerate(columns)
            ]
    finally:
        # Release the connection even when one of the queries above fails.
        conn.close()

    table_blocks = []
    for table_name, cols in schema.items():
        column_lines = "\n".join(
            "  - {} ({}) | Samples: {}".format(
                col["name"], col["type"], ", ".join(map(str, col["samples"]))
            )
            for col in cols
        )
        table_blocks.append("Table: {}\nColumns:\n{}".format(table_name, column_lines))

    return "\n".join(table_blocks)

# Try the helper on the database at this relative path. Because the call is the
# last expression of the cell, the returned schema text becomes the cell output.
DB_PATH='spider_db/spider_db.db'
fetch_schema(DB_PATH)


'Table: Customers\nColumns:\n  - Customer_ID (INTEGER) | Samples: 1, 2, 3\n  - Customer_Details (VARCHAR(255)) | Samples: America Jaskolski, Ellsworth Paucek, Mrs. Hanna Willms\nTable: Customer_Policies\nColumns:\n  - Policy_ID (INTEGER) | Samples: 119, 141, 143\n  - Customer_ID (INTEGER) | Samples: 1, 2, 3\n  - Policy_Type_Code (CHAR(15)) | Samples: Car, Life, Car\n  - Start_Date (DATE) | Samples: 2018-01-21, 2017-08-21, 2017-06-16\n  - End_Date (DATE) | Samples: 2017-12-15, 2017-09-29, 2017-12-09\nTable: Claims\nColumns:\n  - Claim_ID (INTEGER) | Samples: 143, 423, 442\n  - Policy_ID (INTEGER) | Samples: 744, 552, 473\n  - Date_Claim_Made (DATE) | Samples: 2017-03-11, 2016-08-12, 2017-02-24\n  - Date_Claim_Settled (DATE) | Samples: 2017-11-03, 2018-01-27, 2018-01-21\n  - Amount_Claimed (INTEGER) | Samples: 43884, 79134, 70088\n  - Amount_Settled (INTEGER) | Samples: 1085, 1724, 1189\nTable: Settlements\nColumns:\n  - Settlement_ID (INTEGER) | Samples: 357, 412, 476\n  - Claim_ID (INT

In [13]:
def validate_sql_query(sql_query: str, db_path: str) -> str:
    """
    Check whether an SQL query can be planned by SQLite without running it.

    The statement is prefixed with ``EXPLAIN QUERY PLAN``, so SQLite parses and
    plans it against the database but does not execute it.

    Args:
        sql_query (str): The SQL query to validate.
        db_path (str): Path to the database file.

    Returns:
        str: 'success' if the query is valid, otherwise 'failure: <error_message>'.
    """
    conn = None
    try:
        conn = sqlite3.connect(db_path)
        conn.cursor().execute("EXPLAIN QUERY PLAN " + sql_query)  # Lightweight validation
        return "success"
    except Exception as e:
        # Any problem (bad SQL, unknown table/column, unreadable file, ...) is
        # reported to the caller as text instead of being raised.
        return f"failure: {str(e)}"
    finally:
        # Close the connection on the failure path too, not only on success.
        if conn is not None:
            conn.close()

In [14]:
def validate_and_execute_sql_query(sql_query: Annotated[str, "Result of SQL query from SQLAgent"], db_path: Annotated[str, "Path to the Database"]) -> Annotated[dict, "Result of running the query with keys being either success or failure"]:
    """
    Validates and executes an SQL query on the specified database.

    The statement is first planned with ``EXPLAIN QUERY PLAN``; only if that
    succeeds is it actually executed and committed.

    Args:
        sql_query (str): The SQL query to validate and execute.
        db_path (str): Path to the database file.

    Returns:
        list: Rows (list of tuples) if the statement produces a result set (e.g., SELECT).
        str: 'success' if the statement is valid but produces no result set (e.g., UPDATE).
        str: 'failure: <error_message>' if the query is invalid or execution fails.
    """
    conn = None
    try:
        conn = sqlite3.connect(db_path)
        cursor = conn.cursor()

        # Validate query using EXPLAIN QUERY PLAN
        cursor.execute("EXPLAIN QUERY PLAN " + sql_query)

        # If validation passes, execute the actual query
        cursor.execute(sql_query)

        # ``description`` is set exactly when the statement yields a result set,
        # which also covers queries that do not literally start with "select"
        # (for example ``WITH ... SELECT``).
        rows = cursor.fetchall() if cursor.description is not None else None

        conn.commit()  # Persist changes made by INSERT, UPDATE, DELETE

        if rows is not None:
            return rows  # Returns list of tuples
        return "success"

    except Exception as e:
        return f"failure: {str(e)}"
    finally:
        # Always release the connection; uncommitted work is discarded on error.
        if conn is not None:
            conn.close()

#### final function


In [15]:
def _extract_generated_sql(messages):
    """Return the last non-empty ``sql_query`` argument SQLAgent passed in a tool call, or None."""
    latest_sql = None
    for msg in messages:
        if msg.get("name") != "SQLAgent":
            continue
        # ``tool_calls`` may be missing or explicitly None on plain text messages.
        for tool_call in msg.get("tool_calls") or []:
            arguments_str = (tool_call.get("function") or {}).get("arguments", "{}")
            try:
                arguments = json.loads(arguments_str)  # Convert string to dictionary
            except (TypeError, json.JSONDecodeError):
                continue  # malformed arguments: ignore this call instead of crashing
            candidate = arguments.get("sql_query") if isinstance(arguments, dict) else None
            if candidate:
                latest_sql = candidate
                print("Extracted SQL Query:\n", candidate)
    return latest_sql


def _run_query(db_path, sql, read_only=False):
    """Execute ``sql`` on the SQLite file at ``db_path`` and return all rows; the connection is always closed."""
    conn = sqlite3.connect(db_path)
    try:
        if read_only:
            # Refuse any statement that would modify the database.
            conn.execute("PRAGMA query_only = ON")
        return conn.execute(sql).fetchall()
    finally:
        conn.close()


def final_function(DB_PATH,user_query):
    """Generate SQL for a natural-language question with a group of agents and grade it.

    Steps:
      1. Build the Admin, SchemaAgent, SQLAgent, ValidationAgent,
         SQLErrorCorrectionAgent and ResultFormattingAgent agents, register the
         schema/validation tools and run the group chat.
      2. Take the generated SQL from SQLAgent's tool-call arguments and run it
         (read-only) to obtain its result.
      3. Look the question up (case-insensitively) in ``filtered_insurance.json``
         and run the reference query stored there.
      4. Ask JudgeAgent to score the generated SQL against the reference SQL and
         render the returned JSON as an HTML table.

    Args:
        DB_PATH: Path to the SQLite database file.
        user_query: The natural-language question.

    Returns:
        A 5-tuple ``(html, reference_sql, generated_sql, generated_result,
        reference_result)``. The same shape is returned on every path; when the
        question has no reference entry or the judge reply cannot be parsed,
        ``html`` carries a short explanatory message instead of the table.

    Side effects:
        Sets the module-level globals ``generated_sql`` and ``ground_truth_sql``.
    """
    from html import escape  # function-scope import: only used for the HTML report

    global generated_sql, ground_truth_sql

    user_proxy = autogen.ConversableAgent(
        name="Admin",
        code_execution_config=False,
        llm_config=config_list,
        human_input_mode="NEVER", max_consecutive_auto_reply=1,
        system_message = f"Analyze the user query :{user_query}, and pass it to SchemaAgent for further process.",
        is_termination_msg=lambda msg: (
            msg.get("content") is not None
            and "thank you" in msg["content"].lower()
            and msg["content"].strip().lower() != "thank you."
        )
    )

    schema_agent = autogen.ConversableAgent(
        name="SchemaAgent",
        llm_config=config_list,
        system_message=f"As a database expert, your task is to inspect the database located at {DB_PATH} "
            f"by internally calling the function fetch_schema({DB_PATH}). "
            "Do not print or output the schema directly. Instead, analyze it to identify the structure, including table names, columns, and data types. "
            f"Next, examine the user query: {user_query} and determine which tables are most relevant for answering it. "
            "If no relevant tables can be identified, alert the user with the message: "
            "'The query cannot be answered as no relevant tables were found in the database.' Provide a reason for this and terminate the chat by saying 'Thank you'. "
            "Once the relevant tables are determined or the query is deemed unanswerable, terminate the process by saying 'terminate' "
            "and return only the necessary information to the SQLAgent."
    )

    sql_agent = autogen.ConversableAgent(
        name="SQLAgent",
        llm_config=config_list,
        system_message=(
            "As a SQL expert, your task is to generate an accurate SQL query based on the provided database schema and user query.and column name should be in the table"
            "You will receive the structured schema and the user query as input from SchemaAgent."
            "Analyze the schema to determine the relevant tables and construct an optimized SQL query that accurately answers the user's request. "
            "Once you have generated the SQL query, terminate the process by saying 'terminate' and return the final SQL query."
            "If no relevant tables can be identified, alert the user with the message: 'The query cannot be answered as no relevant tables were found in the database.' Provide a reason for this and terminate the chat by saying thank you"
        )
    )

    validation_agent = autogen.ConversableAgent(
        name="ValidationAgent",
        llm_config=config_list,
        system_message=(
            "As a SQL validation expert, your task is to validate the SQL query generated by SQLAgent. "
            f"You will receive an SQL query and the database path : {DB_PATH} as input. "
            f"Use the provided helper function `validate_and_execute_sql_query(sql_query, {DB_PATH})` to check whether the query is correct. "
            "If the query is valid, respond with 'SQL query is valid' and return the result to ResultFormattingAgent."
            "If the query is invalid, respond with 'SQL query is invalid' along with the error message to SQLErrorCorrectionAgent."
            "Once validation is complete, terminate the process by saying 'terminate'."
        )
    )

    sql_error_correction_agent = autogen.ConversableAgent(
        name="SQLErrorCorrectionAgent",
        llm_config=config_list,
        system_message=(
            "You are an expert in SQL query error correction. "
            "Your task is to correct SQL query errors based on the given failure message and database schema. "
            "You will receive a failed SQL query along with an error message explaining why it failed. "
            "Ensure the corrected query follows proper SQL syntax and is aligned with the database schema. "
            "Once you have corrected the query, return the fixed version to the ValidationAgent for validation."
        )
    )

    result_formatting_agent = autogen.ConversableAgent(
        name="ResultFormattingAgent",
        llm_config=config_list,
        system_message=(
            "You are responsible for formatting database query results into a structured, human-readable format. "
            "When provided with raw query results, process them into a clear tabular format, numbered list, or summary, depending on the result type. "
            "For large datasets, summarize key trends instead of listing every record. "
            "Return the formatted output in a way that is easy to understand."
            "Once you have the final result, output the result and finally close the conversation by saying thank you"
        )
    )

    # Tool registration: ``caller`` is the agent allowed to suggest the call,
    # ``executor`` is the agent that actually runs the Python function.
    register_function(
        fetch_schema,
        caller=schema_agent,
        executor=schema_agent,
        name="FetchingSchemaSchemaAgent",
        description="Get Fetch Schema",
    )

    register_function(
        fetch_schema,
        caller=user_proxy,
        executor=user_proxy,
        name="FetchingSchemaUserProxy",
        description="Get Fetch Schema",
    )

    register_function(
        validate_sql_query,
        caller=sql_agent,
        executor=validation_agent,
        name="ValidationFromSQLAgent",
        description="Validationg the query",
    )

    register_function(
        validate_and_execute_sql_query,
        caller=validation_agent,
        executor=validation_agent,
        name="ValidationToAgent",
        description="Validationg the query",
    )

    # The transition map restricts which agent may speak after which.
    groupchat = autogen.GroupChat(
        agents=[user_proxy, schema_agent, sql_agent, validation_agent, sql_error_correction_agent, result_formatting_agent],
        messages=[],
        max_round=5,
        allowed_or_disallowed_speaker_transitions={
            user_proxy: [schema_agent, sql_agent, validation_agent, sql_error_correction_agent, result_formatting_agent],
            schema_agent: [user_proxy, sql_agent],
            sql_agent: [user_proxy, validation_agent],
            validation_agent: [sql_error_correction_agent, result_formatting_agent],
            sql_error_correction_agent: [user_proxy, validation_agent],
            result_formatting_agent: [user_proxy]
        },
        speaker_transitions_type="allowed",
    )

    # NOTE(review): ``config_list`` is a single dict in this notebook although the
    # key name suggests a list of configs - confirm against the autogen version in use.
    manager = autogen.GroupChatManager(
        groupchat=groupchat, llm_config={"config_list": config_list},
        # Stop as soon as ResultFormattingAgent has produced non-empty content.
        is_termination_msg=lambda msg: (
            isinstance(msg, dict) and
            msg.get("name") == "ResultFormattingAgent" and
            bool(msg.get("content"))  # Ensure content is not empty
        )
    )
    user_proxy.initiate_chat(
        manager,
        max_turns=3,
        message = f"Analyze the user query : {user_query}"
    )

    # Reset on every run so a query left over from a previous call is never reused.
    generated_sql = _extract_generated_sql(manager.groupchat.messages)

    # Run the generated SQL itself so the "generated result" really reflects it
    # (it used to be a copy of the reference result). Errors are shown as text.
    if generated_sql:
        try:
            generated_result = _run_query(DB_PATH, generated_sql, read_only=True)
        except (sqlite3.Error, sqlite3.Warning) as exc:
            generated_result = f"failure: {exc}"
    else:
        generated_result = "No SQL query was generated."

    judge_agent = autogen.ConversableAgent(
        name="JudgeAgent",
        llm_config=config_list,
        system_message=(
            "You are an expert in SQL query evaluation. Your task is to assess SQL queries generated by the SQLAgent by comparing them with reference SQL queries from a dataset. "
            "For each comparison, evaluate the following aspects using the defined numeric scoring guidelines:\n\n"

            "1. Accuracy (0–10): Does the AI-generated SQL return the same results as the reference SQL when executed on the same database?\n"
            "- 10: Identical results\n"
            "- 5: Partial match (e.g., missing filters, subset/superset of rows)\n"
            "- 0: Incorrect or different logic\n\n"

            "2. Efficiency (0–10): Evaluate the query's performance in terms of execution time, indexing, and join optimization.\n"
            "- 10: Highly efficient (<1s, optimized with indexes and joins)\n"
            "- 6: Moderate (1–3s, room for improvement)\n"
            "- 3: Low efficiency (>3s, full scans or nested loops)\n\n"

            "3. Hallucination (0–10): Check for invalid elements like non-existent tables, columns, or illogical clauses.\n"
            "- 0: No hallucination\n"
            "- 5: Minor (1–2 issues)\n"
            "- 10: Major hallucinations (e.g., many or critical invalid references)\n\n"

            "4. Completeness (0–10): Does the query fully answer the user’s question?\n"
            "- 10: Fully complete\n"
            "- 5: Partially complete (some logic or filters missing)\n"
            "- 0: Incomplete or irrelevant\n\n"

            "5. Structure Similarity (0–10): Are similar joins, filters, subqueries, and groupings used as in the reference?\n"
            "- 10: Very similar\n"
            "- 6: Moderately similar\n"
            "- 2: Very different\n\n"

            "6. Readability & Maintainability (0–10): Is the SQL query easy to read, well-formatted, and maintainable?\n"
            "- 10: Excellent\n"
            "- 7: Good\n"
            "- 4: Fair\n"
            "- 1: Poor\n\n"

            "7. Overall Score (0–10): Compute using the formula below:\n"
            "   score = round(0.3 * accuracy + 0.15 * efficiency + 0.15 * (10 - hallucination) + "
            "0.2 * completeness + 0.1 * structure_similarity + 0.1 * readability)\n\n"

            "Return your evaluation strictly in the following JSON format:\n"
            "{\n"
            '  "accuracy": integer (0–10),\n'
            '  "efficiency": integer (0–10),\n'
            '  "hallucination": integer (0–10),\n'
            '  "completeness": integer (0–10),\n'
            '  "structure_similarity": integer (0–10),\n'
            '  "readability": integer (0–10),\n'
            '  "score": integer (0–10),\n'
            '  "comments": "Detailed feedback explaining the evaluation, including mismatches, inefficiencies, hallucinations, and suggested improvements."\n'
            "}"
        )
    )

    with open("filtered_insurance.json", "r") as file:
        filtered_data = json.load(file)

    # Case-insensitive lookup of the question; if it occurs more than once the
    # last occurrence is used.
    wanted_question = user_query.strip().lower()
    reference_entry = None
    for entry in filtered_data:
        if entry.get("question", "").strip().lower() == wanted_question:
            reference_entry = entry

    if reference_entry is None:
        # Without a reference there is nothing to grade against; still return
        # all five values the UI expects instead of failing on unbound names.
        ground_truth_sql = None
        return (
            "<p>No reference query for this question was found in filtered_insurance.json, "
            "so the generated SQL could not be evaluated.</p>",
            "",
            generated_sql,
            generated_result,
            "",
        )

    nl_query = reference_entry["question"]
    ground_truth_sql = reference_entry["query"]
    db_results = _run_query(DB_PATH, ground_truth_sql)

    # Prepare the evaluation prompt for the JudgeAgent
    evaluation_prompt = (
        f"Natural Language Query: {nl_query}\n"
        f"Reference SQL: {ground_truth_sql}\n"
        f"AI-Generated SQL: {generated_sql}\n\n"
        "Please evaluate the AI-Generated SQL against the Reference SQL based on the criteria provided  and check the logic not the structure."
    )

    # JudgeAgent evaluates the generated SQL
    evaluation = judge_agent.initiate_chat(
        message=evaluation_prompt,
        recipient=judge_agent,
        max_turns=1
    )

    # The judge is asked for JSON but may wrap it in prose: take the outermost
    # ``{...}`` span of the chat summary and parse that.
    summary_text = evaluation.summary or ""
    json_match = re.search(r"\{.*\}", summary_text, re.DOTALL)
    if json_match is None:
        return (
            "<p>Error decoding JSON: no JSON object found in the judge response</p>",
            ground_truth_sql,
            generated_sql,
            generated_result,
            db_results,
        )

    try:
        metrics = json.loads(json_match.group(0))  # Now it's a dictionary
    except json.JSONDecodeError as e:
        return f"<p>Error decoding JSON: {e}</p>", ground_truth_sql, generated_sql, generated_result, db_results

    if not isinstance(metrics, dict):
        return (
            "<p>Error decoding JSON: the judge response is not a JSON object</p>",
            ground_truth_sql,
            generated_sql,
            generated_result,
            db_results,
        )

    html_table = """
    <h3> Evaluation Metrics (as Table)</h3>
    <table style='width:100%; border-collapse: collapse;' border='1'>
    <tr><th>Metric</th><th>Value</th></tr>
    """
    for k, v in metrics.items():
        # Judge output is model-generated text: escape it before embedding in HTML.
        html_table += f"<tr><td><b>{escape(str(k))}</b></td><td>{escape(str(v))}</td></tr>"
    html_table += "</table>"

    return html_table,ground_truth_sql,generated_sql , generated_result, db_results



def result_formating_agent(generated_result):
    """Ask an LLM agent to turn raw query results into a readable summary.

    A fresh ``ResultFormattingAgent`` is created and sent a single message that
    contains ``generated_result``; the summary of that one-turn chat is returned.

    Args:
        generated_result: Raw query result (or its text form) to be summarised.

    Returns:
        A 2-tuple ``(summary_text, chart)``. ``chart`` is always ``None``: no
        chart is produced here, and the second value is wired to an image
        output in the UI, which cannot display an arbitrary placeholder string.
    """
    result_formatting_agent = autogen.ConversableAgent(
        name="ResultFormattingAgent",
        llm_config=config_list,
        system_message=(
            "You are responsible for formatting database query results into a structured, human-readable format. "
            "When provided with raw query results, process them into a clear tabular format, numbered list, or summary, depending on the result type. "
            "For large datasets, summarize key trends instead of listing every record. "
            "Return the formatted output in a way that is easy to understand."
            "Once you have the final result, output the result and finally close the conversation by saying thank you"
        )
    )

    input_to_agent = f"generated_database = {generated_result}"

    # The agent is both sender and recipient; one turn yields the formatted text.
    response_user = result_formatting_agent.initiate_chat(
        recipient=result_formatting_agent,
        message=input_to_agent,
        max_turns=1
    )

    response_text = response_user.summary
    return response_text, None

In [16]:
# Example question and database used for manual runs of the pipeline.
# The string must not be followed by a comma: a trailing comma would turn
# ``user_query`` into a 1-tuple, and ``final_function`` calls ``.strip()`` on it.
user_query = "Among all the claims, which claims have a claimed amount larger than the average? List the date the claim was made and the date it was settled."
DB_PATH = "spider_db/spider_db.db"


# final_result,ground_truth,generated_sql,var1,var2 =final_function(DB_PATH,user_query)
# print('kkkkkkkkkkkkkkkkkk', final_result,'\n\n\n',ground_truth ,'\n\n\n',generated_sql )

In [17]:
def clear_all():
    """Return a tuple of six empty strings, used to blank six UI fields at once."""
    blank = ""
    return (blank,) * 6

In [18]:

# Gradio front end for the evaluator. Components are rendered in the order they
# are created inside the Blocks context, so statement order here is the layout.
with gr.Blocks() as demo:

    gr.Markdown("# 🧠 Natural Language to SQL Evaluator")
    user_input = gr.Textbox(label="Enter your natural language query")
    db_path_input = gr.Textbox(label="DB Path", value="spider_db/spider_db.db")
    run_btn = gr.Button("Run Evaluation")
    generated_sql_output = gr.Textbox(label="Generated SQL")
    reference_sql_output = gr.Textbox(label="Reference SQL (from dataset)")
    judge_output =gr.HTML(label="Judge Evaluation (as Table)")
    # NOTE: these two module-level names are Gradio components, not query results.
    generated_result=gr.Textbox(label="Generated SQL result ")
    db_results=gr.Textbox(label="Reference query result")
    clear_btn = gr.Button("Clear")
    # final_function(DB_PATH, user_query) is called with the inputs in this order;
    # its return values are assigned positionally to the five outputs below.
    run_btn.click(
        final_function,
        inputs=[db_path_input,user_input],
        outputs=[judge_output,reference_sql_output,generated_sql_output,generated_result, db_results]
    )
    
    analyze_btn = gr.Button("Analyze")
    
    nl_output = gr.Textbox(label="Natural Language Summary")
    # Hidden placeholder for a chart; it receives the second value returned by
    # result_formating_agent.
    chart_output = gr.Image(label="Generated Chart",visible=False)

    # Sends the current text of the "Generated SQL result" box to the formatter.
    analyze_btn.click(
        result_formating_agent,
        inputs=[generated_result],
        outputs=[nl_output, chart_output]
    )

    
    # clear_all returns six empty strings, one for each of the six outputs listed.
    clear_btn.click(
        clear_all,
        inputs=[],
        outputs=[user_input, reference_sql_output, generated_sql_output, generated_result, db_results, judge_output]
    )

# NOTE(review): share=True also publishes the app on a public gradio.live URL.
# The app accepts a database path and runs SQL against it, so confirm that
# public exposure is intended before sharing the link.
demo.launch(share=True)



* Running on local URL:  http://127.0.0.1:7861
* Running on public URL: https://88a7fdd04be3667625.gradio.live

This share link expires in 1 week. For free permanent hosting and GPU upgrades, run `gradio deploy` from the terminal in the working directory to deploy to Hugging Face Spaces (https://huggingface.co/spaces)




[33mAdmin[0m (to chat_manager):

Analyze the user query : Among all the claims, which claims have a claimed amount larger than the average? List the date the claim was made and the date it was settled. 

--------------------------------------------------------------------------------
[32m
Next speaker: SchemaAgent
[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[33mSchemaAgent[0m (to chat_manager):

[32m***** Suggested tool call (call_xhqb): FetchingSchemaSchemaAgent *****[0m
Arguments: 
{"DB_PATH": "spider_db/spider_db.db"}
[32m**********************************************************************[0m

--------------------------------------------------------------------------------
[32m
Next speaker: SchemaAgent
[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[35m
>>>>>>>> EXECUTING FUNCTION FetchingSchemaSchemaAgent...
Call ID: call_xhqb
Input arguments: {'DB_PATH': 'spider_db/spider_db.db'}[0m
[33mSchemaAgent[0m (to chat_manager):

[32m***** Response from calling tool (call_xh

In [10]:
def final_function_eval(DB_PATH,user_query):

    user_proxy = autogen.ConversableAgent(
    name="Admin",
    code_execution_config=False,
    llm_config=config_list,
    human_input_mode="NEVER", max_consecutive_auto_reply=1,
    system_message = f"Analyze the user query :{user_query}, and pass it to SchemaAgent for further process.",
    is_termination_msg=lambda msg: (
                                        msg.get("content") is not None 
                                        and "thank you" in msg["content"].lower() 
                                        and msg["content"].strip().lower() != "thank you."
                                    )
)


    schema_agent = autogen.ConversableAgent(
        name="SchemaAgent",
        llm_config=config_list,
        system_message=f"As a database expert, your task is to inspect the database located at {DB_PATH} "
            f"by internally calling the function fetch_schema({DB_PATH}). "
            "Do not print or output the schema directly. Instead, analyze it to identify the structure, including table names, columns, and data types. "
            f"Next, examine the user query: {user_query} and determine which tables are most relevant for answering it. "
            "If no relevant tables can be identified, alert the user with the message: "
            "'The query cannot be answered as no relevant tables were found in the database.' Provide a reason for this and terminate the chat by saying 'Thank you'. "
            "Once the relevant tables are determined or the query is deemed unanswerable, terminate the process by saying 'terminate' "
            "and return only the necessary information to the SQLAgent."
    )
    
    
    sql_agent = autogen.ConversableAgent(
        name="SQLAgent",
        llm_config=config_list,
        system_message=(
            "As a SQL expert, your task is to generate an accurate SQL query based on the provided database schema and user query.and column name should be in the table"
            "You will receive the structured schema and the user query as input from SchemaAgent."
            "Analyze the schema to determine the relevant tables and construct an optimized SQL query that accurately answers the user's request. "
            "Once you have generated the SQL query, terminate the process by saying 'terminate' and return the final SQL query."
            "If no relevant tables can be identified, alert the user with the message: 'The query cannot be answered as no relevant tables were found in the database.' Provide a reason for this and terminate the chat by saying thank you"
            
        )
    )
    
    validation_agent = autogen.ConversableAgent(
        name="ValidationAgent",
        llm_config=config_list,
        system_message=(
            "As a SQL validation expert, your task is to validate the SQL query generated by SQLAgent. "
            f"You will receive an SQL query and the database path : {DB_PATH} as input. "
            f"Use the provided helper function `validate_and_execute_sql_query(sql_query, {DB_PATH})` to check whether the query is correct. "
            "If the query is valid, respond with 'SQL query is valid' and return the result to ResultFormattingAgent."
            "If the query is invalid, respond with 'SQL query is invalid' along with the error message to SQLErrorCorrectionAgent."
            "Once validation is complete, terminate the process by saying 'terminate'."
        )
    )
    
    sql_error_correction_agent = autogen.ConversableAgent(
        name="SQLErrorCorrectionAgent",
        llm_config=config_list,
        system_message=(
            "You are an expert in SQL query error correction. "
            "Your task is to correct SQL query errors based on the given failure message and database schema. "
            "You will receive a failed SQL query along with an error message explaining why it failed. "
            "Ensure the corrected query follows proper SQL syntax and is aligned with the database schema. "
            "Once you have corrected the query, return the fixed version to the ValidationAgent for validation."
        )
    )
    
    result_formatting_agent = autogen.ConversableAgent(
        name="ResultFormattingAgent",
        llm_config=config_list,
        system_message=(
            "You are responsible for formatting database query results into a structured, human-readable format. "
            "When provided with raw query results, process them into a clear tabular format, numbered list, or summary, depending on the result type. "
            "For large datasets, summarize key trends instead of listing every record. "
            "Return the formatted output in a way that is easy to understand."
            "Once you have the final result, output the result and finally close the conversation by saying thank you"
        )
    )

    register_function(
    fetch_schema,
    caller=schema_agent,
    executor=schema_agent,
    name="FetchingSchemaSchemaAgent",
    description="Get Fetch Schema",
    )
    
    register_function(
        fetch_schema,
        caller=user_proxy,
        executor=user_proxy,
        name="FetchingSchemaUserProxy",
        description="Get Fetch Schema",
    )
    
    register_function(
        validate_sql_query,
        caller=sql_agent,
        executor=validation_agent,
        name="ValidationFromSQLAgent",
        description="Validationg the query",
    )
    
    register_function(
        validate_and_execute_sql_query,
        caller=validation_agent,
        executor=validation_agent,
        name="ValidationToAgent",
        description="Validationg the query",
    )

    groupchat = autogen.GroupChat(
    agents=[user_proxy, schema_agent, sql_agent, validation_agent, sql_error_correction_agent, result_formatting_agent],
    messages=[],
    max_round=5,
    allowed_or_disallowed_speaker_transitions={
    user_proxy: [schema_agent, sql_agent, validation_agent, sql_error_correction_agent, result_formatting_agent],
    schema_agent: [user_proxy, sql_agent],
    sql_agent: [user_proxy, validation_agent],
    validation_agent: [sql_error_correction_agent, result_formatting_agent],
    sql_error_correction_agent: [user_proxy, validation_agent],
    result_formatting_agent: [user_proxy]
    },
    speaker_transitions_type="allowed",
    )
    
    
    # groupchat = autogen.GroupChat(
    #     agents=[user_proxy, schema_agent, sql_agent, validation_agent, sql_error_correction_agent, result_formatting_agent],
    #     messages=[],
    #     max_round=5
    # )
    
    
    manager = autogen.GroupChatManager(
        groupchat=groupchat, llm_config={"config_list": config_list},
        is_termination_msg=lambda msg: (
        isinstance(msg, dict) and
        msg.get("name") == "ResultFormattingAgent" and
        bool(msg.get("content"))  # Ensure content is not empty
    )
    )
    chat_result = user_proxy.initiate_chat(
    manager,
    max_turns=3,
    message = f"Analyze the user query : {user_query}"
    )

    for idx, msg in enumerate(manager.groupchat.messages):
        if msg.get("name") == "SQLAgent":
            for tool_call in msg.get("tool_calls", []):
                arguments_str = tool_call.get("function", {}).get("arguments", "{}")
                arguments = json.loads(arguments_str)  # Convert string to dictionary
                generated_sql = arguments.get("sql_query")
    
                if generated_sql:  # Only print if the SQL query is not None
                    print("Extracted SQL Query:\n", generated_sql)


    judge_agent = autogen.ConversableAgent(
        name="JudgeAgent",
        llm_config=config_list,
        system_message=(
            "You are an expert in SQL query evaluation. Your task is to assess SQL queries generated by the SQLAgent by comparing them with reference SQL queries from a dataset. "
            "For each comparison, evaluate the following aspects using the defined numeric scoring guidelines:\n\n"
    

            "1. Accuracy (0–10): Does the AI-generated SQL return the same results as the reference SQL when executed on the same database?\n"
            "- 10: Identical results\n"
            "- 5: Partial match (e.g., missing filters, subset/superset of rows)\n"
            "- 0: Incorrect or different logic\n\n"
    
            "2. Efficiency (0–10): Evaluate the query's performance in terms of execution time, indexing, and join optimization.\n"
            "- 10: Highly efficient (<1s, optimized with indexes and joins)\n"
            "- 6: Moderate (1–3s, room for improvement)\n"
            "- 3: Low efficiency (>3s, full scans or nested loops)\n\n"
    
            "3. Hallucination (0–10): Check for invalid elements like non-existent tables, columns, or illogical clauses.\n"
            "- 0: No hallucination\n"
            "- 5: Minor (1–2 issues)\n"
            "- 10: Major hallucinations (e.g., many or critical invalid references)\n\n"
    
            "4. Completeness (0–10): Does the query fully answer the user’s question?\n"
            "- 10: Fully complete\n"
            "- 5: Partially complete (some logic or filters missing)\n"
            "- 0: Incomplete or irrelevant\n\n"
    
            "5. Structure Similarity (0–10): Are similar joins, filters, subqueries, and groupings used as in the reference?\n"
            "- 10: Very similar\n"
            "- 6: Moderately similar\n"
            "- 2: Very different\n\n"
    
            "6. Readability & Maintainability (0–10): Is the SQL query easy to read, well-formatted, and maintainable?\n"
            "- 10: Excellent\n"
            "- 7: Good\n"
            "- 4: Fair\n"
            "- 1: Poor\n\n"
    
            "7. Overall Score (0–10): Compute using the formula below:\n"
            "   score = round(0.3 * accuracy + 0.15 * efficiency + 0.15 * (10 - hallucination) + "
            "0.2 * completeness + 0.1 * structure_similarity + 0.1 * readability)\n\n"
    
            "Return your evaluation strictly in the following JSON format:\n"
            "{\n"
            '  "accuracy": integer (0–10),\n'
            '  "efficiency": integer (0–10),\n'
            '  "hallucination": integer (0–10),\n'
            '  "completeness": integer (0–10),\n'
            '  "structure_similarity": integer (0–10),\n'
            '  "readability": integer (0–10),\n'
            '  "score": integer (0–10),\n'
            '  "comments": "Detailed feedback explaining the evaluation, including mismatches, inefficiencies, hallucinations, and suggested improvements."\n'
            "}"
        )
    )
    with open("filtered_insurance.json", "r") as file:
        filtered_data = json.load(file)
    evaluation_results = []

    if isinstance(user_query, tuple):
        user_query = user_query[0]
        print('user_query',user_query)
    for entry in filtered_data:
        # print('gggggggggg',user_query,entry["question"])
        if entry["question"]==user_query:
            print('sssssssssss',user_query)
            nl_query = entry["question"]
            ground_truth_sql = entry["query"]
     
            # Prepare the evaluation prompt for the JudgeAgent
            evaluation_prompt = (
                f"Natural Language Query: {nl_query}\n"
                f"Reference SQL: {ground_truth_sql}\n"
                f"AI-Generated SQL: {generated_sql}\n\n"
                "Please evaluate the AI-Generated SQL against the Reference SQL based on the criteria provided  and check the logic not the structure."
            )
        
            # JudgeAgent evaluates the generated SQL
            evaluation = judge_agent.initiate_chat(
                message=evaluation_prompt,
                recipient=judge_agent,
                max_turns=1
            )
        
            evaluation_results.append(evaluation)
            summary = evaluation.summary
            break
                
    start = summary.find("{")
    end = summary.rfind("}") + 1
    json_part = summary[start:end]
    
    # Step 2: Parse JSON
    eval_data = json.loads(json_part)
    df = pd.DataFrame(list(eval_data.items()), columns=["Metric", "Value"])
    pd.set_option('display.max_colwidth', None)
    
    return df,ground_truth_sql,generated_sql  # assuming 'chat_result' is your ChatResult object
 

In [11]:
# Disabled single-question example. NOTE: the trailing comma would make `user_query` a 1-tuple,
# not a string (the evaluation code above unwraps a tuple `user_query` to its first element).
# user_query= "Among all the claims, which claims have a claimed amount larger than the average? List the date the claim was made and the date it was settled.",
# Database path handed to final_function_eval as its first argument in the batch-evaluation cell below.
DB_PATH = "spider_db/spider_db.db"



# Disabled one-off run for the example question above, followed by a debug print of the
# three values that final_function_eval returns.
# final_result,ground_truth,generated_sql =final_function_eval(DB_PATH,user_query)
# print('kkkkkkkkkkkkkkkkkk', final_result,'\n\n\n',ground_truth ,'\n\n\n',generated_sql )

In [12]:
# Load the evaluation CSV; the batch-evaluation cell below iterates over its 'question' column.
df_new=pd.read_csv('evaluation_result_csv.csv')

In [None]:
# Batch-evaluate a slice of questions with final_function_eval and write a
# plain-text report (question, evaluation table, reference SQL, generated SQL).
output_file = "final_results_eval.txt"

# Explicit UTF-8: without it the file uses the platform's locale encoding, so the
# same notebook can produce a different file (or raise UnicodeEncodeError on
# non-ASCII text) depending on the machine it runs on.
with open(output_file, "w", encoding="utf-8") as f:
    # Rows at positions 5..9 of the 'question' column (five questions).
    # .iloc makes the positional slice explicit.
    for question in df_new['question'].iloc[5:10]:
        # Echo the question so it can be matched against the agent chat log
        print(question, '\n\n')

        # Run the agent pipeline and the judge for this question
        final_result, ground_truth, generated_sql = final_function_eval(DB_PATH, question)

        # One report section per question, closed by a 50-dash separator line
        output_str = (
            f"Question: {question}\n\n"
            f"Final Result:\n{final_result}\n\n"
            f"Ground Truth:\n{ground_truth}\n\n"
            f"Generated SQL:\n{generated_sql}\n\n"
            + "-" * 50 + "\n\n"
        )

        f.write(output_str)

print(f"Results stored in {output_file}")
    

What are the method, date and amount of each payment? Sort the list in ascending order of date. 


[33mAdmin[0m (to chat_manager):

Analyze the user query : What are the method, date and amount of each payment? Sort the list in ascending order of date.

--------------------------------------------------------------------------------
[32m
Next speaker: SQLAgent
[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[33mSQLAgent[0m (to chat_manager):

[32m***** Suggested tool call (call_4g91): ValidationFromSQLAgent *****[0m
Arguments: 
{"db_path": "path_to_database", "sql_query": "SELECT method, date, amount FROM payments ORDER BY date ASC;"}
[32m*******************************************************************[0m

--------------------------------------------------------------------------------
[32m
Next speaker: ValidationAgent
[0m
[31m
>>>>>>>> USING AUTO REPLY...[0m
[35m
>>>>>>>> EXECUTING FUNCTION ValidationFromSQLAgent...
Call ID: call_4g91
Input arguments: {'db_path': 'path_t

In [None]:
import json


# Read the reference dataset. JSON text is UTF-8, so state the encoding explicitly
# instead of relying on the platform's locale default.
with open('filtered_insurance.json', 'r', encoding='utf-8') as reference:
    data = json.load(reference)

# Convert to DataFrame and keep the database id, reference query and question columns
df = pd.DataFrame(data)
main_data = df[['db_id', 'query', 'question']]

# Preview the first ten questions (last expression, so the notebook displays it)
main_data['question'].head(10)

In [None]:
import json


# Read the reference dataset. JSON text is UTF-8, so state the encoding explicitly
# instead of relying on the platform's locale default.
with open('filtered_insurance.json', 'r', encoding='utf-8') as reference:
    data = json.load(reference)

# Convert to DataFrame and keep the database id, reference query and question columns
df = pd.DataFrame(data)
main_data = df[['db_id', 'query', 'question']]

# Number of reference entries (last expression, so the notebook displays it)
len(main_data)