**Necessary Libraries**

In [1]:
import pandas as pd
import numpy as np
import sqlite3

---
The line of code **connection = sqlite3.connect('data.db')** is used to establish a connection to a SQLite database file named 'data.db'.

In [2]:
connection = sqlite3.connect('data.db')

---
The line of code **cursor = connection.cursor()** is used to create a cursor object, which is a crucial component for interacting with a SQLite database in a Python application.

In [3]:
cursor = connection.cursor()

In [4]:
# Create Employees table
command_1 = """CREATE TABLE IF NOT EXISTS employees (
  EmployeeID INTEGER PRIMARY KEY,
  FirstName TEXT NOT NULL,
  LastName TEXT NOT NULL,
  Age INTEGER CHECK (Age >= 18 AND Age <= 65),
  Department TEXT,
  Position TEXT,
  Salary REAL,
  HireDate TEXT CHECK (
    length(HireDate) = 10 AND
    substr(HireDate, 3, 1) = '/' AND
    substr(HireDate, 6, 1) = '/' AND
    cast(substr(HireDate, 1, 2) AS INTEGER) BETWEEN 1 AND 31 AND
    cast(substr(HireDate, 4, 2) AS INTEGER) BETWEEN 1 AND 12 AND
    cast(substr(HireDate, 7, 4) AS INTEGER) > 0
  ),
  ManagerID INTEGER,
  FOREIGN KEY (ManagerID) REFERENCES employees(EmployeeID)
)"""



In [5]:
# Create Managers table
command_2 = """
CREATE TABLE IF NOT EXISTS managers (
  ManagerID INTEGER PRIMARY KEY,
  FirstName TEXT NOT NULL,
  LastName TEXT NOT NULL,
  Department TEXT NOT NULL
)
"""



In [6]:
cursor.execute(command_1)
cursor.execute(command_2)

<sqlite3.Cursor at 0x7f92f36b3440>

In [7]:
# Define the insert statement
insert_statement_1 = """
INSERT INTO employees (EmployeeID, FirstName, LastName, Age, Department, Position, Salary, HireDate, ManagerID)
VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)
"""

In [8]:
# Data to be inserted
employees_data = [
    (1, 'John', 'Smith', 28, 'Sales', 'Manager', 85000, '01/03/2015', 21),
    (2, 'Jane', 'Doe', 34, 'Engineering', 'Developer', 95000, '23/05/2016', 6),
    (3, 'Emily', 'Johnson', 29, 'HR', 'Generalist', 70000, '14/08/2017', 7),
    (4, 'Michael', 'Brown', 45, 'Marketing', 'Coordinator', 80000, '11/11/2014', 20),
    (5, 'Sarah', 'Williams', 31, 'Sales', 'Consultant', 85000, '25/07/2013', 1),
    (6, 'David', 'Jones', 38, 'Engineering', 'Manager', 95000, '17/09/2019', 1),
    (7, 'Laura', 'Garcia', 26, 'HR', 'Manager', 70000, '02/12/2018', 21),
    (8, 'James', 'Miller', 39, 'Marketing', 'SEO', 80000, '10/01/2020', 20),
    (9, 'Anna', 'Davis', 27, 'Sales', 'Associate', 85000, '01/03/2015', 1),
    (10, 'Robert', 'Rodriguez', 41, 'Engineering', 'QA', 95000, '23/05/2016', 6),
    (11, 'Linda', 'Martinez', 33, 'HR', 'Coordinator', 70000, '14/08/2017', 7),
    (12, 'William', 'Hernandez', 30, 'Marketing', 'Analyst', 80000, '11/11/2014', 20),
    (13, 'Elizabeth', 'Lopez', 36, 'Sales', 'Analyst', 85000, '25/07/2013', 1),
    (14, 'Richard', 'Gonzalez', 42, 'Engineering', 'DevOps', 95000, '17/09/2019', 6),
    (15, 'Jessica', 'Wilson', 32, 'HR', 'Analyst', 70000, '02/12/2018', 7),
    (16, 'Joseph', 'Anderson', 37, 'Marketing', 'Associate', 80000, '10/01/2020', 20),
    (17, 'Karen', 'Thomas', 29, 'Sales', 'Coordinator', 85000, '01/03/2015', 1),
    (18, 'Thomas', 'Taylor', 35, 'Engineering', 'Technical support', 95000, '23/05/2016', 6),
    (19, 'Nancy', 'Moore', 40, 'HR', 'Recruiter', 70000, '14/08/2017', 7),
    (20, 'Charles', 'Jackson', 43, 'Marketing', 'Manager', 80000, '11/11/2014', 21),
    (21, 'Alex', 'Johnson', 50, 'Management', 'CEO', 200000, '01/01/2010', None)
]

In [9]:
# Insert each record one by one
for employee in employees_data:
    cursor.execute(insert_statement_1, employee)

In [10]:
insert_statement_2 = """
INSERT INTO managers (ManagerID, FirstName, LastName, Department)
VALUES (?, ?, ?, ?)
"""

In [11]:
# Sample data for managers
managers_data = [
    (1, 'Michael', 'Scott', 'Sales'),
    (2, 'Pam', 'Beesly', 'HR'),
    (3, 'Jim', 'Halpert', 'Sales'),
    (4, 'Dwight', 'Schrute', 'Sales'),
    (5, 'Angela', 'Martin', 'Accounting'),
    (6, 'Stanley', 'Hudson', 'Sales'),
    (7, 'Kevin', 'Malone', 'Accounting'),
    (8, 'Oscar', 'Martinez', 'Accounting'),
    (9, 'Phyllis', 'Vance', 'Sales'),
    (10, 'Andy', 'Bernard', 'Sales'),
    (11, 'Kelly', 'Kapoor', 'Customer Service'),
    (12, 'Ryan', 'Howard', 'Sales'),
    (13, 'Meredith', 'Palmer', 'Supplier Relations'),
    (14, 'Creed', 'Bratton', 'Quality Assurance'),
    (15, 'Toby', 'Flenderson', 'HR'),
    (16, 'Darryl', 'Philbin', 'Warehouse'),
    (17, 'Gabe', 'Lewis', 'HR'),
    (18, 'Holly', 'Flax', 'HR'),
    (19, 'Clark', 'Green', 'Sales'),
    (20, 'Pete', 'Miller', 'Sales'),
    (21, 'Erin', 'Hannon', 'Reception')
]

In [12]:
# Insert each record one by one
for manager in managers_data:
    cursor.execute(insert_statement_2, manager)

---

**langchain langchain-google-genai** This installs the langchain-google-genai package, which is a LangChain integration for Google's Generative AI models. It allows developers to easily use Google's AI models within their LangChain applications.

**pillow** This installs the Pillow library, which is a Python Imaging Library (PIL). Pillow provides a simple interface for processing images, including opening, manipulating, and saving images in various file formats.

In [13]:
!pip install langchain langchain-google-genai pillow



In [14]:
!pip install langchain_community




----
**import os** The provided code snippet is a single line of Python code that imports the os module. The os module is a part of the Python standard library and is widely used in Python programming. It provides a platform-independent way to interact with the underlying operating system

In [15]:
import os

if "GOOGLE_API_KEY" not in os.environ:
    os.environ["GOOGLE_API_KEY"] = "AIzaSyAdwrneDWeYlla_OQXtYNuVKjRGqj3YZLQ"

In [16]:
# imports the ChatGoogleGenerativeAI class from the langchain_google_genai module
from langchain_google_genai import ChatGoogleGenerativeAI

# creates an instance of the class, assigning it to the variable llm.
llm = ChatGoogleGenerativeAI(model="gemini-pro", temperature=0)

---

**from langchain_community.utilities import SQLDatabase:** This line imports the SQLDatabase class from the langchain_community.utilities module. The langchain_community library is a collection of utilities and tools that can be used with the LangChain framework for building language applications.

**db = SQLDatabase.from_uri("sqlite:////content/data.db"):** This line creates an instance of the SQLDatabase class by calling the from_uri() method. The argument "sqlite:////content/data.db" specifies the URI (Uniform Resource Identifier) of the SQLite database file. The sqlite:// prefix indicates that the database is a SQLite database.

**print(db.dialect):** This line prints the dialect of the SQLite database, which is likely "sqlite".

**print(db.get_usable_table_names()):** This line calls the get_usable_table_names() method of the SQLDatabase instance, which returns a list of the table names that can be used in the database. This is a useful method for understanding the structure of the database and the available data.

In [17]:
from langchain_community.utilities import SQLDatabase

db = SQLDatabase.from_uri("sqlite:////content/data.db")
print(db.dialect)
print(db.get_usable_table_names())

sqlite
['employees', 'managers']


---
The code you provided defines a ChatPromptTemplate object using the ChatPromptTemplate class from the langchain.prompts.chat module. This template is used to generate a prompt for a conversational AI agent that interacts with a SQLite database.

In [81]:
from langchain.prompts.chat import ChatPromptTemplate

prompt = ChatPromptTemplate.from_messages(
    [
        ("system",
         """
          You are an agent designed to interact with a SQL database.
Given an input question, first create a syntactically correct sqlite query to run, then look at the results of the query and return the answer.
Understand the schemas of the tables in the database.
Understand the columns mentioned in the question and don't get confused by if any word consisting of column names come in the question.
If you encounter a column name in the question that doesn't match any table name, check if the column exists in any of the tables before assuming a missing table.
You can order the results by a relevant column to return the most interesting examples in the database.
Never query for all the columns from a specific table, only ask for the relevant columns given the question.
You have access to tools for interacting with the database.
Only use the below tools. Only use the information returned by the below tools to construct your final answer.
You MUST double check your query before executing it. If you get an error while executing a query, rewrite the query and try again.

DO NOT make any DML statements (INSERT, UPDATE, DELETE, DROP etc.) to the database.

To start you should ALWAYS look at the tables in the database to see what you can query.
Do NOT skip this step.
Then you should query the schema of the most relevant tables.

After showing result,
Analyze the generated query and provide suggestions to optimize its performance. Consider factors such as:
        - Using appropriate indexes
        - Optimizing joins
        - Avoiding unnecessary operations
        - Simplifying complex subqueries or expressions

Use the following format for optimization suggestions:
        - Optimize: [the description of optimization]
        - Impact: [impact of optimization on performance]
        - Suggestions: [suggestions for improvement]

Include all above mentioned things in answer
If you don't know the answer, just say that you don't know.

         """),
        ("user", "{question}")
    ]

)

In [82]:
from langchain.agents import AgentType, create_sql_agent
from langchain.agents.agent_toolkits.sql.toolkit import SQLDatabaseToolkit
sql_toolkit = SQLDatabaseToolkit(db=db, llm=llm)
sql_toolkit.get_tools()

sqldb_agent = create_sql_agent(
    llm=llm,
    toolkit=sql_toolkit,
    agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    verbose=True,
    handle_parsing_errors=True
)

In [83]:
question = """ Show me the top 5 highest-paid employees """

In [84]:
sqldb_agent.invoke(prompt.format(
        question=question
  ))



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: [0m[38;5;200m[1;3memployees, managers[0m[32;1m[1;3mAction: sql_db_schema
Action Input: employees[0m[33;1m[1;3m
CREATE TABLE employees (
	"EmployeeID" INTEGER, 
	"FirstName" TEXT NOT NULL, 
	"LastName" TEXT NOT NULL, 
	"Age" INTEGER, 
	"Department" TEXT, 
	"Position" TEXT, 
	"Salary" REAL, 
	"HireDate" TEXT, 
	"ManagerID" INTEGER, 
	PRIMARY KEY ("EmployeeID"), 
	FOREIGN KEY("ManagerID") REFERENCES employees ("EmployeeID"), 
	CHECK (Age >= 18 AND Age <= 65)
)

/*
3 rows from employees table:
EmployeeID	FirstName	LastName	Age	Department	Position	Salary	HireDate	ManagerID

*/[0m[32;1m[1;3mAction: sql_db_query_checker
Action Input: SELECT FirstName, LastName, Salary FROM employees ORDER BY Salary DESC LIMIT 5[0m[36;1m[1;3mSELECT FirstName, LastName, Salary FROM employees ORDER BY Salary DESC LIMIT 5[0m[32;1m[1;3mAction: sql_db_query
Action Input: SELECT FirstName, Las

{'input': "System: \n          You are an agent designed to interact with a SQL database.\nGiven an input question, first create a syntactically correct sqlite query to run, then look at the results of the query and return the answer.\nUnderstand the schemas of the tables in the database.\nUnderstand the columns mentioned in the question and don't get confused by if any word consisting of column names come in the question.\nIf you encounter a column name in the question that doesn't match any table name, check if the column exists in any of the tables before assuming a missing table.\nYou can order the results by a relevant column to return the most interesting examples in the database.\nNever query for all the columns from a specific table, only ask for the relevant columns given the question.\nYou have access to tools for interacting with the database.\nOnly use the below tools. Only use the information returned by the below tools to construct your final answer.\nYou MUST double chec

In [85]:
question = """Who is the youngest employee in the Engineering department? """

In [86]:
sqldb_agent.invoke(prompt.format(
        question=question
  ))



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: [0m[38;5;200m[1;3memployees, managers[0m[32;1m[1;3mAction: sql_db_schema
Action Input: employees[0m[33;1m[1;3m
CREATE TABLE employees (
	"EmployeeID" INTEGER, 
	"FirstName" TEXT NOT NULL, 
	"LastName" TEXT NOT NULL, 
	"Age" INTEGER, 
	"Department" TEXT, 
	"Position" TEXT, 
	"Salary" REAL, 
	"HireDate" TEXT, 
	"ManagerID" INTEGER, 
	PRIMARY KEY ("EmployeeID"), 
	FOREIGN KEY("ManagerID") REFERENCES employees ("EmployeeID"), 
	CHECK (Age >= 18 AND Age <= 65)
)

/*
3 rows from employees table:
EmployeeID	FirstName	LastName	Age	Department	Position	Salary	HireDate	ManagerID

*/[0m[32;1m[1;3mAction: sql_db_query_checker
Action Input: SELECT FirstName, LastName, Age FROM employees WHERE Department = 'Engineering' ORDER BY Age ASC LIMIT 1[0m[36;1m[1;3mSELECT FirstName, LastName, Age FROM employees WHERE Department = 'Engineering' ORDER BY Age ASC LIMIT 1[0m[32;1m[1;3mActi

{'input': "System: \n          You are an agent designed to interact with a SQL database.\nGiven an input question, first create a syntactically correct sqlite query to run, then look at the results of the query and return the answer.\nUnderstand the schemas of the tables in the database.\nUnderstand the columns mentioned in the question and don't get confused by if any word consisting of column names come in the question.\nIf you encounter a column name in the question that doesn't match any table name, check if the column exists in any of the tables before assuming a missing table.\nYou can order the results by a relevant column to return the most interesting examples in the database.\nNever query for all the columns from a specific table, only ask for the relevant columns given the question.\nYou have access to tools for interacting with the database.\nOnly use the below tools. Only use the information returned by the below tools to construct your final answer.\nYou MUST double chec

In [87]:
# Handles Subquery


question = """Show the names of employees whose salary is above the average salary in their department."""

In [88]:
sqldb_agent.invoke(prompt.format(
        question=question
  ))



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: [0m[38;5;200m[1;3memployees, managers[0m[32;1m[1;3mAction: sql_db_schema
Action Input: employees[0m[33;1m[1;3m
CREATE TABLE employees (
	"EmployeeID" INTEGER, 
	"FirstName" TEXT NOT NULL, 
	"LastName" TEXT NOT NULL, 
	"Age" INTEGER, 
	"Department" TEXT, 
	"Position" TEXT, 
	"Salary" REAL, 
	"HireDate" TEXT, 
	"ManagerID" INTEGER, 
	PRIMARY KEY ("EmployeeID"), 
	FOREIGN KEY("ManagerID") REFERENCES employees ("EmployeeID"), 
	CHECK (Age >= 18 AND Age <= 65)
)

/*
3 rows from employees table:
EmployeeID	FirstName	LastName	Age	Department	Position	Salary	HireDate	ManagerID

*/[0m



[32;1m[1;3mAction: sql_db_schema
Action Input: managers[0m[33;1m[1;3m
CREATE TABLE managers (
	"ManagerID" INTEGER, 
	"FirstName" TEXT NOT NULL, 
	"LastName" TEXT NOT NULL, 
	"Department" TEXT NOT NULL, 
	PRIMARY KEY ("ManagerID")
)

/*
3 rows from managers table:
ManagerID	FirstName	LastName	Department

*/[0m[32;1m[1;3mAction: sql_db_query_checker
Action Input: SELECT FirstName, LastName
FROM employees
WHERE Salary > (
    SELECT AVG(Salary)
    FROM employees
    WHERE Department = employees.Department
)[0m[36;1m[1;3mSELECT FirstName, LastName
FROM employees
WHERE Salary > (
    SELECT AVG(Salary)
    FROM employees
    WHERE Department = employees.Department
)[0m[32;1m[1;3mFinal Answer: John Smith, Jane Doe[0m

[1m> Finished chain.[0m


{'input': "System: \n          You are an agent designed to interact with a SQL database.\nGiven an input question, first create a syntactically correct sqlite query to run, then look at the results of the query and return the answer.\nUnderstand the schemas of the tables in the database.\nUnderstand the columns mentioned in the question and don't get confused by if any word consisting of column names come in the question.\nIf you encounter a column name in the question that doesn't match any table name, check if the column exists in any of the tables before assuming a missing table.\nYou can order the results by a relevant column to return the most interesting examples in the database.\nNever query for all the columns from a specific table, only ask for the relevant columns given the question.\nYou have access to tools for interacting with the database.\nOnly use the below tools. Only use the information returned by the below tools to construct your final answer.\nYou MUST double chec

In [89]:
question = """ Display the names of employees whose first and last names start with the same letter. """

In [90]:
sqldb_agent.invoke(prompt.format(
        question=question
  ))



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: [0m[38;5;200m[1;3memployees, managers[0m[32;1m[1;3mAction: sql_db_schema
Action Input: employees[0m[33;1m[1;3m
CREATE TABLE employees (
	"EmployeeID" INTEGER, 
	"FirstName" TEXT NOT NULL, 
	"LastName" TEXT NOT NULL, 
	"Age" INTEGER, 
	"Department" TEXT, 
	"Position" TEXT, 
	"Salary" REAL, 
	"HireDate" TEXT, 
	"ManagerID" INTEGER, 
	PRIMARY KEY ("EmployeeID"), 
	FOREIGN KEY("ManagerID") REFERENCES employees ("EmployeeID"), 
	CHECK (Age >= 18 AND Age <= 65)
)

/*
3 rows from employees table:
EmployeeID	FirstName	LastName	Age	Department	Position	Salary	HireDate	ManagerID

*/[0m[32;1m[1;3mAction: sql_db_query_checker
Action Input: SELECT FirstName, LastName FROM employees WHERE FirstName LIKE LastName[0m[36;1m[1;3mSELECT FirstName, LastName FROM employees WHERE FirstName LIKE LastName[0m[32;1m[1;3mAction: sql_db_query
Action Input: SELECT FirstName, LastName FROM emp

{'input': "System: \n          You are an agent designed to interact with a SQL database.\nGiven an input question, first create a syntactically correct sqlite query to run, then look at the results of the query and return the answer.\nUnderstand the schemas of the tables in the database.\nUnderstand the columns mentioned in the question and don't get confused by if any word consisting of column names come in the question.\nIf you encounter a column name in the question that doesn't match any table name, check if the column exists in any of the tables before assuming a missing table.\nYou can order the results by a relevant column to return the most interesting examples in the database.\nNever query for all the columns from a specific table, only ask for the relevant columns given the question.\nYou have access to tools for interacting with the database.\nOnly use the below tools. Only use the information returned by the below tools to construct your final answer.\nYou MUST double chec

In [91]:
# Handles multi-table query


question = """Show me the names and salaries of the top 5 highest-paid employees, along with the names of their managers. """

In [92]:
sqldb_agent.invoke(prompt.format(
        question=question
  ))



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: [0m[38;5;200m[1;3memployees, managers[0m[32;1m[1;3mAction: sql_db_schema
Action Input: employees, managers[0m[33;1m[1;3m
CREATE TABLE employees (
	"EmployeeID" INTEGER, 
	"FirstName" TEXT NOT NULL, 
	"LastName" TEXT NOT NULL, 
	"Age" INTEGER, 
	"Department" TEXT, 
	"Position" TEXT, 
	"Salary" REAL, 
	"HireDate" TEXT, 
	"ManagerID" INTEGER, 
	PRIMARY KEY ("EmployeeID"), 
	FOREIGN KEY("ManagerID") REFERENCES employees ("EmployeeID"), 
	CHECK (Age >= 18 AND Age <= 65)
)

/*
3 rows from employees table:
EmployeeID	FirstName	LastName	Age	Department	Position	Salary	HireDate	ManagerID

*/


CREATE TABLE managers (
	"ManagerID" INTEGER, 
	"FirstName" TEXT NOT NULL, 
	"LastName" TEXT NOT NULL, 
	"Department" TEXT NOT NULL, 
	PRIMARY KEY ("ManagerID")
)

/*
3 rows from managers table:
ManagerID	FirstName	LastName	Department

*/[0m[32;1m[1;3mAction: sql_db_query_checker
Action In

{'input': "System: \n          You are an agent designed to interact with a SQL database.\nGiven an input question, first create a syntactically correct sqlite query to run, then look at the results of the query and return the answer.\nUnderstand the schemas of the tables in the database.\nUnderstand the columns mentioned in the question and don't get confused by if any word consisting of column names come in the question.\nIf you encounter a column name in the question that doesn't match any table name, check if the column exists in any of the tables before assuming a missing table.\nYou can order the results by a relevant column to return the most interesting examples in the database.\nNever query for all the columns from a specific table, only ask for the relevant columns given the question.\nYou have access to tools for interacting with the database.\nOnly use the below tools. Only use the information returned by the below tools to construct your final answer.\nYou MUST double chec

# Evaluation


*   First using Scoring Evaluator and Accuracy
*   Correctness



In [30]:
from langchain.evaluation import load_evaluator

---
**Accuracy**

In [31]:
accuracy_criteria = {
    "accuracy": """
Score 1: The answer is completely unrelated to the reference.
Score 3: The answer has minor relevance but does not align with the reference.
Score 5: The answer has moderate relevance but contains inaccuracies.
Score 7: The answer aligns with the reference but has minor errors or omissions.
Score 10: The answer is completely accurate and aligns perfectly with the reference."""
}

In [32]:
evaluator = load_evaluator("labeled_criteria", criteria=accuracy_criteria, llm=llm)

eval_result = evaluator.evaluate_strings(
    input="Find all employees who were hired in the year 2019.",
    prediction="""SELECT name
FROM employees
WHERE salary > (
    SELECT AVG(salary)
    FROM employees
    GROUP BY department_id
    HAVING department_id = employees.department_id
)
LIMIT 5;""",
    reference="""SELECT FirstName, LastName, HireDate
                 FROM employees
                 WHERE strftime('%Y', HireDate) = '2019'
                 LIMIT 5;""",
)

print(eval_result)


{'reasoning': '**Accuracy:**\n\n1. The submission does not find employees who were hired in the year 2019. Instead, it finds employees who have a salary greater than the average salary in their department.\n2. The submission does not return the FirstName, LastName, and HireDate columns as specified in the reference. Instead, it returns the name column, which is not defined in the reference.\n3. The submission uses a LIMIT clause to return only 5 rows, while the reference does not specify a limit.\n\nBased on these observations, the submission does not meet the accuracy criterion.\n\n**Answer:** N', 'value': 'N', 'score': 0}


In [33]:
evaluator = load_evaluator("labeled_criteria", criteria=accuracy_criteria, llm=llm)

eval_result = evaluator.evaluate_strings(
    input="Find all employees who were hired in the year 2019.",
    prediction="""SELECT
   EmployeeID, FirstName, LastName
    FROM employees
    WHERE strftime('%Y', HireDate) = '2019'
    ORDER BY "HireDate" DESC
    LIMIT 5;""",
    reference="""SELECT FirstName, LastName, HireDate
                 FROM employees
                 WHERE strftime('%Y', HireDate) = '2019'
                 LIMIT 5;""",
)

print(eval_result)


{'reasoning': '1. **Accuracy:** The submission uses the correct table and filters the results based on the hiring year, which aligns with the reference. However, the submission includes additional columns (EmployeeID) and sorts the results in descending order by HireDate, which is not specified in the reference. These minor differences do not significantly impact the accuracy of the results.\n2. **Completeness:** The submission retrieves the first five employees hired in 2019, which meets the requirement of the reference.\n\nBased on the above reasoning, the submission meets all the criteria.\n\nY', 'value': 'Y', 'score': 1}


**Correctness**

In [34]:
# evaluator = load_evaluator("labeled_criteria", criteria="correctness", llm=llm)

# eval_result = evaluator.evaluate_strings(
#     input="Find all employees who were hired in the year 2019.",
#     prediction="""SELECT
#    EmployeeID, FirstName, LastName
#     FROM employees
#     WHERE strftime('%Y', HireDate) = '2019'
#     ORDER BY "HireDate" DESC
#     LIMIT 5;""",
#     reference="""SELECT FirstName, LastName, HireDate
#                  FROM employees
#                  WHERE strftime('%Y', hire_date) = '2019'
#                  LIMIT 5;""",
# )
# print(f'With ground truth: {eval_result["score"]}')

In [35]:
# evaluator = load_evaluator("labeled_criteria", criteria="correctness", llm=llm)

# eval_result = evaluator.evaluate_strings(
#     input="Find all employees who were hired in the year 2019.",
#     prediction="""SELECT name
# FROM employees
# WHERE salary > (
#     SELECT AVG(salary)
#     FROM employees
#     GROUP BY department_id
#     HAVING department_id = employees.department_id
# )
# LIMIT 5;""",
#     reference="""SELECT FirstName, LastName, HireDate
#                  FROM employees
#                  WHERE strftime('%Y', hire_date) = '2019'
#                  LIMIT 5;""",
# )
# print(f'With ground truth: {eval_result["score"]}')