In [1]:
# importing libraries

from langchain.agents import create_sql_agent
from langchain.sql_database import SQLDatabase
from langchain.agents.agent_toolkits import SQLDatabaseToolkit
import os
from dotenv import load_dotenv
from langchain.chat_models import ChatOpenAI
# from langchain.prompts.prompt import PromptTemplate

In [2]:
# connect to datatbase
load_dotenv()

# Access environment variables
DB_HOST = os.getenv("DB_HOST")
DB_PORT = os.getenv("DB_PORT")
DB_NAME = os.getenv("DB_NAME")
DB_NAME2 = os.getenv("DB_NAME2")
DB_USER = os.getenv("DB_USER")
DB_PASSWORD = os.getenv("DB_PASSWORD")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

 # to test for AdventureWorksProd db
db = SQLDatabase.from_uri(f"postgresql+psycopg2://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}", view_support = True)

 # to test for Bookings db
db2 = SQLDatabase.from_uri(f"postgresql+psycopg2://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME2}", view_support = True)

In [93]:
# Setting up llm

modelName="gpt-3.5-turbo-16k" # replace value with compatible model of choice
llm = ChatOpenAI(model_name=modelName, temperature=0) # change between 0 and 0.2 for testing

In [94]:
toolkit = SQLDatabaseToolkit(db=db2, llm=llm) # change db to db2 if you want to test another database
agent_executor = create_sql_agent(
    llm=llm,
    toolkit=toolkit,
    verbose=True
)

In [None]:
# _DEFAULT_TEMPLATE = """Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer.

# Use the following format:

# Question: "Question here"
# SQLQuery: "SQL Query to run"
# SQLResult: "Result of the SQLQuery"
# Answer: "Final answer here"

# The SQL query should NOT end with semi-colon
# Question: {input}"""

# PROMPT = PromptTemplate(
#      input_variables=["input", "dialect"], template=_DEFAULT_TEMPLATE
# )


# chain = SQLDatabaseChain(llm=llm, database=db, prompt=PROMPT, verbose=True) 

## Testing Langchain on the Airport/bookings data - use db2

In [95]:
agent_executor.run("describe the bookings table")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3maircrafts, aircrafts_data, airports, airports_data, boarding_passes, bookings, flights, flights_v, routes, seats, ticket_flights, tickets[0m
Thought:[32;1m[1;3mThe "bookings" table is relevant to the question. I should query the schema of the "bookings" table.
Action: sql_db_schema
Action Input: "bookings"[0m
Observation: [33;1m[1;3m
CREATE TABLE bookings (
	book_ref CHAR(6) NOT NULL, 
	book_date TIMESTAMP WITH TIME ZONE NOT NULL, 
	total_amount NUMERIC(10, 2) NOT NULL, 
	CONSTRAINT bookings_pkey PRIMARY KEY (book_ref)
)

/*
3 rows from bookings table:
book_ref	book_date	total_amount
00000F	2017-07-04 17:12:00-07:00	265700.00
000012	2017-07-13 23:02:00-07:00	37900.00
00002D	2017-05-20 08:45:00-07:00	114700.00
*/[0m
Thought:[32;1m[1;3mThe "bookings" table has the following columns: book_ref, book_date, and total_amount. It contains informati

'The "bookings" table has the columns book_ref, book_date, and total_amount. It contains information about bookings, including the booking reference, booking date, and total amount. The table has a primary key constraint on the book_ref column. The table has 3 rows of data.'

In [36]:
agent_executor.run("describe the flights table?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3maircrafts, aircrafts_data, airports, airports_data, boarding_passes, bookings, flights, flights_v, routes, seats, ticket_flights, tickets[0m
Thought:[32;1m[1;3mThe "flights" table seems to be the most relevant table for the question. I should query its schema to get more information.
Action: sql_db_schema
Action Input: "flights"[0m
Observation: [33;1m[1;3m
CREATE TABLE flights (
	flight_id SERIAL NOT NULL, 
	flight_no CHAR(6) NOT NULL, 
	scheduled_departure TIMESTAMP WITH TIME ZONE NOT NULL, 
	scheduled_arrival TIMESTAMP WITH TIME ZONE NOT NULL, 
	departure_airport CHAR(3) NOT NULL, 
	arrival_airport CHAR(3) NOT NULL, 
	status VARCHAR(20) NOT NULL, 
	aircraft_code CHAR(3) NOT NULL, 
	actual_departure TIMESTAMP WITH TIME ZONE, 
	actual_arrival TIMESTAMP WITH TIME ZONE, 
	CONSTRAINT flights_pkey PRIMARY KEY (flight_id), 
	CONSTRAINT flights_airc

'The flights table has 10 columns and 3 sample rows.'

In [37]:
# TC1: PASS
agent_executor.run("How many rows are in the bookings table?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3maircrafts, aircrafts_data, airports, airports_data, boarding_passes, bookings, flights, flights_v, routes, seats, ticket_flights, tickets[0m
Thought:[32;1m[1;3mI can query the "bookings" table to get the number of rows.
Action: sql_db_query
Action Input: "SELECT COUNT(*) FROM bookings"[0m
Observation: [36;1m[1;3m[(593433,)][0m
Thought:[32;1m[1;3mThe number of rows in the bookings table is 593,433.
Final Answer: 593,433[0m

[1m> Finished chain.[0m


'593,433'

In [38]:
# TC2: PASS
agent_executor.run("How many rows are in the aircrafts_data table?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3maircrafts, aircrafts_data, airports, airports_data, boarding_passes, bookings, flights, flights_v, routes, seats, ticket_flights, tickets[0m
Thought:[32;1m[1;3mI can query the "aircrafts_data" table to get the number of rows.
Action: sql_db_query
Action Input: "SELECT COUNT(*) FROM aircrafts_data"[0m
Observation: [36;1m[1;3m[(9,)][0m
Thought:[32;1m[1;3mThe aircrafts_data table has 9 rows.
Final Answer: 9[0m

[1m> Finished chain.[0m


'9'

In [44]:
# TC3: PASS
agent_executor.run("find the top ten most bookings and order total amounts in descending order")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3maircrafts, aircrafts_data, airports, airports_data, boarding_passes, bookings, flights, flights_v, routes, seats, ticket_flights, tickets[0m
Thought:[32;1m[1;3mI can query the "bookings" table to find the top ten most bookings and their order total amounts. I should check the schema of the "bookings" table to see which columns are relevant.
Action: sql_db_schema
Action Input: "bookings"[0m
Observation: [33;1m[1;3m
CREATE TABLE bookings (
	book_ref CHAR(6) NOT NULL, 
	book_date TIMESTAMP WITH TIME ZONE NOT NULL, 
	total_amount NUMERIC(10, 2) NOT NULL, 
	CONSTRAINT bookings_pkey PRIMARY KEY (book_ref)
)

/*
3 rows from bookings table:
book_ref	book_date	total_amount
00000F	2017-07-04 17:12:00-07:00	265700.00
000012	2017-07-13 23:02:00-07:00	37900.00
00002D	2017-05-20 08:45:00-07:00	114700.00
*/[0m
Thought:[32;1m[1;3mI can query the "bookings"

'The top ten most bookings and their order total amounts in descending order are:\n1. Book Ref: 3B54BB, Total Amount: $1,204,500.00\n2. Book Ref: 53B75C, Total Amount: $1,116,700.00\n3. Book Ref: 3AC131, Total Amount: $1,087,100.00\n4. Book Ref: 65A6EA, Total Amount: $1,065,600.00\n5. Book Ref: D7061C, Total Amount: $1,065,600.00\n6. Book Ref: 4B58DB, Total Amount: $1,065,600.00\n7. Book Ref: D7E9AA, Total Amount: $1,062,800.00\n8. Book Ref: EF479E, Total Amount: $1,035,100.00\n9. Book Ref: 03BCC9, Total Amount: $1,022,500.00\n10. Book Ref: 1BE923, Total Amount: $1,016,500.00'

In [45]:
# Testing for case sensitivity in values. Passenger name ANTONINA KUZNECOVA is spelled using the same case sensitivity as in the db
# executor correctly identified table and displayed the correct result based on the validation found in the validation_airports file.

#TC4: PASS
agent_executor.run("How many ticket bookings does passenger ANTONINA KUZNECOVA have? ")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3maircrafts, aircrafts_data, airports, airports_data, boarding_passes, bookings, flights, flights_v, routes, seats, ticket_flights, tickets[0m
Thought:[32;1m[1;3mI can query the "bookings" table to find the number of ticket bookings for a specific passenger. I should check the schema of the "bookings" table to see which columns are relevant.
Action: sql_db_schema
Action Input: "bookings"[0m
Observation: [33;1m[1;3m
CREATE TABLE bookings (
	book_ref CHAR(6) NOT NULL, 
	book_date TIMESTAMP WITH TIME ZONE NOT NULL, 
	total_amount NUMERIC(10, 2) NOT NULL, 
	CONSTRAINT bookings_pkey PRIMARY KEY (book_ref)
)

/*
3 rows from bookings table:
book_ref	book_date	total_amount
00000F	2017-07-04 17:12:00-07:00	265700.00
000012	2017-07-13 23:02:00-07:00	37900.00
00002D	2017-05-20 08:45:00-07:00	114700.00
*/[0m
Thought:[32;1m[1;3mI can query the "bookings" 

'117'

In [48]:

# langchain identified the value when the same case sensitivity used in the db was used in the prompt
# but failed to exexute query once the case sensitivity was incorrect. It did, however identify one of
# the correct tables {tickets} but joined to the {bookings} table which isn't incorrect. Executor did use case sensitivity 
# as instructed by the prompt.

# TC5: PASS 
agent_executor.run("How many ticket bookings does passenger Antonina Kuznecova have? ")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3maircrafts, aircrafts_data, airports, airports_data, boarding_passes, bookings, flights, flights_v, routes, seats, ticket_flights, tickets[0m
Thought:[32;1m[1;3mI can query the "bookings" table to find the number of ticket bookings for a specific passenger. I should check the schema of the "bookings" table to see what columns are available.
Action: sql_db_schema
Action Input: "bookings"[0m
Observation: [33;1m[1;3m
CREATE TABLE bookings (
	book_ref CHAR(6) NOT NULL, 
	book_date TIMESTAMP WITH TIME ZONE NOT NULL, 
	total_amount NUMERIC(10, 2) NOT NULL, 
	CONSTRAINT bookings_pkey PRIMARY KEY (book_ref)
)

/*
3 rows from bookings table:
book_ref	book_date	total_amount
00000F	2017-07-04 17:12:00-07:00	265700.00
000012	2017-07-13 23:02:00-07:00	37900.00
00002D	2017-05-20 08:45:00-07:00	114700.00
*/[0m
Thought:[32;1m[1;3mI can query the "bookings" 

'Passenger Antonina Kuznecova has 0 ticket bookings.'

In [50]:
# TC6: FAIL (it did manage to choose the correct table and identiy DME as russian airport but missed other airport codes associated
# with moscow. It also used Flight_id instead of flight_no. Additionally, it didn't account for records with no departure date)
agent_executor.run("How many distinct flights departed from Moscow?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3maircrafts, aircrafts_data, airports, airports_data, boarding_passes, bookings, flights, flights_v, routes, seats, ticket_flights, tickets[0m
Thought:[32;1m[1;3mI can query the "flights" table to find the distinct flights that departed from Moscow.
Action: sql_db_schema
Action Input: "flights"[0m
Observation: [33;1m[1;3m
CREATE TABLE flights (
	flight_id SERIAL NOT NULL, 
	flight_no CHAR(6) NOT NULL, 
	scheduled_departure TIMESTAMP WITH TIME ZONE NOT NULL, 
	scheduled_arrival TIMESTAMP WITH TIME ZONE NOT NULL, 
	departure_airport CHAR(3) NOT NULL, 
	arrival_airport CHAR(3) NOT NULL, 
	status VARCHAR(20) NOT NULL, 
	aircraft_code CHAR(3) NOT NULL, 
	actual_departure TIMESTAMP WITH TIME ZONE, 
	actual_arrival TIMESTAMP WITH TIME ZONE, 
	CONSTRAINT flights_pkey PRIMARY KEY (flight_id), 
	CONSTRAINT flights_aircraft_code_fkey FOREIGN KEY(aircraft_c

'6376'

In [51]:
# It passed TC6 with Contextual Assistance
agent_executor.run("How many distinct flights departed from Moscow?Use the routes view (a view called routes) to answer the question")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3maircrafts, aircrafts_data, airports, airports_data, boarding_passes, bookings, flights, flights_v, routes, seats, ticket_flights, tickets[0m
Thought:[32;1m[1;3mI can query the "routes" view to get the information about flights departing from Moscow.
Action: sql_db_schema
Action Input: routes[0m
Observation: [33;1m[1;3m
CREATE TABLE routes (
	flight_no CHAR(6), 
	departure_airport CHAR(3), 
	departure_airport_name TEXT, 
	departure_city TEXT, 
	arrival_airport CHAR(3), 
	arrival_airport_name TEXT, 
	arrival_city TEXT, 
	aircraft_code CHAR(3), 
	duration INTERVAL, 
	days_of_week INTEGER[]
)

/*
3 rows from routes table:
flight_no	departure_airport	departure_airport_name	departure_city	arrival_airport	arrival_airport_name	arrival_city	aircraft_code	duration	days_of_week
PG0001	UIK	Ust-Ilimsk Airport	Ust Ilimsk	SGC	Surgut Airport	Surgut	CR2	2:20:0

'154'

In [52]:
# TC7: PASS
agent_executor.run("Get me the count of each flight status grouped by status and also provide the minimum and maximum departure ordered by the minimum scheduled departure ")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3maircrafts, aircrafts_data, airports, airports_data, boarding_passes, bookings, flights, flights_v, routes, seats, ticket_flights, tickets[0m
Thought:[32;1m[1;3mI can query the "flights" table to get the count of each flight status. I can also query the "flights" table to get the minimum and maximum departure times, ordered by the minimum scheduled departure time.
Action: sql_db_schema
Action Input: "flights"[0m
Observation: [33;1m[1;3m
CREATE TABLE flights (
	flight_id SERIAL NOT NULL, 
	flight_no CHAR(6) NOT NULL, 
	scheduled_departure TIMESTAMP WITH TIME ZONE NOT NULL, 
	scheduled_arrival TIMESTAMP WITH TIME ZONE NOT NULL, 
	departure_airport CHAR(3) NOT NULL, 
	arrival_airport CHAR(3) NOT NULL, 
	status VARCHAR(20) NOT NULL, 
	aircraft_code CHAR(3) NOT NULL, 
	actual_departure TIMESTAMP WITH TIME ZONE, 
	actual_arrival TIMESTAMP WITH TIME Z

'The count of each flight status grouped by status, along with the minimum and maximum departure times ordered by the minimum scheduled departure time are as follows:\n- Arrived: 49235 flights, minimum departure time: 2017-05-16 16:00:00, maximum departure time: 2017-08-15 07:25:00\n- Cancelled: 429 flights, minimum departure time: 2017-05-17 09:10:00, maximum departure time: 2017-09-14 10:55:00\n- Departed: 58 flights, minimum departure time: 2017-08-14 23:55:00, maximum departure time: 2017-08-15 07:50:00\n- Delayed: 41 flights, minimum departure time: 2017-08-15 05:15:00, maximum departure time: 2017-08-16 07:25:00\n- On Time: 518 flights, minimum departure time: 2017-08-15 07:55:00, maximum departure time: 2017-08-16 08:00:00\n- Scheduled: 15383 flights, minimum departure time: 2017-08-16 08:05:00, maximum departure time: 2017-09-14 10:40:00'

In [53]:
#TC8 : PASS
agent_executor.run("Get me the count of each flight status grouped by status. Return only the status and count for each status type")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3maircrafts, aircrafts_data, airports, airports_data, boarding_passes, bookings, flights, flights_v, routes, seats, ticket_flights, tickets[0m
Thought:[32;1m[1;3mI can query the "flights" table to get the count of each flight status. I should check the schema of the "flights" table to see the relevant columns.
Action: sql_db_schema
Action Input: "flights"[0m
Observation: [33;1m[1;3m
CREATE TABLE flights (
	flight_id SERIAL NOT NULL, 
	flight_no CHAR(6) NOT NULL, 
	scheduled_departure TIMESTAMP WITH TIME ZONE NOT NULL, 
	scheduled_arrival TIMESTAMP WITH TIME ZONE NOT NULL, 
	departure_airport CHAR(3) NOT NULL, 
	arrival_airport CHAR(3) NOT NULL, 
	status VARCHAR(20) NOT NULL, 
	aircraft_code CHAR(3) NOT NULL, 
	actual_departure TIMESTAMP WITH TIME ZONE, 
	actual_arrival TIMESTAMP WITH TIME ZONE, 
	CONSTRAINT flights_pkey PRIMARY KEY (flight_id), 

'The count of each flight status grouped by status is as follows:\n- Departed: 58\n- Arrived: 49235\n- On Time: 518\n- Cancelled: 429\n- Delayed: 41\n- Scheduled: 15383'

In [54]:
#TC9 : PASS
agent_executor.run("Get me the list of seats and the fare conditions or class where those seats are for aircraft model Cessna 208 Caravan and output the aircraft code, the model, the seat number, and the fare conditions and order it by the seat number")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3maircrafts, aircrafts_data, airports, airports_data, boarding_passes, bookings, flights, flights_v, routes, seats, ticket_flights, tickets[0m
Thought:[32;1m[1;3mThe relevant tables for this question are "seats" and "aircrafts_data". I should query the schema of these tables to see what columns I can use in my query.
Action: sql_db_schema
Action Input: "seats, aircrafts_data"[0m
Observation: [33;1m[1;3m
CREATE TABLE aircrafts_data (
	aircraft_code CHAR(3) NOT NULL, 
	model JSONB NOT NULL, 
	range INTEGER NOT NULL, 
	CONSTRAINT aircrafts_pkey PRIMARY KEY (aircraft_code), 
	CONSTRAINT aircrafts_range_check CHECK (range > 0)
)

/*
3 rows from aircrafts_data table:
aircraft_code	model	range
773	{'en': 'Boeing 777-300', 'ru': 'Боинг 777-300'}	11100
763	{'en': 'Boeing 767-300', 'ru': 'Боинг 767-300'}	7900
SU9	{'en': 'Sukhoi Superjet-100', 'ru': 'Сухой

'The list of seats and fare conditions for the Cessna 208 Caravan aircraft model are as follows:\n- Aircraft Code: CN1\n- Model: Cessna 208 Caravan\n- Seat Number: 1A\n- Fare Conditions: Economy\n- Seat Number: 1B\n- Fare Conditions: Economy\n- Seat Number: 2A\n- Fare Conditions: Economy\n- Seat Number: 2B\n- Fare Conditions: Economy\n- Seat Number: 3A\n- Fare Conditions: Economy\n- Seat Number: 3B\n- Fare Conditions: Economy\n- Seat Number: 4A\n- Fare Conditions: Economy\n- Seat Number: 4B\n- Fare Conditions: Economy\n- Seat Number: 5A\n- Fare Conditions: Economy\n- Seat Number: 5B\n- Fare Conditions: Economy'

In [55]:
#test
agent_executor.run("describe the ticket_flights table")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3maircrafts, aircrafts_data, airports, airports_data, boarding_passes, bookings, flights, flights_v, routes, seats, ticket_flights, tickets[0m
Thought:[32;1m[1;3mThe ticket_flights table exists in the database. I can now query the schema of the ticket_flights table to get more information.
Action: sql_db_schema
Action Input: "ticket_flights"[0m
Observation: [33;1m[1;3m
CREATE TABLE ticket_flights (
	ticket_no CHAR(13) NOT NULL, 
	flight_id INTEGER NOT NULL, 
	fare_conditions VARCHAR(10) NOT NULL, 
	amount NUMERIC(10, 2) NOT NULL, 
	CONSTRAINT ticket_flights_pkey PRIMARY KEY (ticket_no, flight_id), 
	CONSTRAINT ticket_flights_flight_id_fkey FOREIGN KEY(flight_id) REFERENCES flights (flight_id), 
	CONSTRAINT ticket_flights_ticket_no_fkey FOREIGN KEY(ticket_no) REFERENCES tickets (ticket_no), 
	CONSTRAINT ticket_flights_amount_check CHECK (amount >

'The ticket_flights table has the following columns: ticket_no, flight_id, fare_conditions, and amount.'

In [58]:
#TC10 : FAIL (Had issues identifying 2/3 necessary tables. Mainly ticket_flights, and tickets. It also hallucinated passenger_name was
# in the bookings table when it's a tickets table object.)
agent_executor.run("Get me the passenger name that had the most flights where the flight had a status of Arrived or Departed. The output should return the passenger name and a count of how many distinct flights this passenger had ")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3maircrafts, aircrafts_data, airports, airports_data, boarding_passes, bookings, flights, flights_v, routes, seats, ticket_flights, tickets[0m
Thought:[32;1m[1;3mI can query the "flights" table to get the passenger name and count the number of distinct flights for each passenger. I should also filter the flights based on the status column.
Action: sql_db_schema
Action Input: "flights"[0m
Observation: [33;1m[1;3m
CREATE TABLE flights (
	flight_id SERIAL NOT NULL, 
	flight_no CHAR(6) NOT NULL, 
	scheduled_departure TIMESTAMP WITH TIME ZONE NOT NULL, 
	scheduled_arrival TIMESTAMP WITH TIME ZONE NOT NULL, 
	departure_airport CHAR(3) NOT NULL, 
	arrival_airport CHAR(3) NOT NULL, 
	status VARCHAR(20) NOT NULL, 
	aircraft_code CHAR(3) NOT NULL, 
	actual_departure TIMESTAMP WITH TIME ZONE, 
	actual_arrival TIMESTAMP WITH TIME ZONE, 
	CONSTRAINT flights_

'Agent stopped due to iteration limit or time limit.'

In [59]:
# It passed with Contextual Assistance
agent_executor.run("Get me the passenger name (from the tickets table) that had the most flights (flight_no column from the flights table) \
                   where the flight had a status \
                (status from the flights table) of Arrived or Departed. The output should return the passenger name and a \
                   count of how many distinct flights this passenger had. You will also need to make sure you add the ticket_flights \
                   table in order to join the flights table and tickets table ")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3maircrafts, aircrafts_data, airports, airports_data, boarding_passes, bookings, flights, flights_v, routes, seats, ticket_flights, tickets[0m
Thought:[32;1m[1;3mThe relevant tables for this question are flights, tickets, and ticket_flights. I should query the schema of these tables to see what columns I can use.
Action: sql_db_schema
Action Input: "flights, tickets, ticket_flights"[0m
Observation: [33;1m[1;3m
CREATE TABLE flights (
	flight_id SERIAL NOT NULL, 
	flight_no CHAR(6) NOT NULL, 
	scheduled_departure TIMESTAMP WITH TIME ZONE NOT NULL, 
	scheduled_arrival TIMESTAMP WITH TIME ZONE NOT NULL, 
	departure_airport CHAR(3) NOT NULL, 
	arrival_airport CHAR(3) NOT NULL, 
	status VARCHAR(20) NOT NULL, 
	aircraft_code CHAR(3) NOT NULL, 
	actual_departure TIMESTAMP WITH TIME ZONE, 
	actual_arrival TIMESTAMP WITH TIME ZONE, 
	CONSTRAINT flights_pk

'Aleksandr Kuznecov'

In [61]:
#TC11 : FAIL (it skipped all the tables in the schema and hallucinated tables that don't exist.)
agent_executor.run("Get me the the passenger name, phone number, and email of 10 passengers ordered by passenger name \
                   in ascending order. Make sure to only retrieve data of passengers that have an email")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3maircrafts, aircrafts_data, airports, airports_data, boarding_passes, bookings, flights, flights_v, routes, seats, ticket_flights, tickets[0m
Thought:[32;1m[1;3mI see that the relevant tables for this question are passengers and contacts. I should query the schema of these tables to see the column names.
Action: sql_db_schema
Action Input: "passengers, contacts"[0m
Observation: [33;1m[1;3mError: table_names {'contacts', 'passengers'} not found in database[0m
Thought:[32;1m[1;3mOops! It seems that the tables "passengers" and "contacts" do not exist in the database. I should double check the table names by calling sql_db_list_tables again.
Action: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3maircrafts, aircrafts_data, airports, airports_data, boarding_passes, bookings, flights, flights_v, routes, seats, ticket_flights,

OutputParserException: Could not parse LLM output: `Oops! It seems that the tables "passenger" and "contact" do not exist in the database. I am unable to retrieve the required information to answer the question.`

In [62]:
# Passed with Contextual Assistance
agent_executor.run("Get me the the passenger name (from the tickets table), phone number (contact_data column from tickets table),\
                    and email contact_data column from tickets table) of 10 passengers ordered by passenger name \
                   in ascending order. Make sure to only retrieve data of passengers that have an email.\
                   Note that the contact_data column is jsonb data type and contains both the email and phone number")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3maircrafts, aircrafts_data, airports, airports_data, boarding_passes, bookings, flights, flights_v, routes, seats, ticket_flights, tickets[0m
Thought:[32;1m[1;3mI can query the "tickets" table to get the passenger name, phone number, and email. I should check the schema of the "tickets" table to confirm the column names.
Action: sql_db_schema
Action Input: "tickets"[0m
Observation: [33;1m[1;3m
CREATE TABLE tickets (
	ticket_no CHAR(13) NOT NULL, 
	book_ref CHAR(6) NOT NULL, 
	passenger_id VARCHAR(20) NOT NULL, 
	passenger_name TEXT NOT NULL, 
	contact_data JSONB, 
	CONSTRAINT tickets_pkey PRIMARY KEY (ticket_no), 
	CONSTRAINT tickets_book_ref_fkey FOREIGN KEY(book_ref) REFERENCES bookings (book_ref)
)

/*
3 rows from tickets table:
ticket_no	book_ref	passenger_id	passenger_name	contact_data
0005432000860	8BBCD2	4750 122452	VLADIMIR FROLOV	{'pho

'The passenger name, phone number, and email of 10 passengers ordered by passenger name in ascending order are:\n1. Adelina Afanaseva, +70868634381, afanaseva_adelina-1960@postgrespro.ru\n2. Adelina Alekseeva, +70540852393, adelina_alekseeva-021969@postgrespro.ru\n3. Adelina Andreeva, +70068029304, andreeva-a_1958@postgrespro.ru\n4. Adelina Belyaeva, +70236254085, adelina.belyaeva.1956@postgrespro.ru\n5. Adelina Chernova, +70537037190, adelina_chernova041972@postgrespro.ru\n6. Adelina Danilova, +70595112786, danilova_adelina-18121972@postgrespro.ru\n7. Adelina Denisova, +70243788454, denisova.a-01081973@postgrespro.ru\n8. Adelina Efimova, +70920616014, a.efimova.31051981@postgrespro.ru\n9. Adelina Egorova, +70996111276, adelina_egorova15081969@postgrespro.ru\n10. Adelina Filippova, +70571835825, filippova.adelina_041975@postgrespro.ru'

In [63]:
#TC12 : FAIL (checked the airports view which was a good idea and less complicated than checking airport_data table which is what I did
# but couldn't perform a nested query with aggregate function)
agent_executor.run("Which cities have more than 1 airport? Return the airport code, airport name, and city in your response.")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3maircrafts, aircrafts_data, airports, airports_data, boarding_passes, bookings, flights, flights_v, routes, seats, ticket_flights, tickets[0m
Thought:[32;1m[1;3mI can query the "airports" table to find cities with more than 1 airport. I should check the schema of the "airports" table to see the relevant columns.
Action: sql_db_schema
Action Input: "airports"[0m
Observation: [33;1m[1;3m
CREATE TABLE airports (
	airport_code CHAR(3), 
	airport_name TEXT, 
	city TEXT, 
	coordinates TEXT, 
	timezone TEXT
)

/*
3 rows from airports table:
airport_code	airport_name	city	coordinates	timezone
YKS	Yakutsk Airport	Yakutsk	(129.77099609375,62.093299865722656)	Asia/Yakutsk
MJZ	Mirny Airport	Mirnyj	(114.03900146484375,62.534698486328125)	Asia/Yakutsk
KHV	Khabarovsk-Novy Airport	Khabarovsk	(135.18800354004,48.52799987793)	Asia/Vladivostok
*/[0m
Thought:[32

"I don't know."

In [65]:
# Passed using Contextual Assistance. This, however, was too much assistance and would require 
# expertise business users generally won't have. 
agent_executor.run("Which cities have more than 1 airport? Return the airport code, airport name,and city in your response.\
                   Consider adding city to a where clause and using a nested query to check for cities that having than 1 record\
                    and group by city")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3maircrafts, aircrafts_data, airports, airports_data, boarding_passes, bookings, flights, flights_v, routes, seats, ticket_flights, tickets[0m
Thought:[32;1m[1;3mI can query the "airports" table to find cities with more than 1 airport. I should also query the schema of the "airports" table to see the relevant columns.
Action: sql_db_schema
Action Input: "airports"[0m
Observation: [33;1m[1;3m
CREATE TABLE airports (
	airport_code CHAR(3), 
	airport_name TEXT, 
	city TEXT, 
	coordinates TEXT, 
	timezone TEXT
)

/*
3 rows from airports table:
airport_code	airport_name	city	coordinates	timezone
YKS	Yakutsk Airport	Yakutsk	(129.77099609375,62.093299865722656)	Asia/Yakutsk
MJZ	Mirny Airport	Mirnyj	(114.03900146484375,62.534698486328125)	Asia/Yakutsk
KHV	Khabarovsk-Novy Airport	Khabarovsk	(135.18800354004,48.52799987793)	Asia/Vladivostok
*/[0m
Thought

'The cities that have more than 1 airport are Moscow and Ulyanovsk.'

In [66]:
# TC13 : PASS (With exception: didn't include 4 of the columns asked in the prompt) / temperature = 0
# TC13 : PASS (Perfect execution and included all of the required columns) / Temperature = 0.2
agent_executor.run("Get me the ticket number, flight id ,fare conditions, seat number, boarding number, passenger name, and amount for flight 4686 where the status is Arrived. Order the results by amount in descending order")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3maircrafts, aircrafts_data, airports, airports_data, boarding_passes, bookings, flights, flights_v, routes, seats, ticket_flights, tickets[0m
Thought:[32;1m[1;3mI can query the tables "flights" and "ticket_flights" to get the relevant information. I should check the schema of these tables to see what columns I can use.
Action: sql_db_schema
Action Input: "flights, ticket_flights"[0m
Observation: [33;1m[1;3m
CREATE TABLE flights (
	flight_id SERIAL NOT NULL, 
	flight_no CHAR(6) NOT NULL, 
	scheduled_departure TIMESTAMP WITH TIME ZONE NOT NULL, 
	scheduled_arrival TIMESTAMP WITH TIME ZONE NOT NULL, 
	departure_airport CHAR(3) NOT NULL, 
	arrival_airport CHAR(3) NOT NULL, 
	status VARCHAR(20) NOT NULL, 
	aircraft_code CHAR(3) NOT NULL, 
	actual_departure TIMESTAMP WITH TIME ZONE, 
	actual_arrival TIMESTAMP WITH TIME ZONE, 
	CONSTRAINT flights_pkey

'The ticket number, flight id, fare conditions, and amount for flight 4686 where the status is Arrived, ordered by amount in descending order are as follows:\nTicket Number: 0005435982449, Flight ID: 4686, Fare Conditions: Business, Amount: 184500.00\nTicket Number: 0005435982443, Flight ID: 4686, Fare Conditions: Business, Amount: 184500.00\nTicket Number: 0005433754424, Flight ID: 4686, Fare Conditions: Business, Amount: 184500.00\nTicket Number: 0005432984987, Flight ID: 4686, Fare Conditions: Business, Amount: 184500.00\nTicket Number: 0005435982482, Flight ID: 4686, Fare Conditions: Business, Amount: 184500.00\nTicket Number: 0005432984890, Flight ID: 4686, Fare Conditions: Business, Amount: 184500.00\nTicket Number: 0005435982462, Flight ID: 4686, Fare Conditions: Business, Amount: 184500.00\nTicket Number: 0005433434546, Flight ID: 4686, Fare Conditions: Business, Amount: 184500.00\nTicket Number: 0005433754427, Flight ID: 4686, Fare Conditions: Business, Amount: 184500.00\nTick

In [70]:
#TC14 : FAIL (with exception: contextually correct, joins need improvement.
# Didn't join boarding passes table which contains the flight ids and ticket numbers along with seat number)
agent_executor.run("Which flight segments are included into ticket number 0005432661915?\
                    Return the scheduled departure date only, the departure city along\
                    with the departure airport, the arrival city along with the arrival airport,\
                    flight status, and the seat numbers for each flight segment. \
                   Order by scheduled departure")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3maircrafts, aircrafts_data, airports, airports_data, boarding_passes, bookings, flights, flights_v, routes, seats, ticket_flights, tickets[0m
Thought:[32;1m[1;3mI can query the tables "ticket_flights", "flights", "airports", and "seats" to get the information I need.
Action: sql_db_schema
Action Input: "ticket_flights, flights, airports, seats"[0m
Observation: [33;1m[1;3m
CREATE TABLE airports (
	airport_code CHAR(3), 
	airport_name TEXT, 
	city TEXT, 
	coordinates TEXT, 
	timezone TEXT
)

/*
3 rows from airports table:
airport_code	airport_name	city	coordinates	timezone
YKS	Yakutsk Airport	Yakutsk	(129.77099609375,62.093299865722656)	Asia/Yakutsk
MJZ	Mirny Airport	Mirnyj	(114.03900146484375,62.534698486328125)	Asia/Yakutsk
KHV	Khabarovsk-Novy Airport	Khabarovsk	(135.18800354004,48.52799987793)	Asia/Vladivostok
*/


CREATE TABLE flights (
	flig

'The flight segments included in ticket number 0005432661915 are as follows:\n- Scheduled Departure: 2017-07-29 06:50:00\n- Departure: Sheremetyevo International Airport, Moscow\n- Arrival: Ugolny Airport, Anadyr\n- Flight Status: Arrived\n- Seat Number: 10B, 10C, 10D, 10E, 10F, 11A, 11B, 11C, 11D, 10A'

In [72]:
# Passed with Contextual Assistance. This, however, was too much assistance and would require 
# expertise business users generally won't have.
agent_executor.run("Which flight segments are included into ticket number 0005432661915?\
                    Return the scheduled departure date only, the departure city along\
                    with the departure airport, the arrival city along with the arrival airport,\
                    flight status, and the seat numbers (from the boarding_passes table) for each flight segment. \
                   Order by scheduled departure. Get the seat number from the boarding_passes table\
                   and join it to the ticket flights table using flight_id and ticket_no columns")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3maircrafts, aircrafts_data, airports, airports_data, boarding_passes, bookings, flights, flights_v, routes, seats, ticket_flights, tickets[0m
Thought:[32;1m[1;3mI can query the tables: boarding_passes, flights, and ticket_flights to get the information I need. I should check the schema of these tables to see what columns I can use.
Action: sql_db_schema
Action Input: "boarding_passes, flights, ticket_flights"[0m
Observation: [33;1m[1;3m
CREATE TABLE boarding_passes (
	ticket_no CHAR(13) NOT NULL, 
	flight_id INTEGER NOT NULL, 
	boarding_no INTEGER NOT NULL, 
	seat_no VARCHAR(4) NOT NULL, 
	CONSTRAINT boarding_passes_pkey PRIMARY KEY (ticket_no, flight_id), 
	CONSTRAINT boarding_passes_ticket_no_fkey FOREIGN KEY(ticket_no, flight_id) REFERENCES ticket_flights (ticket_no, flight_id), 
	CONSTRAINT boarding_passes_flight_id_boarding_no_key UNIQUE (

'The flight segments included in ticket number 0005432661915 are as follows:\n- Scheduled departure: 2017-07-29 06:50\n- Departure city and airport: Moscow, Sheremetyevo International Airport\n- Arrival city and airport: Anadyr, Ugolny Airport\n- Flight status: Arrived\n- Seat number: 5C\n\n- Scheduled departure: 2017-08-01 16:00\n- Departure city and airport: Anadyr, Ugolny Airport\n- Arrival city and airport: Khabarovsk, Khabarovsk-Novy Airport\n- Flight status: Arrived\n- Seat number: 1D\n\n- Scheduled departure: 2017-08-02 20:15\n- Departure city and airport: Khabarovsk, Khabarovsk-Novy Airport\n- Arrival city and airport: Blagoveschensk, Ignatyevo Airport\n- Flight status: Arrived\n- Seat number: 2C\n\n- Scheduled departure: 2017-08-08 00:45\n- Departure city and airport: Blagoveschensk, Ignatyevo Airport\n- Arrival city and airport: Khabarovsk, Khabarovsk-Novy Airport\n- Flight status: Arrived\n- Seat number: 2D\n\n- Scheduled departure: 2017-08-11 17:20\n- Departure city and air

In [77]:
# TC15 : PASS 
agent_executor.run("Which flight segments are included into ticket number 0005432661915? \
                   Return the scheduled departure date only, the departure city along with the departure airport,\
                   the arrival city along with the arrival airport, the fare conditions, \
                   and the amount each transaction cost. order by scheduled departure")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3maircrafts, aircrafts_data, airports, airports_data, boarding_passes, bookings, flights, flights_v, routes, seats, ticket_flights, tickets[0m
Thought:[32;1m[1;3mI see that the relevant tables for this question are ticket_flights, flights, and airports. I should query the schema of these tables to see what columns I can use in my query.
Action: sql_db_schema
Action Input: "ticket_flights, flights, airports"[0m
Observation: [33;1m[1;3m
CREATE TABLE airports (
	airport_code CHAR(3), 
	airport_name TEXT, 
	city TEXT, 
	coordinates TEXT, 
	timezone TEXT
)

/*
3 rows from airports table:
airport_code	airport_name	city	coordinates	timezone
YKS	Yakutsk Airport	Yakutsk	(129.77099609375,62.093299865722656)	Asia/Yakutsk
MJZ	Mirny Airport	Mirnyj	(114.03900146484375,62.534698486328125)	Asia/Yakutsk
KHV	Khabarovsk-Novy Airport	Khabarovsk	(135.18800354004,48.

'The flight segments included in ticket number 0005432661915 are as follows:\n1. Scheduled Departure: 2017-07-29 06:50:00, Departure: Sheremetyevo International Airport (SVO), Arrival: Ugolny Airport (DYR), Fare Conditions: Business, Amount: 185300.00\n2. Scheduled Departure: 2017-08-01 16:00:00, Departure: Ugolny Airport (DYR), Arrival: Khabarovsk-Novy Airport (KHV), Fare Conditions: Business, Amount: 92200.00\n3. Scheduled Departure: 2017-08-02 20:15:00, Departure: Khabarovsk-Novy Airport (KHV), Arrival: Ignatyevo Airport (BQS), Fare Conditions: Business, Amount: 18000.00\n4. Scheduled Departure: 2017-08-08 00:45:00, Departure: Ignatyevo Airport (BQS), Arrival: Khabarovsk-Novy Airport (KHV), Fare Conditions: Business, Amount: 18000.00\n5. Scheduled Departure: 2017-08-11 17:20:00, Departure: Khabarovsk-Novy Airport (KHV), Arrival: Ugolny Airport (DYR), Fare Conditions: Economy, Amount: 30700.00\n6. Scheduled Departure: 2017-08-16 17:45:00, Departure: Ugolny Airport (DYR), Arrival: She

In [86]:
# TC16 : PASS (With exception: context was correct and the data pulled was correct but the format isn't what I asked for)
agent_executor.run("Get me the aircraft code and the fare conditions along with each fare conditions count per aircraft code. \
                   For example, aircraft code 319 would have a fare conditions of business(20), Economy(96) where 96 represent\
                    seats in economy fare conditions while 20 represents 20 business seats. \
                   The output should have the aircraft_code, and fare_conditions with their seat count in the same record. ")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3maircrafts, aircrafts_data, airports, airports_data, boarding_passes, bookings, flights, flights_v, routes, seats, ticket_flights, tickets[0m
Thought:[32;1m[1;3mThe relevant tables for this question are likely "aircrafts" and "seats". I should query the schema of these tables to see what columns are available.
Action: sql_db_schema
Action Input: "aircrafts, seats"[0m
Observation: [33;1m[1;3m
CREATE TABLE aircrafts (
	aircraft_code CHAR(3), 
	model TEXT, 
	range INTEGER
)

/*
3 rows from aircrafts table:
aircraft_code	model	range
773	Boeing 777-300	11100
763	Boeing 767-300	7900
SU9	Sukhoi Superjet-100	3000
*/


CREATE TABLE seats (
	aircraft_code CHAR(3) NOT NULL, 
	seat_no VARCHAR(4) NOT NULL, 
	fare_conditions VARCHAR(10) NOT NULL, 
	CONSTRAINT seats_pkey PRIMARY KEY (aircraft_code, seat_no), 
	CONSTRAINT seats_aircraft_code_fkey FOREIGN KEY(a

OutputParserException: Could not parse LLM output: `I now know the final answer.`

## Testing Langchain on the AdventureWorksProd database

In [124]:
from langchain.chains.sql_database.base import SQLDatabaseChain
chain = SQLDatabaseChain.from_llm(llm, db, verbose=True)

In [125]:
chain.run("describe the employee table")



[1m> Entering new SQLDatabaseChain chain...[0m
describe the employee table
SQLQuery:[32;1m[1;3mSELECT column_name, data_type 
FROM information_schema.columns 
WHERE table_name = 'employee';[0m
SQLResult: [33;1m[1;3m[('businessentityid', 'integer'), ('salariedflag', 'boolean'), ('vacationhours', 'smallint'), ('sickleavehours', 'smallint'), ('currentflag', 'boolean'), ('rowguid', 'uuid'), ('modifieddate', 'timestamp without time zone'), ('birthdate', 'date'), ('hiredate', 'date'), ('nationalidnumber', 'character varying'), ('loginid', 'character varying'), ('jobtitle', 'character varying'), ('organizationnode', 'character varying'), ('maritalstatus', 'character'), ('gender', 'character')][0m
Answer:[32;1m[1;3mThe employee table has the following columns:
- businessentityid (integer)
- salariedflag (boolean)
- vacationhours (smallint)
- sickleavehours (smallint)
- currentflag (boolean)
- rowguid (uuid)
- modifieddate (timestamp without time zone)
- birthdate (date)
- hiredate 

'The employee table has the following columns:\n- businessentityid (integer)\n- salariedflag (boolean)\n- vacationhours (smallint)\n- sickleavehours (smallint)\n- currentflag (boolean)\n- rowguid (uuid)\n- modifieddate (timestamp without time zone)\n- birthdate (date)\n- hiredate (date)\n- nationalidnumber (character varying)\n- loginid (character varying)\n- jobtitle (character varying)\n- organizationnode (character varying)\n- maritalstatus (character)\n- gender (character)'

In [126]:
# TC1 : (temperature = 0)  | PASS
# TC1 : (temperature = 0.2)  | PASS
chain.run("How many rows are in the prod.employee table? Please do not reference humanresources.employee table. That schema no longer exists")



[1m> Entering new SQLDatabaseChain chain...[0m
How many rows are in the prod.employee table? Please do not reference humanresources.employee table. That schema no longer exists
SQLQuery:[32;1m[1;3mSELECT COUNT(*) FROM prod.employee;[0m
SQLResult: [33;1m[1;3m[(290,)][0m
Answer:[32;1m[1;3mFinal answer here: There are 290 rows in the prod.employee table.[0m
[1m> Finished chain.[0m


'Final answer here: There are 290 rows in the prod.employee table.'

In [127]:
# TC2 : (temperature = 0)  | PASS
# TC2 : (temperature = 0.2)  | PASS
chain.run("Get me the jobtitle, maritalstatus, gender, and vacationhours of the top 5 employee with the most vacation hours from prod.employee")



[1m> Entering new SQLDatabaseChain chain...[0m
Get me the jobtitle, maritalstatus, gender, and vacationhours of the top 5 employee with the most vacation hours from prod.employee
SQLQuery:[32;1m[1;3mSELECT "jobtitle", "maritalstatus", "gender", "vacationhours"
FROM prod.employee
ORDER BY "vacationhours" DESC
LIMIT 5;[0m
SQLResult: [33;1m[1;3m[('Production Technician - WC50', 'M', 'M', 99), ('Production Technician - WC10', 'S', 'F', 99), ('Chief Executive Officer', 'S', 'M', 99), ('Production Technician - WC10', 'M', 'M', 98), ('Production Technician - WC50', 'S', 'M', 98)][0m
Answer:[32;1m[1;3mThe jobtitle, maritalstatus, gender, and vacationhours of the top 5 employees with the most vacation hours from prod.employee are:

1. Job Title: Production Technician - WC50, Marital Status: M, Gender: M, Vacation Hours: 99
2. Job Title: Production Technician - WC10, Marital Status: S, Gender: F, Vacation Hours: 99
3. Job Title: Chief Executive Officer, Marital Status: S, Gender: M, 

'The jobtitle, maritalstatus, gender, and vacationhours of the top 5 employees with the most vacation hours from prod.employee are:\n\n1. Job Title: Production Technician - WC50, Marital Status: M, Gender: M, Vacation Hours: 99\n2. Job Title: Production Technician - WC10, Marital Status: S, Gender: F, Vacation Hours: 99\n3. Job Title: Chief Executive Officer, Marital Status: S, Gender: M, Vacation Hours: 99\n4. Job Title: Production Technician - WC10, Marital Status: M, Gender: M, Vacation Hours: 98\n5. Job Title: Production Technician - WC50, Marital Status: S, Gender: M, Vacation Hours: 98'

In [128]:
# TC3 : (temperature = 0) |  PASS
# TC3 : (temperature = 0.2) |  PASS
chain.run("Get me the jobtitle, maritalstatus, gender, and vacationhours of the top 5 employees with the least vacation hours from prod.employee")



[1m> Entering new SQLDatabaseChain chain...[0m
Get me the jobtitle, maritalstatus, gender, and vacationhours of the top 5 employees with the least vacation hours from prod.employee
SQLQuery:[32;1m[1;3mSELECT "jobtitle", "maritalstatus", "gender", "vacationhours"
FROM prod.employee
ORDER BY "vacationhours" ASC
LIMIT 5;[0m
SQLResult: [33;1m[1;3m[('Chief Financial Officer', 'M', 'F', 0), ('Production Technician - WC20', 'S', 'M', 0), ('Production Technician - WC50', 'S', 'M', 0), ('Production Technician - WC50', 'M', 'M', 1), ('Vice President of Engineering', 'S', 'F', 1)][0m
Answer:[32;1m[1;3mThe jobtitle, maritalstatus, gender, and vacationhours of the top 5 employees with the least vacation hours are:
1. Job Title: Chief Financial Officer, Marital Status: M, Gender: F, Vacation Hours: 0
2. Job Title: Production Technician - WC20, Marital Status: S, Gender: M, Vacation Hours: 0
3. Job Title: Production Technician - WC50, Marital Status: S, Gender: M, Vacation Hours: 0
4. Job

'The jobtitle, maritalstatus, gender, and vacationhours of the top 5 employees with the least vacation hours are:\n1. Job Title: Chief Financial Officer, Marital Status: M, Gender: F, Vacation Hours: 0\n2. Job Title: Production Technician - WC20, Marital Status: S, Gender: M, Vacation Hours: 0\n3. Job Title: Production Technician - WC50, Marital Status: S, Gender: M, Vacation Hours: 0\n4. Job Title: Production Technician - WC50, Marital Status: M, Gender: M, Vacation Hours: 1\n5. Job Title: Vice President of Engineering, Marital Status: S, Gender: F, Vacation Hours: 1'

In [129]:
# TC4 : (temperature = 0) | FAIL
# TC4 : (temperature = 0.2) | FAIL (salary_type is not a column, value 'salaried' is not a valid input)
chain.run("Get me the number of employees that are salaried from the prod.employee table")



[1m> Entering new SQLDatabaseChain chain...[0m
Get me the number of employees that are salaried from the prod.employee table
SQLQuery:[32;1m[1;3mSELECT COUNT(*) 
FROM prod.employee 
WHERE salary_type = 'salaried';[0m

ProgrammingError: (psycopg2.errors.UndefinedColumn) column "salary_type" does not exist
LINE 3: WHERE salary_type = 'salaried';
              ^

[SQL: SELECT COUNT(*) 
FROM prod.employee 
WHERE salary_type = 'salaried';]
(Background on this error at: https://sqlalche.me/e/20/f405)

In [130]:
# TC5 : (temperature = 0) PASS
# TC5 : (temperature = 0.2) PASS
chain.run("how many males and females employees are there in the prod.employee table?")



[1m> Entering new SQLDatabaseChain chain...[0m
how many males and females employees are there in the prod.employee table?
SQLQuery:[32;1m[1;3mSELECT COUNT(*) FILTER (WHERE gender = 'M') AS males, COUNT(*) FILTER (WHERE gender = 'F') AS females
FROM prod.employee;[0m
SQLResult: [33;1m[1;3m[(206, 84)][0m
Answer:[32;1m[1;3mThere are 206 male employees and 84 female employees in the prod.employee table.[0m
[1m> Finished chain.[0m


'There are 206 male employees and 84 female employees in the prod.employee table.'

In [131]:
# TC6 : (temperature = 0) FAIL (it had the right idea but failed due to incorrect column name reference. Hallucinating column names?)
# TC6 : (temperature = 0.2)
chain.run("Get all the employee records for the 10 oldest employees from the prod.employee table and calculate their individual age.")



[1m> Entering new SQLDatabaseChain chain...[0m
Get all the employee records for the 10 oldest employees from the prod.employee table and calculate their individual age.
SQLQuery:[32;1m[1;3mSELECT "employee_id", "first_name", "last_name", "DATE_PART('year', CURRENT_DATE) - DATE_PART('year', "birth_date") AS age
FROM "prod"."employee"
ORDER BY "birth_date" ASC
LIMIT 10;[0m

ProgrammingError: (psycopg2.errors.SyntaxError) syntax error at or near "") AS age
FROM ""
LINE 1: ...ear', CURRENT_DATE) - DATE_PART('year', "birth_date") AS age
                                                              ^

[SQL: SELECT "employee_id", "first_name", "last_name", "DATE_PART('year', CURRENT_DATE) - DATE_PART('year', "birth_date") AS age
FROM "prod"."employee"
ORDER BY "birth_date" ASC
LIMIT 10;]
(Background on this error at: https://sqlalche.me/e/20/f405)

In [132]:
# TC7 : (temperature = 0)  | FAIL
# TC7 : (temperature = 0.2)  | FAIL (Employees is not a table, StartDate is not a column)
chain.run("How many years has the employee with the job title Chief Executive Officer been working at the company?")



[1m> Entering new SQLDatabaseChain chain...[0m
How many years has the employee with the job title Chief Executive Officer been working at the company?
SQLQuery:[32;1m[1;3mSELECT EXTRACT(YEAR FROM CURRENT_DATE) - EXTRACT(YEAR FROM "StartDate") AS years_working
FROM "Employees"
WHERE "JobTitle" = 'Chief Executive Officer'
LIMIT 1;[0m

ProgrammingError: (psycopg2.errors.UndefinedTable) relation "Employees" does not exist
LINE 2: FROM "Employees"
             ^

[SQL: SELECT EXTRACT(YEAR FROM CURRENT_DATE) - EXTRACT(YEAR FROM "StartDate") AS years_working
FROM "Employees"
WHERE "JobTitle" = 'Chief Executive Officer'
LIMIT 1;]
(Background on this error at: https://sqlalche.me/e/20/f405)

In [134]:
# TC8 : (temperature = 0)  | FAIL (Can't seem to discern jobtile from "job title"
# so it's converting columns to camel case. It got the context correctly. salary_type is not a column, employees is not a table)
# TC8 : (temperature = 0.2)  | FAIL 
chain.run("How many employees that have the term Marketing in the job title are salaried?")



[1m> Entering new SQLDatabaseChain chain...[0m
How many employees that have the term Marketing in the job title are salaried?
SQLQuery:[32;1m[1;3mSELECT COUNT(*) 
FROM employees 
WHERE job_title LIKE '%Marketing%' AND salary_type = 'Salaried'
LIMIT 5;[0m

ProgrammingError: (psycopg2.errors.UndefinedTable) relation "employees" does not exist
LINE 2: FROM employees 
             ^

[SQL: SELECT COUNT(*) 
FROM employees 
WHERE job_title LIKE '%%Marketing%%' AND salary_type = 'Salaried'
LIMIT 5;]
(Background on this error at: https://sqlalche.me/e/20/f405)

In [135]:

# It passes TC8 when I give it a ton of assistance
chain.run("How many employees (in the prod.employee table) whose jobtitle contain the term 'Marketing' are salaried. The salariedflag=True when an employee is salaried, else, false ")



[1m> Entering new SQLDatabaseChain chain...[0m
How many employees (in the prod.employee table) whose jobtitle contain the term 'Marketing' are salaried. The salariedflag=True when an employee is salaried, else, false 
SQLQuery:[32;1m[1;3mSELECT COUNT(*) 
FROM prod.employee 
WHERE jobtitle LIKE '%Marketing%' 
AND salariedflag = true;[0m
SQLResult: [33;1m[1;3m[(1,)][0m
Answer:[32;1m[1;3mFinal answer here: There is 1 employee in the prod.employee table whose jobtitle contains the term 'Marketing' and is salaried.[0m
[1m> Finished chain.[0m


"Final answer here: There is 1 employee in the prod.employee table whose jobtitle contains the term 'Marketing' and is salaried."

In [136]:
# TC9: (temperature = 0)  (FAIL: it completely forgot prior tables referenced and made up two columns (job_title, salary_type) in its search)
# TC9: (temperature = 0.2) 
chain.run("How many employees that have the term 'Marketing' in their job title are not salaried?")



[1m> Entering new SQLDatabaseChain chain...[0m
How many employees that have the term 'Marketing' in their job title are not salaried?
SQLQuery:[32;1m[1;3mSELECT COUNT(*) 
FROM employees 
WHERE job_title LIKE '%Marketing%' 
AND salary_type <> 'Salaried';[0m

ProgrammingError: (psycopg2.errors.UndefinedTable) relation "employees" does not exist
LINE 2: FROM employees 
             ^

[SQL: SELECT COUNT(*) 
FROM employees 
WHERE job_title LIKE '%%Marketing%%' 
AND salary_type <> 'Salaried';]
(Background on this error at: https://sqlalche.me/e/20/f405)

In [137]:
# It passed when I provided contextual assistance to the prompt
chain.run("How many employees (in the prod.employee tbale) that have the term 'Marketing' in their job title are not salaried? The salariedflag=True when an employee is salaried, else, false")



[1m> Entering new SQLDatabaseChain chain...[0m
How many employees (in the prod.employee tbale) that have the term 'Marketing' in their job title are not salaried? The salariedflag=True when an employee is salaried, else, false
SQLQuery:[32;1m[1;3mSELECT COUNT(*) 
FROM prod.employee 
WHERE jobtitle LIKE '%Marketing%' AND salariedflag = false;[0m
SQLResult: [33;1m[1;3m[(8,)][0m
Answer:[32;1m[1;3mFinal answer: There are 8 employees in the prod.employee table that have the term 'Marketing' in their job title and are not salaried.[0m
[1m> Finished chain.[0m


"Final answer: There are 8 employees in the prod.employee table that have the term 'Marketing' in their job title and are not salaried."

In [138]:
# TC10 : (temperature = 0)  FAIL (Referenced the employees table instead of the employee table, and employeetype column which doesn't exist)
# TC10 : (temperature = 0.2) FAIL (EmployeeType is not a column)
chain.run("How many vacationhours, in days, do salaried employees with the highest vacation hours have? Limit to the top 10 records")



[1m> Entering new SQLDatabaseChain chain...[0m
How many vacationhours, in days, do salaried employees with the highest vacation hours have? Limit to the top 10 records
SQLQuery:[32;1m[1;3mSELECT "Employee"."VacationHours" / 8 AS "VacationDays"
FROM "Employee"
WHERE "Employee"."EmployeeType" = 'Salaried'
ORDER BY "Employee"."VacationHours" DESC
LIMIT 10;[0m

ProgrammingError: (psycopg2.errors.UndefinedTable) relation "Employee" does not exist
LINE 2: FROM "Employee"
             ^

[SQL: SELECT "Employee"."VacationHours" / 8 AS "VacationDays"
FROM "Employee"
WHERE "Employee"."EmployeeType" = 'Salaried'
ORDER BY "Employee"."VacationHours" DESC
LIMIT 10;]
(Background on this error at: https://sqlalche.me/e/20/f405)

In [139]:
# It passes when I gave it contextual assistance and object references
# failed for on 0.2 model temp. "VacationHours" is not a column, vacationhours is, "SalariedFlag" is not a column,
chain.run("How many vacationhours, in days, do salaried employees (from the prod.employee table) with the highest vacationhours have? Limit to the top 10 records. The salariedflag=True when an employee is salaried, else, false")



[1m> Entering new SQLDatabaseChain chain...[0m
How many vacationhours, in days, do salaried employees (from the prod.employee table) with the highest vacationhours have? Limit to the top 10 records. The salariedflag=True when an employee is salaried, else, false
SQLQuery:[32;1m[1;3mSELECT "VacationHours" / 8 AS "VacationDays"
FROM prod.employee
WHERE "SalariedFlag" = true
ORDER BY "VacationHours" DESC
LIMIT 10;[0m

ProgrammingError: (psycopg2.errors.UndefinedColumn) column "VacationHours" does not exist
LINE 1: SELECT "VacationHours" / 8 AS "VacationDays"
               ^
HINT:  Perhaps you meant to reference the column "employee.vacationhours".

[SQL: SELECT "VacationHours" / 8 AS "VacationDays"
FROM prod.employee
WHERE "SalariedFlag" = true
ORDER BY "VacationHours" DESC
LIMIT 10;]
(Background on this error at: https://sqlalche.me/e/20/f405)

In [140]:
# TC11 : (temperature = 0)  | FAIL (Column Salaried_Type and Vacation_hours don't exist, neither does employees table)
# TC11 : (temperature = 0.2) | FAIL (emmployees is not a table. However, It recognized SalariedFlag as a column)
chain.run("How many vacation hours, in days, do the top 10 not salaried employees with the highest vacation hours individually have? ")



[1m> Entering new SQLDatabaseChain chain...[0m
How many vacation hours, in days, do the top 10 not salaried employees with the highest vacation hours individually have? 
SQLQuery:[32;1m[1;3mSELECT "EmployeeID", "VacationHours" / 8 AS "VacationDays"
FROM "Employees"
WHERE "SalariedFlag" = false
ORDER BY "VacationHours" DESC
LIMIT 10;[0m

ProgrammingError: (psycopg2.errors.UndefinedTable) relation "Employees" does not exist
LINE 2: FROM "Employees"
             ^

[SQL: SELECT "EmployeeID", "VacationHours" / 8 AS "VacationDays"
FROM "Employees"
WHERE "SalariedFlag" = false
ORDER BY "VacationHours" DESC
LIMIT 10;]
(Background on this error at: https://sqlalche.me/e/20/f405)

In [141]:
# passed with additional contextual assistance 
# failed with temp 0.2 as it continues to add quotation marks around column, schema, and tables names
chain.run("How many vacationhours, in days, do the not salaried employees (in the prod.employee table) with the highest vacation hours individually have?The salariedflag=True when an employee is salaried, else, false. Limit to the top 10 records ")



[1m> Entering new SQLDatabaseChain chain...[0m
How many vacationhours, in days, do the not salaried employees (in the prod.employee table) with the highest vacation hours individually have?The salariedflag=True when an employee is salaried, else, false. Limit to the top 10 records 
SQLQuery:[32;1m[1;3mSELECT "VacationHours" / 8 AS "VacationDays"
FROM "prod"."employee"
WHERE "SalariedFlag" = false
ORDER BY "VacationHours" DESC
LIMIT 10;[0m

ProgrammingError: (psycopg2.errors.UndefinedColumn) column "VacationHours" does not exist
LINE 1: SELECT "VacationHours" / 8 AS "VacationDays"
               ^
HINT:  Perhaps you meant to reference the column "employee.vacationhours".

[SQL: SELECT "VacationHours" / 8 AS "VacationDays"
FROM "prod"."employee"
WHERE "SalariedFlag" = false
ORDER BY "VacationHours" DESC
LIMIT 10;]
(Background on this error at: https://sqlalche.me/e/20/f405)

In [142]:
# TC12 : (temperature = 0)  | FAIL
# TC12 : (temperature = 0.2)  | FAIL (Job title is not a column, Salary Type is not a column. Used incorrect syntax and broke SQL rule)
chain.run("How many vacation hours, in days, do the top 10 not salaried employees with the highest vacation hours individually have? Only return the Job title and vacation days")



[1m> Entering new SQLDatabaseChain chain...[0m
How many vacation hours, in days, do the top 10 not salaried employees with the highest vacation hours individually have? Only return the Job title and vacation days
SQLQuery:[32;1m[1;3mSELECT "Job title", "Vacation hours" / 8 AS "Vacation days"
FROM employees
WHERE "Salary type" != 'Salaried'
ORDER BY "Vacation hours" DESC
LIMIT 10;[0m

ProgrammingError: (psycopg2.errors.UndefinedTable) relation "employees" does not exist
LINE 2: FROM employees
             ^

[SQL: SELECT "Job title", "Vacation hours" / 8 AS "Vacation days"
FROM employees
WHERE "Salary type" != 'Salaried'
ORDER BY "Vacation hours" DESC
LIMIT 10;]
(Background on this error at: https://sqlalche.me/e/20/f405)

In [143]:
#  test case passed
# test case failed with temp @ 0.2, added quotations around all db objects
chain.run("How many vacationhours, in days, do non-salaried employees (in the prod.employee table) with the highest vacationhours individually have?The salariedflag=True when an employee is salaried, else, false. Only return the Job title and vacation day and Limit to the top 10 records")



[1m> Entering new SQLDatabaseChain chain...[0m
How many vacationhours, in days, do non-salaried employees (in the prod.employee table) with the highest vacationhours individually have?The salariedflag=True when an employee is salaried, else, false. Only return the Job title and vacation day and Limit to the top 10 records
SQLQuery:[32;1m[1;3mSELECT "JobTitle", "VacationHours"/8 AS "VacationDays"
FROM prod.employee
WHERE "SalariedFlag" = false
ORDER BY "VacationHours" DESC
LIMIT 10;[0m

ProgrammingError: (psycopg2.errors.UndefinedColumn) column "JobTitle" does not exist
LINE 1: SELECT "JobTitle", "VacationHours"/8 AS "VacationDays"
               ^
HINT:  Perhaps you meant to reference the column "employee.jobtitle".

[SQL: SELECT "JobTitle", "VacationHours"/8 AS "VacationDays"
FROM prod.employee
WHERE "SalariedFlag" = false
ORDER BY "VacationHours" DESC
LIMIT 10;]
(Background on this error at: https://sqlalche.me/e/20/f405)

In [144]:
# TC13 : (temperature = 0)  | FAIL (referenced employees table which doesn't exist 
# instead of referencing employeedepartmenthistory and department Tables.Department_Group_Name should groupname)

# TC13 : (temperature = 0.2)  | FAIL (referenced departments table which doesn't exist instead of referencing employeedepartmenthistory 
# and department Tables. Department_Group_Name should groupname. Added quotations on db objects)
chain.run("How many employees does each department have? Return the department group name and employee count for each department group name")



[1m> Entering new SQLDatabaseChain chain...[0m
How many employees does each department have? Return the department group name and employee count for each department group name
SQLQuery:[32;1m[1;3mSELECT "department_group_name", COUNT(*) AS "employee_count"
FROM "departments"
GROUP BY "department_group_name"[0m

ProgrammingError: (psycopg2.errors.UndefinedTable) relation "departments" does not exist
LINE 2: FROM "departments"
             ^

[SQL: SELECT "department_group_name", COUNT(*) AS "employee_count"
FROM "departments"
GROUP BY "department_group_name"]
(Background on this error at: https://sqlalche.me/e/20/f405)

In [145]:
#  Passed when I add contextual assistance
chain.run("How many distinct employees (businessentityid from the prod.employeedepartmenthistory table) does each department have? Return the department groupname (from the prod.department table) and employee count for each department groupname")



[1m> Entering new SQLDatabaseChain chain...[0m
How many distinct employees (businessentityid from the prod.employeedepartmenthistory table) does each department have? Return the department groupname (from the prod.department table) and employee count for each department groupname
SQLQuery:[32;1m[1;3mSELECT d.groupname, COUNT(DISTINCT edh.businessentityid) AS employee_count
FROM prod.department d
JOIN prod.employeedepartmenthistory edh ON d.departmentid = edh.departmentid
GROUP BY d.groupname
ORDER BY employee_count DESC;[0m
SQLResult: [33;1m[1;3m[('Manufacturing', 185), ('Executive General and Administration', 35), ('Sales and Marketing', 28), ('Inventory Management', 19), ('Research and Development', 14), ('Quality Assurance', 12)][0m
Answer:[32;1m[1;3mFinal answer: Each department has the following number of distinct employees:
- Manufacturing: 185
- Executive General and Administration: 35
- Sales and Marketing: 28
- Inventory Management: 19
- Research and Development: 1

'Final answer: Each department has the following number of distinct employees:\n- Manufacturing: 185\n- Executive General and Administration: 35\n- Sales and Marketing: 28\n- Inventory Management: 19\n- Research and Development: 14\n- Quality Assurance: 12'

In [146]:
# TC14 : (temperature = 0) | FAIL (departments is not a table, departments_group_name is not a column)
# TC14 : (temperature = 0.2) | FAIL (added quotations on all db objects. employees is not a table, is_active is not a column
# departments is not a table)
chain.run("How many active employees does each department have? Return the department group name and employee count for each department group name")



[1m> Entering new SQLDatabaseChain chain...[0m
How many active employees does each department have? Return the department group name and employee count for each department group name
SQLQuery:[32;1m[1;3mSELECT "department_group_name", COUNT(*) AS "employee_count"
FROM "departments"
JOIN "employees" ON "departments"."department_id" = "employees"."department_id"
WHERE "employees"."is_active" = true
GROUP BY "department_group_name"[0m

ProgrammingError: (psycopg2.errors.UndefinedTable) relation "departments" does not exist
LINE 2: FROM "departments"
             ^

[SQL: SELECT "department_group_name", COUNT(*) AS "employee_count"
FROM "departments"
JOIN "employees" ON "departments"."department_id" = "employees"."department_id"
WHERE "employees"."is_active" = true
GROUP BY "department_group_name"]
(Background on this error at: https://sqlalche.me/e/20/f405)

In [147]:
# Passed with contextual assistance 
chain.run("How many distinct active employees (businessentityid from the prod.employeedepartmenthistory table) does each department (from the prod.department table) have? Return the department groupname and employee count for each department group name")



[1m> Entering new SQLDatabaseChain chain...[0m
How many distinct active employees (businessentityid from the prod.employeedepartmenthistory table) does each department (from the prod.department table) have? Return the department groupname and employee count for each department group name
SQLQuery:[32;1m[1;3mSELECT d.groupname, COUNT(DISTINCT edh.businessentityid) AS employee_count
FROM prod.department d
JOIN prod.employeedepartmenthistory edh ON d.departmentid = edh.departmentid
WHERE edh.enddate IS NULL
GROUP BY d.groupname
ORDER BY employee_count DESC;[0m
SQLResult: [33;1m[1;3m[('Manufacturing', 185), ('Executive General and Administration', 35), ('Sales and Marketing', 27), ('Inventory Management', 18), ('Research and Development', 14), ('Quality Assurance', 11)][0m
Answer:[32;1m[1;3mFinal answer: Each department has the following number of distinct active employees:
- Manufacturing: 185
- Executive General and Administration: 35
- Sales and Marketing: 27
- Inventory Man

'Final answer: Each department has the following number of distinct active employees:\n- Manufacturing: 185\n- Executive General and Administration: 35\n- Sales and Marketing: 27\n- Inventory Management: 18\n- Research and Development: 14\n- Quality Assurance: 11'

In [148]:
# TC15 : (temperature = 0)  | FAIL (employee_id is not a column, shift_type is not a name, status is not a column, 
# and employees is not a table)

# TC15 : (temperature = 0.2)  | FAIL (employee_id is not a column, shift_type is not a name, status is not a column, 
# and employees is not a table. Added quotation on all db objects)
chain.run(" How many distinct active employees work in each shift type? Return the count and shift type only")



[1m> Entering new SQLDatabaseChain chain...[0m
 How many distinct active employees work in each shift type? Return the count and shift type only
SQLQuery:[32;1m[1;3mSELECT COUNT(DISTINCT "employee_id"), "shift_type"
FROM "employees"
WHERE "is_active" = true
GROUP BY "shift_type"[0m

ProgrammingError: (psycopg2.errors.UndefinedTable) relation "employees" does not exist
LINE 2: FROM "employees"
             ^

[SQL: SELECT COUNT(DISTINCT "employee_id"), "shift_type"
FROM "employees"
WHERE "is_active" = true
GROUP BY "shift_type"]
(Background on this error at: https://sqlalche.me/e/20/f405)

In [149]:
# Passed with contextual Assistance
chain.run(" How many distinct active employees (businessentityid from the prod.employeedepartmenthistory table) work in each shift type (name column from the prod.shift table)? Return the count and shift type only")



[1m> Entering new SQLDatabaseChain chain...[0m
 How many distinct active employees (businessentityid from the prod.employeedepartmenthistory table) work in each shift type (name column from the prod.shift table)? Return the count and shift type only
SQLQuery:[32;1m[1;3mSELECT COUNT(DISTINCT edh.businessentityid), s.name
FROM prod.employeedepartmenthistory edh
JOIN prod.shift s ON edh.shiftid = s.shiftid
WHERE edh.enddate IS NULL
GROUP BY s.name[0m
SQLResult: [33;1m[1;3m[(176, 'Day'), (62, 'Evening'), (52, 'Night')][0m
Answer:[32;1m[1;3mThere are 176 distinct active employees working in the "Day" shift, 62 distinct active employees working in the "Evening" shift, and 52 distinct active employees working in the "Night" shift.[0m
[1m> Finished chain.[0m


'There are 176 distinct active employees working in the "Day" shift, 62 distinct active employees working in the "Evening" shift, and 52 distinct active employees working in the "Night" shift.'

In [150]:
# TC16 : (temperature = 0)  | FAIL (no table called departments, no table called employees, no table called shifts 
# shift_type is not a column, status is not a column, employee_id is not a column)

# TC16 : (temperature = 0.2)  | FAIL (no table called departments, no table called employees, no table called shifts 
# shift_type is not a column, status is not a column, employee_id is not a column, end_time is not a column. 
# Input value case sensitivity issue. Added quotations on all db objects. Unnecessary logic inclusion: "end_time > CURRENT_DATE")
chain.run("Which departments have active employees that work the night shift and how many employees are there?")



[1m> Entering new SQLDatabaseChain chain...[0m
Which departments have active employees that work the night shift and how many employees are there?
SQLQuery:[32;1m[1;3mSELECT "department_name", COUNT(*) 
FROM "departments" 
JOIN "employees" ON "departments"."department_id" = "employees"."department_id" 
JOIN "shifts" ON "employees"."employee_id" = "shifts"."employee_id" 
WHERE "shifts"."shift_name" = 'night' 
AND "shifts"."end_time" > CURRENT_DATE 
GROUP BY "department_name"[0m

ProgrammingError: (psycopg2.errors.UndefinedTable) relation "departments" does not exist
LINE 2: FROM "departments" 
             ^

[SQL: SELECT "department_name", COUNT(*) 
FROM "departments" 
JOIN "employees" ON "departments"."department_id" = "employees"."department_id" 
JOIN "shifts" ON "employees"."employee_id" = "shifts"."employee_id" 
WHERE "shifts"."shift_name" = 'night' 
AND "shifts"."end_time" > CURRENT_DATE 
GROUP BY "department_name"]
(Background on this error at: https://sqlalche.me/e/20/f405)

In [151]:
# FAIL: issue with case sensitivity with the 'night' value even when I added a note in parenthesis regarding case sensitivity
# Also, didn't include logic for active employee which is to ensure enddate is not null. 
# in 0.2 temp added incorrect logic for Active employee: s.starttime > '00:00:00' AND s.endtime < '06:00:00' 
chain.run("Which departments (groupname from the prod.department table) have active employees that work at night(case sensitive) (name from the prod.shift table) and how many employees (businessentityid from the prod.employeedepartmenthistory table) are there?")



[1m> Entering new SQLDatabaseChain chain...[0m
Which departments (groupname from the prod.department table) have active employees that work at night(case sensitive) (name from the prod.shift table) and how many employees (businessentityid from the prod.employeedepartmenthistory table) are there?
SQLQuery:

Retrying langchain.chat_models.openai.ChatOpenAI.completion_with_retry.<locals>._completion_with_retry in 1.0 seconds as it raised APIError: Request failed due to server shutdown {
  "error": {
    "message": "Request failed due to server shutdown",
    "type": "server_error",
    "param": null,
    "code": null
  }
}
 500 {'error': {'message': 'Request failed due to server shutdown', 'type': 'server_error', 'param': None, 'code': None}} {'Date': 'Fri, 25 Aug 2023 12:38:09 GMT', 'Content-Type': 'application/json', 'Content-Length': '141', 'Connection': 'keep-alive', 'access-control-allow-origin': '*', 'openai-model': 'gpt-3.5-turbo-16k-0613', 'openai-organization': 'user-bxpelthnfhjktajxwu3m1bns', 'openai-processing-ms': '13932', 'openai-version': '2020-10-01', 'strict-transport-security': 'max-age=15724800; includeSubDomains', 'x-ratelimit-limit-requests': '3500', 'x-ratelimit-limit-tokens': '180000', 'x-ratelimit-remaining-requests': '3499', 'x-ratelimit-remaining-tokens': '179643', 

[32;1m[1;3mSELECT d.groupname, COUNT(edh.businessentityid) 
FROM prod.department d 
JOIN prod.employeedepartmenthistory edh ON d.departmentid = edh.departmentid 
JOIN prod.shift s ON edh.shiftid = s.shiftid 
WHERE s.name = 'night' AND s.starttime > '00:00:00' AND s.endtime < '06:00:00' 
GROUP BY d.groupname[0m
SQLResult: [33;1m[1;3m[][0m
Answer:[32;1m[1;3mFinal answer here: There are no departments with active employees that work at night.[0m
[1m> Finished chain.[0m


'Final answer here: There are no departments with active employees that work at night.'

In [152]:
# Passes with more additional contextual assistance. Letting it know that 
# filtering values may be case sensitive prompted it to use the ILIKE function to adjust for all case sensitive scenarios

# in 0.2 temp, failed. Added incorrect Active employye logic: edh."enddate" > CURRENT_DATE. unnecessary quotations on db objects.
chain.run("Which departments (groupname from the prod.department table) have active (enddate in the prod.employeedepartmenthistory table) employees that work at night (name from the prod.shift table) and how many employees (businessentityid from the prod.employeedepartmenthistory table) are there? Filtering condition may require you to handle case sensitivity")



[1m> Entering new SQLDatabaseChain chain...[0m
Which departments (groupname from the prod.department table) have active (enddate in the prod.employeedepartmenthistory table) employees that work at night (name from the prod.shift table) and how many employees (businessentityid from the prod.employeedepartmenthistory table) are there? Filtering condition may require you to handle case sensitivity
SQLQuery:[32;1m[1;3mSELECT d."groupname", COUNT(edh."businessentityid") AS num_employees
FROM prod.department d
JOIN prod.employeedepartmenthistory edh ON d."departmentid" = edh."departmentid"
JOIN prod.shift s ON edh."shiftid" = s."shiftid"
WHERE edh."enddate" > CURRENT_DATE
AND LOWER(s."name") LIKE '%night%'
GROUP BY d."groupname"[0m
SQLResult: [33;1m[1;3m[][0m
Answer:[32;1m[1;3mFinal answer here: There are no departments with active employees that work at night.[0m
[1m> Finished chain.[0m


'Final answer here: There are no departments with active employees that work at night.'