In [19]:
# importing libraries

from langchain.agents import initialize_agent
from langchain.agents import AgentType
from langchain.llms import OpenAI
from langchain.agents import *
from langchain.sql_database import SQLDatabase
from langchain.agents.agent_toolkits import SQLDatabaseToolkit
from langchain.agents import AgentExecutor
from langchain import OpenAI, SQLDatabaseChain
from langchain import PromptTemplate, LLMChain
from langchain import HuggingFacePipeline
import sqlalchemy as sqla
from sqlalchemy import text
import pandas as pd
import os
from dotenv import load_dotenv
from langchain.chat_models import ChatOpenAI
import huggingface_hub
from langchain import HuggingFaceHub
from transformers import pipeline



In [21]:
# connect to datatbase
load_dotenv()


# Access environment variables
DB_HOST = os.getenv("DB_HOST")
DB_PORT = os.getenv("DB_PORT")
DB_NAME = os.getenv("DB_NAME")
DB_NAME2 = os.getenv("DB_NAME2")
DB_USER = os.getenv("DB_USER")
DB_PASSWORD = os.getenv("DB_PASSWORD")
HF_KEY= os.getenv("HUGGINGFACEHUB_API_TOKEN")
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")

from huggingface_hub import login
login(token=HF_KEY)


db = SQLDatabase.from_uri(f"postgresql+psycopg2://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}")
db2 = SQLDatabase.from_uri(f"postgresql+psycopg2://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME2}")
engine = sqla.create_engine(f"postgresql+psycopg2://{DB_USER}:{DB_PASSWORD}@{DB_HOST}:{DB_PORT}/{DB_NAME}")

Token will not been saved to git credential helper. Pass `add_to_git_credential=True` if you want to set the git credential as well.
Token is valid (permission: read).
Your token has been saved to /Users/chelciedealmeida/.cache/huggingface/token
Login successful


  self._metadata.reflect(


In [22]:
# Load model directly

# llm = ChatOpenAI(model_name="gpt-3.5-turbo")
llm = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=0.1)

toolkit = SQLDatabaseToolkit(db=db2, llm=llm)
agent_executor = create_sql_agent(
    llm=llm,
    toolkit=toolkit,
    verbose=True
)



In [23]:
agent_executor.run("describe the bookings schema?")




[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3maircrafts_data, airports_data, boarding_passes, bookings, flights, seats, ticket_flights, tickets[0m
Thought:[32;1m[1;3mThe relevant table for the bookings schema is "bookings". I should query the schema of this table.
Action: sql_db_schema
Action Input: "bookings"[0m
Observation: [33;1m[1;3m
CREATE TABLE bookings (
	book_ref CHAR(6) NOT NULL, 
	book_date TIMESTAMP WITH TIME ZONE NOT NULL, 
	total_amount NUMERIC(10, 2) NOT NULL, 
	CONSTRAINT bookings_pkey PRIMARY KEY (book_ref)
)

/*
3 rows from bookings table:
book_ref	book_date	total_amount
00000F	2017-07-04 17:12:00-07:00	265700.00
000012	2017-07-13 23:02:00-07:00	37900.00
00002D	2017-05-20 08:45:00-07:00	114700.00
*/[0m
Thought:[32;1m[1;3mThe bookings schema consists of the following columns:
- book_ref (CHAR(6))
- book_date (TIMESTAMP WITH TIME ZONE)
- total_amount (NUMERIC(10, 2))

Th

'The bookings schema consists of the columns book_ref, book_date, and total_amount.'

In [24]:
agent_executor.run("describe the flights table?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3maircrafts_data, airports_data, boarding_passes, bookings, flights, seats, ticket_flights, tickets[0m
Thought:[32;1m[1;3mThe "flights" table is available for querying.
Action: sql_db_schema
Action Input: "flights"[0m
Observation: [33;1m[1;3m
CREATE TABLE flights (
	flight_id SERIAL NOT NULL, 
	flight_no CHAR(6) NOT NULL, 
	scheduled_departure TIMESTAMP WITH TIME ZONE NOT NULL, 
	scheduled_arrival TIMESTAMP WITH TIME ZONE NOT NULL, 
	departure_airport CHAR(3) NOT NULL, 
	arrival_airport CHAR(3) NOT NULL, 
	status VARCHAR(20) NOT NULL, 
	aircraft_code CHAR(3) NOT NULL, 
	actual_departure TIMESTAMP WITH TIME ZONE, 
	actual_arrival TIMESTAMP WITH TIME ZONE, 
	CONSTRAINT flights_pkey PRIMARY KEY (flight_id), 
	CONSTRAINT flights_aircraft_code_fkey FOREIGN KEY(aircraft_code) REFERENCES aircrafts_data (aircraft_code), 
	CONSTRAINT flights_arrival_airp

'The flights table has the following columns: flight_id, flight_no, scheduled_departure, scheduled_arrival, departure_airport, arrival_airport, status, aircraft_code, actual_departure, and actual_arrival.'

In [25]:
agent_executor.run("How many rows are in the bookings table?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3maircrafts_data, airports_data, boarding_passes, bookings, flights, seats, ticket_flights, tickets[0m
Thought:[32;1m[1;3mI can query the "bookings" table to get the number of rows.
Action: sql_db_query
Action Input: "SELECT COUNT(*) FROM bookings"[0m
Observation: [36;1m[1;3m[(593433,)][0m
Thought:[32;1m[1;3mI now know the final answer.
Final Answer: There are 593,433 rows in the bookings table.[0m

[1m> Finished chain.[0m


'There are 593,433 rows in the bookings table.'

In [26]:
agent_executor.run("How many rows are in the aircrafts_data table?")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3maircrafts_data, airports_data, boarding_passes, bookings, flights, seats, ticket_flights, tickets[0m
Thought:[32;1m[1;3mI can query the aircrafts_data table to count the number of rows.
Action: sql_db_query
Action Input: "SELECT COUNT(*) FROM aircrafts_data"[0m
Observation: [36;1m[1;3m[(9,)][0m
Thought:[32;1m[1;3mI now know the final answer.
Final Answer: There are 9 rows in the aircrafts_data table.[0m

[1m> Finished chain.[0m


'There are 9 rows in the aircrafts_data table.'

In [27]:
agent_executor.run("find the top ten most expensive flights and order total amounts in descending order")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3maircrafts_data, airports_data, boarding_passes, bookings, flights, seats, ticket_flights, tickets[0m
Thought:[32;1m[1;3mI can query the "flights" table to find the most expensive flights. I should check the schema of the "flights" table to see which columns are relevant.
Action: sql_db_schema
Action Input: "flights"[0m
Observation: [33;1m[1;3m
CREATE TABLE flights (
	flight_id SERIAL NOT NULL, 
	flight_no CHAR(6) NOT NULL, 
	scheduled_departure TIMESTAMP WITH TIME ZONE NOT NULL, 
	scheduled_arrival TIMESTAMP WITH TIME ZONE NOT NULL, 
	departure_airport CHAR(3) NOT NULL, 
	arrival_airport CHAR(3) NOT NULL, 
	status VARCHAR(20) NOT NULL, 
	aircraft_code CHAR(3) NOT NULL, 
	actual_departure TIMESTAMP WITH TIME ZONE, 
	actual_arrival TIMESTAMP WITH TIME ZONE, 
	CONSTRAINT flights_pkey PRIMARY KEY (flight_id), 
	CONSTRAINT flights_aircraft_code_fke

OutputParserException: Could not parse LLM output: `The top ten most expensive flights, ordered by total amount in descending order, are:
1. Booking reference: 3B54BB, Book date: 2017-07-05 07:08:00, Total amount: $1,204,500.00
2. Booking reference: 53B75C, Book date: 2017-05-28 23:20:00, Total amount: $1,116,700.00
3. Booking reference: 3AC131, Book date: 2017-07-30 15:06:00, Total amount: $1,087,100.00
4. Booking reference: 4B58DB, Book date: 2017-05-14 05:39:00, Total amount: $1,065,600.00
5. Booking reference: 65A6EA, Book date: 2017-07-02 20:28:00, Total amount: $1,065,600.00
6. Booking reference: D7061C, Book date: 2017-06-06 01:49:00, Total amount: $1,065,600.00
7. Booking reference: D7E9AA, Book date: 2017-08-07 19:29:00, Total amount: $1,062,800.00
8. Booking reference: EF479E, Book date: 2017-08-02 05:58:00, Total amount: $1,035,100.00
9. Booking reference: 03BCC9, Book date: 2017-06-25 20:26:00, Total amount: $1,022,500.00
10. Booking reference: 1BE923, Book date: 2017-06-24 03:29:00, Total amount: $1,016,500.00`

In [29]:
# Testing for case sensitivity in values. Passenger name ANTONINA KUZNECOVA is spelled using the same case sensitivity as in the db
# executor correctly identified table and displayed the correct result based on the validation found in the validation_airports file.
agent_executor.run("How many ticket bookings does passenger ANTONINA KUZNECOVA have? ")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3maircrafts_data, airports_data, boarding_passes, bookings, flights, seats, ticket_flights, tickets[0m
Thought:[32;1m[1;3mI see that the relevant tables are bookings and tickets. I should query the schema of these tables to see what columns I can use to find the number of ticket bookings for a specific passenger.
Action: sql_db_schema
Action Input: "bookings, tickets"[0m
Observation: [33;1m[1;3m
CREATE TABLE bookings (
	book_ref CHAR(6) NOT NULL, 
	book_date TIMESTAMP WITH TIME ZONE NOT NULL, 
	total_amount NUMERIC(10, 2) NOT NULL, 
	CONSTRAINT bookings_pkey PRIMARY KEY (book_ref)
)

/*
3 rows from bookings table:
book_ref	book_date	total_amount
00000F	2017-07-04 17:12:00-07:00	265700.00
000012	2017-07-13 23:02:00-07:00	37900.00
00002D	2017-05-20 08:45:00-07:00	114700.00
*/


CREATE TABLE tickets (
	ticket_no CHAR(13) NOT NULL, 
	book_ref CHAR(6

'117'

In [30]:

# langchain identified the value when the same case sensitivity used in the db was used in the prompt
# but failed to exexute query once the case sensitivity was incorrect. It did, however identify one of
# the correct tables {tickets} but joined to the {bookings} table which isn't incorrect. Executor did use case sensitivity 
# as instructed by the prompt.


agent_executor.run("How many ticket bookings does passenger Antonina Kuznecova have? ")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3maircrafts_data, airports_data, boarding_passes, bookings, flights, seats, ticket_flights, tickets[0m
Thought:[32;1m[1;3mI see that the relevant tables are bookings and tickets. I should query the schema of these tables to see what columns I can use to find the number of ticket bookings for passenger Antonina Kuznecova.
Action: sql_db_schema
Action Input: "bookings, tickets"[0m
Observation: [33;1m[1;3m
CREATE TABLE bookings (
	book_ref CHAR(6) NOT NULL, 
	book_date TIMESTAMP WITH TIME ZONE NOT NULL, 
	total_amount NUMERIC(10, 2) NOT NULL, 
	CONSTRAINT bookings_pkey PRIMARY KEY (book_ref)
)

/*
3 rows from bookings table:
book_ref	book_date	total_amount
00000F	2017-07-04 17:12:00-07:00	265700.00
000012	2017-07-13 23:02:00-07:00	37900.00
00002D	2017-05-20 08:45:00-07:00	114700.00
*/


CREATE TABLE tickets (
	ticket_no CHAR(13) NOT NULL, 
	book_re

'0'

In [32]:
# langchain is unable to scan db views in provided schema
agent_executor.run("describe the routes view in the bookings schema")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3maircrafts_data, airports_data, boarding_passes, bookings, flights, seats, ticket_flights, tickets[0m
Thought:[32;1m[1;3mI can query the "bookings" table to get the schema of the "routes" view.
Action: sql_db_schema
Action Input: "bookings"[0m
Observation: [33;1m[1;3m
CREATE TABLE bookings (
	book_ref CHAR(6) NOT NULL, 
	book_date TIMESTAMP WITH TIME ZONE NOT NULL, 
	total_amount NUMERIC(10, 2) NOT NULL, 
	CONSTRAINT bookings_pkey PRIMARY KEY (book_ref)
)

/*
3 rows from bookings table:
book_ref	book_date	total_amount
00000F	2017-07-04 17:12:00-07:00	265700.00
000012	2017-07-13 23:02:00-07:00	37900.00
00002D	2017-05-20 08:45:00-07:00	114700.00
*/[0m
Thought:

OutputParserException: Could not parse LLM output: `The "routes" view is not present in the "bookings" table. I don't know the answer.`

In [33]:
agent_executor.run("Get me the count of each flight status grouped by status and also provide the minimum and maximum departure ordered by the minimum scheduled departure ")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3maircrafts_data, airports_data, boarding_passes, bookings, flights, seats, ticket_flights, tickets[0m
Thought:[32;1m[1;3mI can query the "flights" table to get the count of each flight status. I can also query the "flights" table to get the minimum and maximum departure times, ordered by the minimum scheduled departure time.
Action: sql_db_schema
Action Input: flights[0m
Observation: [33;1m[1;3m
CREATE TABLE flights (
	flight_id SERIAL NOT NULL, 
	flight_no CHAR(6) NOT NULL, 
	scheduled_departure TIMESTAMP WITH TIME ZONE NOT NULL, 
	scheduled_arrival TIMESTAMP WITH TIME ZONE NOT NULL, 
	departure_airport CHAR(3) NOT NULL, 
	arrival_airport CHAR(3) NOT NULL, 
	status VARCHAR(20) NOT NULL, 
	aircraft_code CHAR(3) NOT NULL, 
	actual_departure TIMESTAMP WITH TIME ZONE, 
	actual_arrival TIMESTAMP WITH TIME ZONE, 
	CONSTRAINT flights_pkey PRIMARY KEY

OutputParserException: Could not parse LLM output: `I now know the final answer.`

In [35]:
agent_executor.run("Get me the count of each flight status grouped by status. Return only the status and count for each status type")



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3maircrafts_data, airports_data, boarding_passes, bookings, flights, seats, ticket_flights, tickets[0m
Thought:[32;1m[1;3mI can query the "flights" table to get the flight status. I should also query the schema of the "flights" table to see the available columns.
Action: sql_db_schema
Action Input: "flights"[0m
Observation: [33;1m[1;3m
CREATE TABLE flights (
	flight_id SERIAL NOT NULL, 
	flight_no CHAR(6) NOT NULL, 
	scheduled_departure TIMESTAMP WITH TIME ZONE NOT NULL, 
	scheduled_arrival TIMESTAMP WITH TIME ZONE NOT NULL, 
	departure_airport CHAR(3) NOT NULL, 
	arrival_airport CHAR(3) NOT NULL, 
	status VARCHAR(20) NOT NULL, 
	aircraft_code CHAR(3) NOT NULL, 
	actual_departure TIMESTAMP WITH TIME ZONE, 
	actual_arrival TIMESTAMP WITH TIME ZONE, 
	CONSTRAINT flights_pkey PRIMARY KEY (flight_id), 
	CONSTRAINT flights_aircraft_code_fkey FOREIGN 

'The count of each flight status grouped by status is as follows:\n- Departed: 58\n- Arrived: 49235\n- On Time: 518\n- Cancelled: 429\n- Delayed: 41\n- Scheduled: 15383'