In [1]:
import os

from langchain.agents import AgentExecutor
from langchain.agents import create_sql_agent
from langchain.agents.agent_toolkits import SQLDatabaseToolkit
from langchain.agents.agent_types import AgentType
from langchain.llms import AzureOpenAI
from langchain.sql_database import SQLDatabase

In [2]:
import duckdb

In [3]:
# Open (or create) the on-disk DuckDB database file used by the rest of the notebook.
conn = duckdb.connect("profiles.db")

In [6]:
# Install DuckDB's `spatial` extension; a later cell reads the Excel workbook
# through st_read after this extension has been loaded.
conn.execute("INSTALL spatial;")

<duckdb.DuckDBPyConnection at 0x2479283e930>

In [7]:
# Load the previously installed `spatial` extension into this connection.
conn.execute("LOAD spatial;")

<duckdb.DuckDBPyConnection at 0x2479283e930>

In [9]:
# Materialise the `passenger_data` layer of the Excel workbook as the `profile`
# table, reading it with st_read.
# CREATE OR REPLACE keeps this cell idempotent: profiles.db is a persistent file,
# so a plain CREATE TABLE fails with "table already exists" whenever the cell
# (or the whole notebook) is run a second time.
conn.execute("CREATE OR REPLACE TABLE profile AS SELECT * FROM st_read('passenger_Data.xlsx', layer='passenger_data')")

<duckdb.DuckDBPyConnection at 0x2479283e930>

In [10]:
# Install DuckDB's `sqlite` extension.
# NOTE(review): no visible cell LOADs or uses this extension — confirm it is
# still needed.
conn.execute("INSTALL sqlite;")

<duckdb.DuckDBPyConnection at 0x2479283e930>

In [11]:
# SQLAlchemy-style connection URL that points at the profiles.db DuckDB file.
uri = 'duckdb:///profiles.db'

In [7]:
from sqlalchemy import create_engine

In [8]:
# Build a SQLAlchemy engine from the DuckDB connection URL.
# NOTE(review): no visible cell uses `engine` — SQLDatabase.from_uri is given
# `uri` directly — confirm whether this cell is still needed.
engine = create_engine(uri)

In [12]:
# LangChain view of the database, restricted to the `profile` table only.
db = SQLDatabase.from_uri(
    uri,
    include_tables=["profile"],
)



In [13]:
from langchain.chat_models import AzureChatOpenAI

In [14]:
# Azure OpenAI chat model that drives the SQL agent.
# The API key is read from the AZURE_OPENAI_API_KEY environment variable so the
# secret never lives in the notebook; a missing variable raises KeyError here
# rather than failing later inside a request.
# NOTE(review): the key that used to be hardcoded in this cell has been exposed
# and should be rotated.
llm = AzureChatOpenAI(
    model="gpt-35-turbo",
    openai_api_key=os.environ["AZURE_OPENAI_API_KEY"],
    openai_api_base="https://genai-interns.openai.azure.com/",
    deployment_name="genai",
    openai_api_version="2023-07-01-preview",
    temperature=0,
)

In [15]:
# Bundle the database handle and the chat model into the SQL toolkit handed to the agent.
toolkit = SQLDatabaseToolkit(
    llm=llm,
    db=db,
)

In [13]:
from langchain.prompts.chat import ChatPromptTemplate

In [19]:
# Prompt wrapper that describes the `profile` table to the model before the
# user's question. Column names and types mirror the table schema reported by
# the database (VARCHAR / DATE / DOUBLE, and `Travel_date` with a lowercase d)
# rather than the pandas dtypes, so the description agrees with what the SQL
# agent sees. `{query}` is the only placeholder and is filled by prompt.format.
template = """
            You are a SQL Analyst that is querying a database containing a summary of an airline.

            Below is a description of the columns, data types, and information in the columns:

            The column name Passenger_name with the data type VARCHAR contains the names of the passengers

            The column name Arrival_station with the data type VARCHAR contains the arrival station of the passenger

            The column name Departure_station with the data type VARCHAR contains the departure station of the passenger

            The column name Date_of_birth with the data type DATE contains the date of birth of the passenger

            The column name Gender with the data type VARCHAR contains the gender of the passenger

            The column name Meals with the data type VARCHAR contains the meals consumed by the passenger

            The column name Baggage with the data type VARCHAR contains the baggage allowance of the passenger

            The column name Revenue with the data type DOUBLE contains the revenue generated by the passenger

            The column name Country with the data type VARCHAR contains the country the passenger belongs to

            The column name Unique_passenger_id with the data type VARCHAR contains the unique passenger id of the passenger

            The column name Travel_date with the data type DATE contains the date travelled by the passenger

            Your job is to answer the following questions:
            {query}
            """

In [20]:
# Assemble the zero-shot ReAct SQL agent from the toolkit and chat model;
# verbose=True makes it print its intermediate Action/Observation steps.
agent = create_sql_agent(
    agent_type=AgentType.ZERO_SHOT_REACT_DESCRIPTION,
    toolkit=toolkit,
    llm=llm,
    verbose=True,
)

In [21]:
from langchain.prompts import PromptTemplate

In [22]:
# Turn the raw template string into a PromptTemplate whose `{query}` slot is
# filled per question.
prompt = PromptTemplate.from_template(template=template)

In [27]:
# Fill the prompt with the question and hand it to the agent; the cell's value
# is whatever agent.run returns.
agent.run(
    prompt.format(
        query="based on the revenue generated on 2023 what do you think will be the revenue for march of 2024"
    )
)



[1m> Entering new AgentExecutor chain...[0m
[32;1m[1;3mAction: sql_db_list_tables
Action Input: ""[0m
Observation: [38;5;200m[1;3mprofile[0m
Thought:[32;1m[1;3mThe table "profile" seems relevant. I should query its schema.
Action: sql_db_schema
Action Input: "profile"[0m
Observation: [33;1m[1;3m
CREATE TABLE profile (
	"Passenger_name" VARCHAR, 
	"Arrival_station" VARCHAR, 
	"Departure_station" VARCHAR, 
	"Date_of_birth" DATE, 
	"Gender" VARCHAR, 
	"Meals" VARCHAR, 
	"Baggage" VARCHAR, 
	"Revenue" DOUBLE PRECISION, 
	"Country" VARCHAR, 
	"Unique_passenger_id" VARCHAR, 
	"Travel_date" DATE
)

/*
3 rows from profile table:
Passenger_name	Arrival_station	Departure_station	Date_of_birth	Gender	Meals	Baggage	Revenue	Country	Unique_passenger_id	Travel_date
Cathy Baker	San Francisco	Miami	1960-09-15	Male	Vegetarian	checked baggage allowance = 16kg	741.02	New Zealand	P0000	2023-08-08
Joanne Brown	Miami	Chicago	1968-12-24	Female	Vegan	checked baggage allowance = 23kg	795.99	Antar

"I don't know."

In [16]:
import pandas as pd


In [17]:
# Read the same Excel workbook with pandas so its columns can be inspected.
# NOTE(review): no sheet_name is given — confirm the sheet pandas picks by
# default is the `passenger_data` layer that was loaded into DuckDB.
df = pd.read_excel("passenger_Data.xlsx")

In [18]:
# Print the frame's column names, non-null counts and dtypes.
df.info()

<class 'pandas.core.frame.DataFrame'>
RangeIndex: 1000 entries, 0 to 999
Data columns (total 11 columns):
 #   Column               Non-Null Count  Dtype         
---  ------               --------------  -----         
 0   Passenger_name       1000 non-null   object        
 1   Arrival_station      1000 non-null   object        
 2   Departure_station    1000 non-null   object        
 3   Date_of_birth        1000 non-null   datetime64[ns]
 4   Gender               1000 non-null   object        
 5   Meals                1000 non-null   object        
 6   Baggage              1000 non-null   object        
 7   Revenue              1000 non-null   float64       
 8   Country              1000 non-null   object        
 9   Unique_passenger_id  1000 non-null   object        
 10  Travel_date          1000 non-null   datetime64[ns]
dtypes: datetime64[ns](2), float64(1), object(8)
memory usage: 86.1+ KB
