# Testing on CSV files

In [2]:
import pandas as pd 
from pyprojroot import here

In [5]:
# Read the Titanic passenger CSV (path resolved through pyprojroot's here())
# and preview the first rows as the cell output.
titanic_csv = here('data/for_upload/titanic.csv')
df = pd.read_csv(titanic_csv)
df.head()

Unnamed: 0,Survived,Pclass,Name,Sex,Age,Siblings/Spouses Aboard,Parents/Children Aboard,Fare
0,0,3,Mr. Owen Harris Braund,male,22.0,1,0,7.25
1,1,1,Mrs. John Bradley (Florence Briggs Thayer) Cum...,female,38.0,1,0,71.2833
2,1,3,Miss. Laina Heikkinen,female,26.0,0,0,7.925
3,1,1,Mrs. Jacques Heath (Lily May Peel) Futrelle,female,35.0,1,0,53.1
4,0,3,Mr. William Henry Allen,male,35.0,0,0,8.05


# Convert to SQL 

In [7]:
from langchain_community.utilities import SQLDatabase
from sqlalchemy import create_engine
# Persist the DataFrame as a `titanic` table in a SQLite file under data/.
db_file = here('data') / 'titanic_sqldb.db'
# SQLAlchemy connection URL for that file; `db_path` keeps holding the URL.
db_path = f'sqlite:///{db_file}'

engine = create_engine(db_path)
# if_exists='replace' keeps this cell re-runnable: the default ('fail') raises
# ValueError as soon as the table already exists, e.g. on a second run or a
# Restart & Run All with the .db file left on disk.
# index=False: the DataFrame index is not written as a column.
df.to_sql('titanic', engine, index=False, if_exists='replace')

887

In [8]:
# Wrap the SQLAlchemy engine in LangChain's SQLDatabase, then print the
# dialect it reports followed by the tables it exposes as a sanity check.
db = SQLDatabase(engine=engine)
for detail in (db.dialect, db.get_usable_table_names()):
    print(detail)

sqlite
['titanic']


In [47]:
# Read the table back to confirm the export worked. This goes through pandas'
# public SQL reader on the same engine rather than SQLDatabase._execute, which
# is a private (underscore-prefixed) method.
pd.read_sql_query('select * from titanic', engine)

Unnamed: 0,Survived,Pclass,Name,Sex,Age,Siblings/Spouses Aboard,Parents/Children Aboard,Fare
0,0,3,Mr. Owen Harris Braund,male,22.0,1,0,7.2500
1,1,1,Mrs. John Bradley (Florence Briggs Thayer) Cum...,female,38.0,1,0,71.2833
2,1,3,Miss. Laina Heikkinen,female,26.0,0,0,7.9250
3,1,1,Mrs. Jacques Heath (Lily May Peel) Futrelle,female,35.0,1,0,53.1000
4,0,3,Mr. William Henry Allen,male,35.0,0,0,8.0500
...,...,...,...,...,...,...,...,...
882,0,2,Rev. Juozas Montvila,male,27.0,0,0,13.0000
883,1,1,Miss. Margaret Edith Graham,female,19.0,0,0,30.0000
884,0,3,Miss. Catherine Helen Johnston,female,7.0,1,2,23.4500
885,1,1,Mr. Karl Howell Behr,male,26.0,0,0,30.0000


# Create an agent to interact with the database

In [15]:
import os
from dotenv import load_dotenv
import warnings

# Silence every warning for the rest of the session.
# NOTE(review): this is a blanket filter, so deprecation notices are hidden too.
warnings.filterwarnings('ignore')

# load_dotenv() returns whether a .env file was found and loaded; report it.
env_loaded = load_dotenv()
print(f'load environment variables:{env_loaded}')


load environment variables:True


In [16]:
from langchain_community.chat_models import AzureChatOpenAI

# Azure OpenAI settings are read from the environment (load_dotenv() ran in an
# earlier cell).
# NOTE(review): 'AZURE_OpenAI_ENDPOINT' and 'AZURE_OpenAI_API_VERSION' are
# spelled in mixed case, unlike 'AZURE_OPENAI_API_KEY'. Environment variable
# names are case-sensitive on Linux/macOS -- confirm these match the .env file.
model_name = os.getenv('AZURE_OPENAI_DEPLOYMENT_MODEL')
azure_openai_api_key = os.getenv('AZURE_OPENAI_API_KEY')
azure_openai_endpoint = os.getenv('AZURE_OpenAI_ENDPOINT')

# The endpoint and key were previously read but never used; pass them
# explicitly so the values looked up above are the ones the client gets.
# Presumably the client falls back to its own environment lookup when either
# is None -- TODO confirm against the LangChain API reference.
llm = AzureChatOpenAI(
    openai_api_version=os.getenv('AZURE_OpenAI_API_VERSION'),
    azure_deployment=model_name,
    azure_endpoint=azure_openai_endpoint,
    openai_api_key=azure_openai_api_key,
    model_name=model_name,
    temperature=0.2  # low temperature for more repeatable SQL generation
)

In [17]:
from langchain_community.agent_toolkits import create_sql_agent

# Build a tool-calling SQL agent over the Titanic database; verbose=True makes
# it print every tool invocation while it runs.
agent_executor = create_sql_agent(
    llm=llm,
    db=db,
    agent_type='openai-tools',
    verbose=True,
)

In [51]:
# Ask the agent a free-form question; the returned dict (input + output) is
# shown as the cell result.
age_group_question = 'what is the age group of average male survivors'
agent_executor.invoke({'input': age_group_question})



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3m
Invoking: `sql_db_list_tables` with `{}`


[0m[38;5;200m[1;3mtitanic[0m[32;1m[1;3m
Invoking: `sql_db_schema` with `{'table_names': 'titanic'}`


[0m[33;1m[1;3m
CREATE TABLE titanic (
	"Survived" BIGINT, 
	"Pclass" BIGINT, 
	"Name" TEXT, 
	"Sex" TEXT, 
	"Age" FLOAT, 
	"Siblings/Spouses Aboard" BIGINT, 
	"Parents/Children Aboard" BIGINT, 
	"Fare" FLOAT
)

/*
3 rows from titanic table:
Survived	Pclass	Name	Sex	Age	Siblings/Spouses Aboard	Parents/Children Aboard	Fare
0	3	Mr. Owen Harris Braund	male	22.0	1	0	7.25
1	1	Mrs. John Bradley (Florence Briggs Thayer) Cumings	female	38.0	1	0	71.2833
1	3	Miss. Laina Heikkinen	female	26.0	0	0	7.925
*/[0m[32;1m[1;3m
Invoking: `sql_db_query` with `{'query': "SELECT AVG(Age) AS Average_Age, CASE WHEN Age < 18 THEN 'Child' WHEN Age >= 18 AND Age < 35 THEN 'Young Adult' WHEN Age >= 35 AND Age < 60 THEN 'Adult' ELSE 'Senior' END AS Age_Group FROM titanic WHERE Survived = 1 AND Se

{'input': 'what is the age group of average male survivors',
 'output': 'The average age group of male survivors is "Young Adult" with an average age of approximately 27.4 years.'}

In [49]:
# Look up one passenger by name. SQL string literals take single quotes:
# double quotes delimit identifiers, and SQLite only treats a double-quoted
# token as a string through a legacy fallback that can be disabled.
# The query goes through pandas' public SQL reader on the same engine instead
# of the private SQLDatabase._execute.
pd.read_sql_query(
    "select * from titanic where name like '%Mr. Lionel Leonard%'",
    engine,
)

Unnamed: 0,Survived,Pclass,Name,Sex,Age,Siblings/Spouses Aboard,Parents/Children Aboard,Fare
0,0,3,Mr. Lionel Leonard,male,36.0,0,0,0.0


In [52]:

# Keep the agent's full response so the final text can be printed separately.
# NOTE(review): in the recorded trace the generated query restricts rows to
# "Siblings/Spouses Aboard > 0" and leaves that column name unquoted, so the
# answer may not describe the oldest male survivor overall -- verify with a
# direct query before relying on it.
oldest_survivor_question = 'what is the highest age of male survivor and does he have siblings'
answer = agent_executor.invoke({'input': oldest_survivor_question})



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3m
Invoking: `sql_db_list_tables` with `{}`


[0m[38;5;200m[1;3mtitanic[0m[32;1m[1;3m
Invoking: `sql_db_schema` with `{'table_names': 'titanic'}`


[0m[33;1m[1;3m
CREATE TABLE titanic (
	"Survived" BIGINT, 
	"Pclass" BIGINT, 
	"Name" TEXT, 
	"Sex" TEXT, 
	"Age" FLOAT, 
	"Siblings/Spouses Aboard" BIGINT, 
	"Parents/Children Aboard" BIGINT, 
	"Fare" FLOAT
)

/*
3 rows from titanic table:
Survived	Pclass	Name	Sex	Age	Siblings/Spouses Aboard	Parents/Children Aboard	Fare
0	3	Mr. Owen Harris Braund	male	22.0	1	0	7.25
1	1	Mrs. John Bradley (Florence Briggs Thayer) Cumings	female	38.0	1	0	71.2833
1	3	Miss. Laina Heikkinen	female	26.0	0	0	7.925
*/[0m[32;1m[1;3m
Invoking: `sql_db_query` with `{'query': "SELECT MAX(Age) AS Highest_Age, Siblings/Spouses Aboard AS Siblings FROM titanic WHERE Survived = 1 AND Sex = 'male' AND Siblings/Spouses Aboard > 0"}`
responded: Based on the schema of the "titanic" table, I can see that

In [53]:
# Show only the agent's final natural-language answer, not the whole dict.
final_answer = answer['output']
print(final_answer)

The highest age of a male survivor is 60 years old. He has 1 sibling.
