In [1]:
import pandas as pd
from pyprojroot import here

In [7]:
# Read the Titanic passenger table from the CSV file, then preview it:
# its (rows, columns) shape, the column names, and the first five rows.
df = pd.read_csv(r"data/csvs_xlsx/titanic.csv")
print(df.shape)
print(list(df.columns))
display(df.head(5))

(887, 8)
['Survived', 'Pclass', 'Name', 'Sex', 'Age', 'Siblings/Spouses Aboard', 'Parents/Children Aboard', 'Fare']


Unnamed: 0,Survived,Pclass,Name,Sex,Age,Siblings/Spouses Aboard,Parents/Children Aboard,Fare
0,0,3,Mr. Owen Harris Braund,male,22.0,1,0,7.25
1,1,1,Mrs. John Bradley (Florence Briggs Thayer) Cum...,female,38.0,1,0,71.2833
2,1,3,Miss. Laina Heikkinen,female,26.0,0,0,7.925
3,1,1,Mrs. Jacques Heath (Lily May Peel) Futrelle,female,35.0,1,0,53.1
4,0,3,Mr. William Henry Allen,male,35.0,0,0,8.05


#### SQL

In [8]:
from langchain_community.utilities import SQLDatabase
from sqlalchemy import create_engine

# Location of the SQLite file backing the demo database, then the same
# location wrapped in a SQLAlchemy connection URL.
db_path = r"data/csvs_xlsx/test_sqldb.db"
db_path = f"sqlite:///{db_path}"

engine = create_engine(db_path)

# The SQLite file persists on disk between kernel sessions. With the default
# if_exists="fail", re-running this cell (or Restart & Run All) raises
# ValueError because the "titanic" table already exists. Replacing the table
# keeps the cell idempotent and the table in sync with the current `df`.
df.to_sql("titanic", engine, index=False, if_exists="replace")

887

In [9]:
# Wrap the SQLAlchemy engine in LangChain's SQLDatabase helper, report the
# dialect and the tables it exposes, then run a sanity-check query whose
# result is shown as the cell output.
db = SQLDatabase(engine=engine)
print(db.dialect, db.get_usable_table_names(), sep="\n")
db.run("SELECT * FROM titanic WHERE Age < 2;")

sqlite
['titanic']


"[(1, 2, 'Master. Alden Gates Caldwell', 'male', 0.83, 0, 2, 29.0), (0, 3, 'Master. Eino Viljami Panula', 'male', 1.0, 4, 1, 39.6875), (1, 3, 'Miss. Eleanor Ileen Johnson', 'female', 1.0, 1, 1, 11.1333), (1, 2, 'Master. Richard F Becker', 'male', 1.0, 2, 1, 39.0), (1, 1, 'Master. Hudson Trevor Allison', 'male', 0.92, 1, 2, 151.55), (1, 3, 'Miss. Maria Nakid', 'female', 1.0, 0, 2, 15.7417), (0, 3, 'Master. Sidney Leonard Goodwin', 'male', 1.0, 5, 2, 46.9), (1, 3, 'Miss. Helene Barbara Baclini', 'female', 0.75, 2, 1, 19.2583), (1, 3, 'Miss. Eugenie Baclini', 'female', 0.75, 2, 1, 19.2583), (1, 2, 'Master. Viljo Hamalainen', 'male', 0.67, 1, 1, 14.5), (1, 3, 'Master. Bertram Vere Dean', 'male', 1.0, 1, 2, 20.575), (1, 3, 'Master. Assad Alexander Thomas', 'male', 0.42, 0, 1, 8.5167), (1, 2, 'Master. Andre Mallet', 'male', 1.0, 0, 2, 37.0042), (1, 2, 'Master. George Sibley Richards', 'male', 0.83, 1, 1, 18.75)]"

Equivalent in Pandas

In [10]:
# Same filter as the SQL query above, expressed as a boolean row mask.
df.loc[df["Age"].lt(2)]

Unnamed: 0,Survived,Pclass,Name,Sex,Age,Siblings/Spouses Aboard,Parents/Children Aboard,Fare
77,1,2,Master. Alden Gates Caldwell,male,0.83,0,2,29.0
163,0,3,Master. Eino Viljami Panula,male,1.0,4,1,39.6875
171,1,3,Miss. Eleanor Ileen Johnson,female,1.0,1,1,11.1333
182,1,2,Master. Richard F Becker,male,1.0,2,1,39.0
303,1,1,Master. Hudson Trevor Allison,male,0.92,1,2,151.55
379,1,3,Miss. Maria Nakid,female,1.0,0,2,15.7417
384,0,3,Master. Sidney Leonard Goodwin,male,1.0,5,2,46.9
466,1,3,Miss. Helene Barbara Baclini,female,0.75,2,1,19.2583
641,1,3,Miss. Eugenie Baclini,female,0.75,2,1,19.2583
751,1,2,Master. Viljo Hamalainen,male,0.67,1,1,14.5


#### Create an agent to interact with the Database

In [11]:
import os
import warnings

from dotenv import load_dotenv

# Read variables from a .env file into the process environment; the LLM cell
# below fetches its settings with os.getenv.
load_dotenv()

# Silence every warning for the rest of the session to keep agent traces readable.
warnings.filterwarnings("ignore")

In [12]:
from langchain_groq import ChatGroq

# Chat model used by the SQL agent. The API key and model name are read from
# the environment variables "groq_api_key" and "llama_model_name".
# A hard-coded alternative would be: ChatGroq(model="llama3-70b-8192")
llm = ChatGroq(
    model_name=os.environ.get("llama_model_name"),
    groq_api_key=os.environ.get("groq_api_key"),
    temperature=0,
)

In [13]:
from langchain_community.agent_toolkits import create_sql_agent

# Build a tool-calling SQL agent over `db`; verbose=True prints each tool
# invocation so the generated SQL can be inspected in the cell output.
agent_executor = create_sql_agent(
    llm=llm,
    db=db,
    agent_type="tool-calling",
    verbose=True,
)

In [14]:
# Ask the agent a free-form question about a single passenger.
agent_executor.invoke({"input": "Tell me more about Anders Johan Andersson"})



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3m
Invoking: `sql_db_list_tables` with `{'tool_input': ''}`
responded:   Then I should double check my query before running it to make sure it is correct.  Then I can run the query and get the answer.



[0m[38;5;200m[1;3mtitanic[0m[32;1m[1;3m
Invoking: `sql_db_schema` with `{'table_names': 'table1, table2, table3'}`
responded:   Then I should double check my query before running it to make sure it is correct.  Then I can run the query and get the answer.



[0m[33;1m[1;3mError: table_names {'table2', 'table3', 'table1'} not found in database[0m[32;1m[1;3m
Invoking: `sql_db_query_checker` with `{'query': 'SELECT column1, column2 FROM table1 WHERE column3 = "Anders Johan Andersson" LIMIT 10;'}`
responded:   Then I should double check my query before running it to make sure it is correct.  Then I can run the query and get the answer.



[0m[36;1m[1;3m```sql
SELECT column1, column2 FROM table1 WHERE column3 = '

{'input': 'Tell me more about Anders Johan Andersson',
 'output': 'Anders Johan Andersson was a male, aged 48.'}

In [15]:
# Ask the agent an aggregate question; the next cell cross-checks it in pandas.
agent_executor.invoke({"input": "what's the average age of survivors"})



[1m> Entering new SQL Agent Executor chain...[0m
[32;1m[1;3m
Invoking: `sql_db_list_tables` with `{'tool_input': ''}`
responded:   Then I should double check my query before running it to get the average age of survivors.



[0m[38;5;200m[1;3mtitanic[0m[32;1m[1;3m
Invoking: `sql_db_schema` with `{'table_names': 'table1, table2, table3'}`
responded:   Then I should double check my query before running it to get the average age of survivors.



[0m[33;1m[1;3mError: table_names {'table2', 'table3', 'table1'} not found in database[0m[32;1m[1;3m
Invoking: `sql_db_query_checker` with `{'query': 'SELECT AVG(age) FROM survivors;'}`
responded:   Then I should double check my query before running it to get the average age of survivors.



[0m[36;1m[1;3m```sql
SELECT AVG(age) FROM survivors;
```[0m[32;1m[1;3m
Invoking: `sql_db_query` with `{'query': 'SELECT AVG(age) FROM survivors LIMIT 10;'}`
responded:   Then I should double check my query before running it to get the av

{'input': "what's the average age of survivors",
 'output': 'The average age of survivors is 29.47.'}

In [16]:
# Equivalent in pandas: mean Age over the rows where Survived == 1.
# NOTE: the agent run above reported 29.47, while this cell's recorded output
# is ~28.41. The agent trace shows it querying a `survivors` table, which is
# not among the usable tables (only `titanic`), so treat its answer as unreliable.
df.loc[df["Survived"] == 1, "Age"].mean()

np.float64(28.408391812865496)