In [None]:
# Install or upgrade the packages this notebook imports (LangChain core,
# community and OpenAI integrations, plus python-dotenv).
# `%pip` is used so the install targets the running kernel's environment.
%pip install --upgrade --quiet  langchain langchain-community langchain-openai python-dotenv

In [1]:
import os
from dotenv import load_dotenv, find_dotenv
# Load variables from the .env file located by find_dotenv() into os.environ
load_dotenv(find_dotenv())

# Read the OpenAI API key required to use the OpenAI models
openai_apikey = os.environ.get("OPENAI_API_KEY")

# Fail fast with an actionable message: without this check a missing or empty
# key silently becomes None here and only surfaces later, when the model is
# built or first called.
if not openai_apikey:
    raise RuntimeError(
        "OPENAI_API_KEY is not set. Add it to a .env file or export it "
        "in the environment before running this notebook."
    )

In [2]:
from langchain_openai import ChatOpenAI
from langchain_community.utilities import SQLDatabase

# Settings a reader might want to tune: database location and chat model name
CHINOOK_URI = "sqlite:///Chinook.db"
CHAT_MODEL = "gpt-3.5-turbo"

# Open the Chinook database through LangChain's SQL wrapper
db = SQLDatabase.from_uri(CHINOOK_URI)
# OpenAI chat model that will work against this db (temperature fixed at 0)
llm = ChatOpenAI(temperature=0, model=CHAT_MODEL)

# SQL Dialect Problem
SQL dialects are essentially different versions of the SQL language, each tweaked to suit the peculiarities of an individual database management system (DBMS). Effective error handling should therefore take these differences into account.

In [6]:
from langchain.chains.sql_database.prompt import SQL_PROMPTS

# Dialect names that have an entry in SQL_PROMPTS
supported_dialects = list(SQL_PROMPTS)
print("Different SQL dialects: ", supported_dialects)

# Dialect reported by the database we connected to
current_dialect = db.dialect
print("Our current dialect: ", current_dialect)

Different SQL dialects:  ['crate', 'duckdb', 'googlesql', 'mssql', 'mysql', 'mariadb', 'oracle', 'postgresql', 'sqlite', 'clickhouse', 'prestodb']
Our current dialect:  sqlite


# Feed the schema context
To write valid queries, the model needs to know our database in detail. To give it relevant context, we will feed it the table names, their schemas, and a sample of rows from each table.

If the database schema is too large, we can select only the tables that users query most often (identified from their query inputs). In our case, we are going to feed the entire schema for effective error handling.

In [5]:
# Fetch the database context and keep only its table description text
# (CREATE TABLE statements followed by a few sample rows per table).
db_context = db.get_context()
context = db_context["table_info"]
print(context)


CREATE TABLE "Album" (
	"AlbumId" INTEGER NOT NULL, 
	"Title" NVARCHAR(160) NOT NULL, 
	"ArtistId" INTEGER NOT NULL, 
	PRIMARY KEY ("AlbumId"), 
	FOREIGN KEY("ArtistId") REFERENCES "Artist" ("ArtistId")
)

/*
3 rows from Album table:
AlbumId	Title	ArtistId
1	For Those About To Rock We Salute You	1
2	Balls to the Wall	2
3	Restless and Wild	2
*/


CREATE TABLE "Artist" (
	"ArtistId" INTEGER NOT NULL, 
	"Name" NVARCHAR(120), 
	PRIMARY KEY ("ArtistId")
)

/*
3 rows from Artist table:
ArtistId	Name
1	AC/DC
2	Accept
3	Aerosmith
*/


CREATE TABLE "Customer" (
	"CustomerId" INTEGER NOT NULL, 
	"FirstName" NVARCHAR(40) NOT NULL, 
	"LastName" NVARCHAR(20) NOT NULL, 
	"Company" NVARCHAR(80), 
	"Address" NVARCHAR(70), 
	"City" NVARCHAR(40), 
	"State" NVARCHAR(40), 
	"Country" NVARCHAR(40), 
	"PostalCode" NVARCHAR(10), 
	"Phone" NVARCHAR(24), 
	"Fax" NVARCHAR(24), 
	"Email" NVARCHAR(60) NOT NULL, 
	"SupportRepId" INTEGER, 
	PRIMARY KEY ("CustomerId"), 
	FOREIGN KEY("SupportRepId") REFERENCES "Empl

# Prompt Template Customization

## Zero-shot prompting
In this section, we tell the model which specific tasks to perform and give it the right information (SQL dialect, database schema, a few sample rows, etc.). With this prompting strategy, we do not provide any examples (zero-shot) and let the model work out the answer by itself.

# Model creation and testing
This model will consume the previous schema context and prompt template.

In [None]:
from langchain_community.agent_toolkits import create_sql_agent

# Build the SQL agent from the chat model and the database, using the
# "openai-tools" agent type with verbose output enabled.
agent_executor = create_sql_agent(
    llm,
    verbose=True,
    agent_type="openai-tools",
    db=db,
)

In [None]:
# Natural-language question for the agent; the result of invoke() is left as
# the cell's last expression so the notebook displays it.
question = "List the total sales per country. Which country's customers spent the most?"
agent_executor.invoke({"input": question})