https://python.langchain.com/docs/tutorials/sql_qa/

In [104]:
import sqlite3
import pandas as pd
from langchain_community.utilities import SQLDatabase
import sqlalchemy

In [105]:
from sqlalchemy import create_engine, MetaData
from sqlalchemy import create_engine, MetaData
from sqlalchemy_schemadisplay import create_schema_graph

from sqlalchemy import create_engine, MetaData
from eralchemy import render_er

In [106]:
# früherer Versuch der Einbindung der Chinook Datenbank
db_path = r"./POC-LangChain/chinook-database-master/ChinookDatabase/DataSources/Chinook_Sqlite.sqlite"
db = SQLDatabase.from_uri(f"sqlite:///{db_path}")

In [107]:
conn = sqlite3.connect(db_path)

In [108]:
result = db.run("SELECT * FROM Artist LIMIT 10;")
print(result)

[(1, 'AC/DC'), (2, 'Accept'), (3, 'Aerosmith'), (4, 'Alanis Morissette'), (5, 'Alice In Chains'), (6, 'Antônio Carlos Jobim'), (7, 'Apocalyptica'), (8, 'Audioslave'), (9, 'BackBeat'), (10, 'Billy Cobham')]


In [109]:
tables = conn.execute("SELECT name FROM sqlite_master WHERE type='table';").fetchall()
print("Available tables:", [t[0] for t in tables])

Available tables: ['Album', 'Artist', 'Customer', 'Employee', 'Genre', 'Invoice', 'InvoiceLine', 'MediaType', 'Playlist', 'PlaylistTrack', 'Track']


In [110]:
query_test = "SELECT * FROM Artist LIMIT 10;"
df=pd.read_sql_query(query_test, conn)
display(df)


Unnamed: 0,ArtistId,Name
0,1,AC/DC
1,2,Accept
2,3,Aerosmith
3,4,Alanis Morissette
4,5,Alice In Chains
5,6,Antônio Carlos Jobim
6,7,Apocalyptica
7,8,Audioslave
8,9,BackBeat
9,10,Billy Cobham


In [111]:
query = """
SELECT Customer.CustomerId, Customer.FirstName, Customer.LastName, 
       SUM(Invoice.Total) AS TotalSpent
FROM Customer
JOIN Invoice ON Customer.CustomerId = Invoice.CustomerId
GROUP BY Customer.CustomerId
ORDER BY TotalSpent DESC
LIMIT 5;
"""
df = pd.read_sql(query, conn)
display(df)


Unnamed: 0,CustomerId,FirstName,LastName,TotalSpent
0,6,Helena,Holý,49.62
1,26,Richard,Cunningham,47.62
2,57,Luis,Rojas,46.62
3,45,Ladislav,Kovács,45.62
4,46,Hugh,O'Reilly,45.62


In [168]:
from sqlalchemy import create_engine, MetaData
from sqlalchemy_schemadisplay import create_schema_graph

# Connect to database
engine = create_engine(f"sqlite:///{db_path}")

# Create MetaData object and reflect the schema
metadata = MetaData()
metadata.reflect(bind=engine)

# Specify the output file name
output_file = "chinook_erd_from_eralchemy.png"

# Generate and save the ERD
render_er(metadata, output_file)

print(f"✅ ERD saved as {output_file}")

✅ ERD saved as chinook_erd_from_eralchemy.png


In [113]:
!ollama list

NAME                     	ID          	SIZE  	MODIFIED     
deepseek-r1:8b           	28f8fd6cdc67	4.9 GB	30 hours ago	
llama3.2:1b-instruct-q4_0	53f2745c8077	770 MB	3 months ago	
llama3.2:1b              	baf6a787fdff	1.3 GB	3 months ago	
llama3.1:8b              	42182419e950	4.7 GB	4 months ago	
mistral:instruct         	f974a74358d6	4.1 GB	4 months ago	


In [115]:
from langchain_experimental.sql import SQLDatabaseChain
from langchain_ollama import ChatOllama
from langchain.sql_database import SQLDatabase

In [116]:
print(db.dialect)
print(db.get_usable_table_names())

sqlite
['Album', 'Artist', 'Customer', 'Employee', 'Genre', 'Invoice', 'InvoiceLine', 'MediaType', 'Playlist', 'PlaylistTrack', 'Track']


In [120]:
from typing_extensions import TypedDict


class State(TypedDict):
    question: str
    query: str
    result: str
    answer: str

In [127]:
from langchain_ollama import ChatOllama

# Initialize the Llama 3.1 model
llm = ChatOllama(
    model="llama3.1:8b",
    temperature=0,
)

In [154]:
# We will pull a prompt from the Prompt Hub to instruct the model.
from langchain import hub

query_prompt_template = hub.pull("langchain-ai/sql-query-system-prompt")

assert len(query_prompt_template.messages) == 1
query_prompt_template.messages[0].pretty_print()




Given an input question, create a syntactically correct [33;1m[1;3m{dialect}[0m query to run to help find the answer. Unless the user specifies in his question a specific number of examples they wish to obtain, always limit your query to at most [33;1m[1;3m{top_k}[0m results. You can order the results by a relevant column to return the most interesting examples in the database.

Never query for all the columns from a specific table, only ask for a the few relevant columns given the question.

Pay attention to use only the column names that you can see in the schema description. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.

Only use the following tables:
[33;1m[1;3m{table_info}[0m

Question: [33;1m[1;3m{input}[0m


In [178]:
from typing_extensions import Annotated, TypedDict

class QueryOutput(TypedDict):
    """Generated SQL query."""
    query: Annotated[str, ..., "Syntactically valid SQL query."]

def write_query(state: State):
    """Generate SQL query to fetch information."""
    prompt = query_prompt_template.invoke(
        {
            "dialect": db.dialect,
            "top_k": 10,
            "table_info": db.get_table_info(),
            "input": state["question"],
        }
    )
    structured_llm = llm.with_structured_output(QueryOutput)
    result = structured_llm.invoke(prompt)
    
    # Debug prints to check the result
    print("Prompt:", prompt)
    print("Result:", result)
    
    if result and "query" in result:
        return {"query": result["query"]}
    else:
        print("Error: No query generated.")
        return None

# Example usage
state = State(question="Who are the top artists in the database?")
query_result = write_query(state)

if query_result:
    print("Generated Query:", query_result["query"])
else:
    print("Failed to generate query.")

Prompt: messages=[SystemMessage(content='Given an input question, create a syntactically correct sqlite query to run to help find the answer. Unless the user specifies in his question a specific number of examples they wish to obtain, always limit your query to at most 10 results. You can order the results by a relevant column to return the most interesting examples in the database.\n\nNever query for all the columns from a specific table, only ask for a the few relevant columns given the question.\n\nPay attention to use only the column names that you can see in the schema description. Be careful to not query for columns that do not exist. Also, pay attention to which column is in which table.\n\nOnly use the following tables:\n\nCREATE TABLE "Album" (\n\t"AlbumId" INTEGER NOT NULL, \n\t"Title" NVARCHAR(160) NOT NULL, \n\t"ArtistId" INTEGER NOT NULL, \n\tPRIMARY KEY ("AlbumId"), \n\tFOREIGN KEY("ArtistId") REFERENCES "Artist" ("ArtistId")\n)\n\n/*\n3 rows from Album table:\nAlbumId\tT

Generate NL-Answer based on the query result

In [171]:
from langchain_core.prompts import ChatPromptTemplate

answer_prompt = ChatPromptTemplate.from_template(
    "Based on the query: {query}\n"
    "And the result: {result}\n"
    "Please provide a natural language answer to the question: {question}"
)

answer = llm.invoke(answer_prompt.format(
    query=query_result["query"],
    result=state["result"],
    question=state["question"]
))

state["answer"] = answer.content
print("Answer:", state["answer"])


NameError: name 'query_result' is not defined