In [77]:
import sqlite3
import langchain
import langchain_community
from langchain_community.utilities import SQLDatabase

In [78]:
# Location of the Chinook SQLite file, relative to the notebook's working directory.
db_path = r"./POC-LangChain/chinook-database-master/ChinookDatabase/DataSources/Chinook_Sqlite.sqlite"
# Build the connection URI first, then open the database wrapper on it.
db_uri = f"sqlite:///{db_path}"
db = SQLDatabase.from_uri(db_uri)

In [79]:
import subprocess

# Run `ollama list` and capture its output as text.
result = subprocess.run(["ollama", "list"], capture_output=True, text=True)
if result.returncode != 0:
    # Report the CLI's own error text instead of continuing with empty output.
    raise RuntimeError(
        f"'ollama list' failed with exit code {result.returncode}: {result.stderr.strip()}"
    )
output = result.stdout.strip()

print(output)  # Debugging line

lines = output.splitlines()
if not lines:
    # Raise instead of calling exit(): in a notebook, exit() asks the kernel to
    # shut down, which discards all state rather than just failing this cell.
    raise RuntimeError("Keine Ausgabe erhalten.")

NAME                     	ID          	SIZE  	MODIFIED     
qwen2.5-coder:7b         	2b0496514337	4.7 GB	6 days ago  	
deepseek-r1:8b           	28f8fd6cdc67	4.9 GB	8 days ago  	
llama3.2:1b-instruct-q4_0	53f2745c8077	770 MB	3 months ago	
llama3.2:1b              	baf6a787fdff	1.3 GB	3 months ago	
llama3.1:8b              	42182419e950	4.7 GB	4 months ago	
mistral:instruct         	f974a74358d6	4.1 GB	4 months ago


In [80]:
# Skip the header row; the model name is the first whitespace-separated field
# of every non-blank remaining row.
model_names = [fields[0] for fields in map(str.split, lines[1:]) if fields]

print("Model names:", model_names)

Model names: ['qwen2.5-coder:7b', 'deepseek-r1:8b', 'llama3.2:1b-instruct-q4_0', 'llama3.2:1b', 'llama3.1:8b', 'mistral:instruct']


In [81]:
# Keep the dialect name for the prompt, echo it, and display the usable table names.
db_dialect = db.dialect
print(db_dialect)
db.get_usable_table_names()

sqlite


['Album',
 'Artist',
 'Customer',
 'Employee',
 'Genre',
 'Invoice',
 'InvoiceLine',
 'MediaType',
 'Playlist',
 'PlaylistTrack',
 'Track']

#### Schemainformationen aus der Datenbank extrahieren

In [82]:
# Request the table description one table at a time, in the order the names are returned.
tables = db.get_usable_table_names()
table_info = [db.get_table_info([name]) for name in tables]


In [83]:
# str() on the list yields its Python repr (brackets, quotes, escaped characters);
# this string is the text whose tokens are counted further down.
table_info_str = str(table_info)

#### Modell initialisieren

In [84]:
model_names

['qwen2.5-coder:7b',
 'deepseek-r1:8b',
 'llama3.2:1b-instruct-q4_0',
 'llama3.2:1b',
 'llama3.1:8b',
 'mistral:instruct']

In [85]:
model_name = model_names[0] # Select the model to use (first entry of the list)
model_name

'qwen2.5-coder:7b'

In [86]:
from langchain_ollama import ChatOllama

# Chat model handle for the selected model name, with the temperature fixed at 0.
llm = ChatOllama(model=model_name, temperature=0)
print(f'{model_name} ausgewählt und geladen.')

qwen2.5-coder:7b ausgewählt und geladen.


#### Tokenzählung für Schemainformationen durchführen

In [87]:
from litellm import token_counter

# Wrap the schema text as a single user message and count its tokens for the
# selected model, addressed through the "ollama/" prefix.
messages = [{"role": "user", "content": table_info_str}]
litellm_model = f"ollama/{model_name}"
num_tokens = token_counter(model=litellm_model, messages=messages)

print(f"Tokenanzahl: {num_tokens}")

Tokenanzahl: 2337


#### Selbstgebaute Funktion zur Schemaextraktion

In [88]:
import sqlite3

def extract_schema(db_path, sample_rows=2, verbose=True):
    """
    Extract the schema of every table in a SQLite database.

    The result is always returned as a dictionary; when ``verbose`` is true the
    same information is additionally printed while it is being collected.

    Parameters:
      - db_path: Path to the database file.
      - sample_rows: Number of sample rows fetched per table.
      - verbose: If True, the information is printed directly.

    Returns:
      A dictionary with one entry per table, each holding:
        - columns: list of columns (including type and a "(PK)" marker)
        - foreign_keys: list of foreign keys
          (format: "column -> referenced_table.referenced_column")
        - sample_data: the fetched sample rows (as a list of rows)
    """
    def quote_identifier(name):
        # Identifiers cannot be bound as SQL parameters, so table names are
        # spliced into the statement text. Quoting keeps names that contain
        # spaces, reserved words or quote characters from breaking the statement.
        return '"' + name.replace('"', '""') + '"'

    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()

        # Fetch all table names.
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        table_names = [row[0] for row in cursor.fetchall()]

        schema = {}

        for table in table_names:
            quoted_table = quote_identifier(table)
            if verbose:
                print(f"Schema for Table '{table}':")

            # Column information: index 1 is the column name, 2 the declared
            # type, and 5 is non-zero for primary-key columns.
            cursor.execute(f"PRAGMA table_info({quoted_table});")
            columns = []
            for col in cursor.fetchall():
                pk_marker = " (PK)" if col[5] != 0 else ""
                columns.append(f"{col[1]} {col[2]}{pk_marker}")
            if verbose:
                print("Columns:", ", ".join(columns))

            # Foreign keys: index 3 is the local column, 2 the referenced
            # table, 4 the referenced column.
            cursor.execute(f"PRAGMA foreign_key_list({quoted_table});")
            foreign_keys = [
                f"{fk[3]} -> {fk[2]}.{fk[4]}" for fk in cursor.fetchall()
            ]
            if verbose:
                if foreign_keys:
                    print("Foreign Keys:", ", ".join(foreign_keys))
                else:
                    print("Foreign Keys: none")

            # Sample rows: the row limit is bound as a parameter instead of
            # being formatted into the SQL text.
            cursor.execute(
                f"SELECT * FROM {quoted_table} LIMIT ?;", (int(sample_rows),)
            )
            sample_data = cursor.fetchall()
            if verbose:
                print("Sample data:", sample_data)
                print("-" * 80)

            schema[table] = {
                "columns": columns,
                "foreign_keys": foreign_keys,
                "sample_data": sample_data
            }
    finally:
        # Release the connection even when one of the statements above raises.
        conn.close()

    return schema


In [89]:
# Extract the schema with one sample row per table; verbose=True also prints it.
schema = extract_schema(db_path, sample_rows=1, verbose=True)

Schema for Table 'Album':
Columns: AlbumId INTEGER (PK), Title NVARCHAR(160), ArtistId INTEGER
Foreign Keys: ArtistId -> Artist.ArtistId
Sample data: [(1, 'For Those About To Rock We Salute You', 1)]
--------------------------------------------------------------------------------
Schema for Table 'Artist':
Columns: ArtistId INTEGER (PK), Name NVARCHAR(120)
Foreign Keys: none
Sample data: [(1, 'AC/DC')]
--------------------------------------------------------------------------------
Schema for Table 'Customer':
Columns: CustomerId INTEGER (PK), FirstName NVARCHAR(40), LastName NVARCHAR(20), Company NVARCHAR(80), Address NVARCHAR(70), City NVARCHAR(40), State NVARCHAR(40), Country NVARCHAR(40), PostalCode NVARCHAR(10), Phone NVARCHAR(24), Fax NVARCHAR(24), Email NVARCHAR(60), SupportRepId INTEGER
Foreign Keys: SupportRepId -> Employee.EmployeeId
Sample data: [(1, 'Luís', 'Gonçalves', 'Embraer - Empresa Brasileira de Aeronáutica S.A.', 'Av. Brigadeiro Faria Lima, 2170', 'São José dos Camp

In [90]:
# Python repr of the schema dictionary; this string is what gets inserted into the prompt.
schema_info_str = str(schema)

#### Prompt erstellen

In [100]:
from langchain import PromptTemplate

# English instruction text for the model. The placeholders {db_dialect},
# {schema_info} and {question} are filled in when the template is formatted.
_SQL_PROMPT_TEXT = """
You are a helpful SQL assistant that provides only SELECT-Statements.
Based on the following database schema, translate a natural language query into an SQL query that is executable in my {db_dialect}-Database.

Database Schema:
{schema_info}

Using the schema information above, please formulate the appropriate SQL query this question:
Question: {question}

SQL Query:
""".strip()

prompt_template = PromptTemplate(
    template=_SQL_PROMPT_TEXT,
    input_variables=["schema_info", "question","db_dialect"],
)

In [92]:
question = "Which albums are the top sellers?"

# Gather the placeholder values in one mapping, fill the template, and show the result.
prompt_values = {
    "schema_info": schema_info_str,
    "question": question,
    "db_dialect": db_dialect,
}
prompt = prompt_template.format(**prompt_values)

print(prompt)

You are a helpful SQL assistant that provides only SELECT-Statements.
Based on the following database schema, translate a natural language query into an SQL query that is executable in my sqlite-Database.

Database Schema:
{'Album': {'columns': ['AlbumId INTEGER (PK)', 'Title NVARCHAR(160)', 'ArtistId INTEGER'], 'foreign_keys': ['ArtistId -> Artist.ArtistId'], 'sample_data': [(1, 'For Those About To Rock We Salute You', 1)]}, 'Artist': {'columns': ['ArtistId INTEGER (PK)', 'Name NVARCHAR(120)'], 'foreign_keys': [], 'sample_data': [(1, 'AC/DC')]}, 'Customer': {'columns': ['CustomerId INTEGER (PK)', 'FirstName NVARCHAR(40)', 'LastName NVARCHAR(20)', 'Company NVARCHAR(80)', 'Address NVARCHAR(70)', 'City NVARCHAR(40)', 'State NVARCHAR(40)', 'Country NVARCHAR(40)', 'PostalCode NVARCHAR(10)', 'Phone NVARCHAR(24)', 'Fax NVARCHAR(24)', 'Email NVARCHAR(60)', 'SupportRepId INTEGER'], 'foreign_keys': ['SupportRepId -> Employee.EmployeeId'], 'sample_data': [(1, 'Luís', 'Gonçalves', 'Embraer - Empr

#### States definieren

In [102]:
from typing_extensions import TypedDict

class State(TypedDict):
    """Typed dictionary listing the state fields of the text-to-SQL flow, from question to answer."""
    schema_info: str       # Loaded information about the database schema
    question: str          # Original user input in natural language
    db_dialect: str        # SQL dialect in use, e.g. "SQLite"
    # NOTE(review): described as optional, but declared like every other (required) key — confirm.
    parsed_question: str   # (Optional) analysis of the question
    query: str             # Generated SQL query
    execution_result: str  # Result of executing the SQL query
    answer: str            # Final answer, formatted for the user
    error: str             # Error messages, if any


#### Chain erstellen

In [93]:
import langchain_community

In [94]:
from langchain_experimental.sql import SQLDatabaseChain

In [97]:
from langchain.chains import LLMChain

# Create the chain from the prompt template and the chat model.
chain = LLMChain(llm=llm, prompt=prompt_template)

# Example request. invoke() replaces the deprecated run(); it returns a dict,
# and the generated text is stored under the chain's output key.
chain_output = chain.invoke(
    {
        "schema_info": schema_info_str,
        "question": question,
        "db_dialect": db_dialect,
    }
)
sql_query = chain_output[chain.output_key]

print(sql_query)

```sql
SELECT a.Title 
FROM Album a
JOIN Track t ON a.AlbumId = t.AlbumId
JOIN InvoiceLine il ON t.TrackId = il.TrackId
GROUP BY a.AlbumId
ORDER BY SUM(il.UnitPrice * il.Quantity) DESC;
```
