In [18]:
import sqlite3
import langchain
import langchain_community
from langchain_community.utilities import SQLDatabase

In [19]:
# Relative path to the Chinook sample SQLite file used throughout the notebook.
db_path = r"./POC-LangChain/chinook-database-master/ChinookDatabase/DataSources/Chinook_Sqlite.sqlite"

# Build the SQLite connection URI first, then open the LangChain wrapper on it.
sqlite_uri = "sqlite:///" + db_path
db = SQLDatabase.from_uri(sqlite_uri)

In [None]:
import subprocess

# Run `ollama list` and capture its output as text. check=True turns a
# non-zero exit status into CalledProcessError (carrying the captured stderr)
# instead of silently continuing with an empty stdout.
result = subprocess.run(
    ["ollama", "list"],
    capture_output=True,
    text=True,
    check=True,
)
output = result.stdout.strip()

print(output)  # Debugging line

lines = output.splitlines()
if not lines:
    # Raise instead of calling exit(): inside a notebook exit() ends the kernel
    # session, whereas an exception fails only this cell and stops "Run All"
    # with a visible traceback.
    raise RuntimeError("Keine Ausgabe erhalten.")



NAME                     	ID          	SIZE  	MODIFIED     
qwen2.5-coder:7b         	2b0496514337	4.7 GB	5 days ago  	
deepseek-r1:8b           	28f8fd6cdc67	4.9 GB	7 days ago  	
llama3.2:1b-instruct-q4_0	53f2745c8077	770 MB	3 months ago	
llama3.2:1b              	baf6a787fdff	1.3 GB	3 months ago	
llama3.1:8b              	42182419e950	4.7 GB	4 months ago	
mistral:instruct         	f974a74358d6	4.1 GB	4 months ago


In [21]:
# Skip the header row of the `ollama list` table and keep the first
# whitespace-separated field (the NAME column) of every non-blank row.
model_names = [
    fields[0]
    for fields in (row.split() for row in lines[1:])
    if fields
]

print("Model names:", model_names)

Model names: ['qwen2.5-coder:7b', 'deepseek-r1:8b', 'llama3.2:1b-instruct-q4_0', 'llama3.2:1b', 'llama3.1:8b', 'mistral:instruct']


In [22]:
# Print the SQL dialect reported by the connection, then display the list of
# table names the LangChain wrapper exposes (shown as the cell's result).
print(db.dialect)
db.get_usable_table_names()

sqlite


['Album',
 'Artist',
 'Customer',
 'Employee',
 'Genre',
 'Invoice',
 'InvoiceLine',
 'MediaType',
 'Playlist',
 'PlaylistTrack',
 'Track']

#### Schemainformationen aus der Datenbank extrahieren

In [23]:
tables = db.get_usable_table_names()

# Collect the schema description of each table one at a time, echoing every
# description right after it has been fetched.
table_info = []
for table_name in tables:
    table_info.append(db.get_table_info([table_name]))
    print(table_info[-1])



CREATE TABLE "Album" (
	"AlbumId" INTEGER NOT NULL, 
	"Title" NVARCHAR(160) NOT NULL, 
	"ArtistId" INTEGER NOT NULL, 
	PRIMARY KEY ("AlbumId"), 
	FOREIGN KEY("ArtistId") REFERENCES "Artist" ("ArtistId")
)

/*
3 rows from Album table:
AlbumId	Title	ArtistId
1	For Those About To Rock We Salute You	1
2	Balls to the Wall	2
3	Restless and Wild	2
*/

CREATE TABLE "Artist" (
	"ArtistId" INTEGER NOT NULL, 
	"Name" NVARCHAR(120), 
	PRIMARY KEY ("ArtistId")
)

/*
3 rows from Artist table:
ArtistId	Name
1	AC/DC
2	Accept
3	Aerosmith
*/

CREATE TABLE "Customer" (
	"CustomerId" INTEGER NOT NULL, 
	"FirstName" NVARCHAR(40) NOT NULL, 
	"LastName" NVARCHAR(20) NOT NULL, 
	"Company" NVARCHAR(80), 
	"Address" NVARCHAR(70), 
	"City" NVARCHAR(40), 
	"State" NVARCHAR(40), 
	"Country" NVARCHAR(40), 
	"PostalCode" NVARCHAR(10), 
	"Phone" NVARCHAR(24), 
	"Fax" NVARCHAR(24), 
	"Email" NVARCHAR(60) NOT NULL, 
	"SupportRepId" INTEGER, 
	PRIMARY KEY ("CustomerId"), 
	FOREIGN KEY("SupportRepId") REFERENCES "Employ

In [24]:
# Display the raw list of per-table schema strings (one list entry per table).
table_info

['\nCREATE TABLE "Album" (\n\t"AlbumId" INTEGER NOT NULL, \n\t"Title" NVARCHAR(160) NOT NULL, \n\t"ArtistId" INTEGER NOT NULL, \n\tPRIMARY KEY ("AlbumId"), \n\tFOREIGN KEY("ArtistId") REFERENCES "Artist" ("ArtistId")\n)\n\n/*\n3 rows from Album table:\nAlbumId\tTitle\tArtistId\n1\tFor Those About To Rock We Salute You\t1\n2\tBalls to the Wall\t2\n3\tRestless and Wild\t2\n*/',
 '\nCREATE TABLE "Artist" (\n\t"ArtistId" INTEGER NOT NULL, \n\t"Name" NVARCHAR(120), \n\tPRIMARY KEY ("ArtistId")\n)\n\n/*\n3 rows from Artist table:\nArtistId\tName\n1\tAC/DC\n2\tAccept\n3\tAerosmith\n*/',
 '\nCREATE TABLE "Customer" (\n\t"CustomerId" INTEGER NOT NULL, \n\t"FirstName" NVARCHAR(40) NOT NULL, \n\t"LastName" NVARCHAR(20) NOT NULL, \n\t"Company" NVARCHAR(80), \n\t"Address" NVARCHAR(70), \n\t"City" NVARCHAR(40), \n\t"State" NVARCHAR(40), \n\t"Country" NVARCHAR(40), \n\t"PostalCode" NVARCHAR(10), \n\t"Phone" NVARCHAR(24), \n\t"Fax" NVARCHAR(24), \n\t"Email" NVARCHAR(60) NOT NULL, \n\t"SupportRepId" IN

In [25]:
# Turn the list of schema strings into a single string via the list's repr.
# NOTE(review): the repr keeps the brackets, quotes and escaped "\n"/"\t"
# sequences of the list literal — confirm this is the text that should be
# token-counted; "\n\n".join(table_info) would yield the plain DDL instead.
table_info_str = str(table_info)

#### Modell initialisieren

In [26]:
# Display the model names parsed from `ollama list` to choose from.
model_names

['qwen2.5-coder:7b',
 'deepseek-r1:8b',
 'llama3.2:1b-instruct-q4_0',
 'llama3.2:1b',
 'llama3.1:8b',
 'mistral:instruct']

In [27]:
# Select the model by name rather than by list position: the order of
# `ollama list` is not fixed (the listing appears sorted by modification
# time), so a positional index silently picks a different model — or raises
# IndexError — as soon as a model is pulled, updated or removed.
PREFERRED_MODEL = "llama3.1:8b"
if PREFERRED_MODEL not in model_names:
    raise ValueError(
        f"Model {PREFERRED_MODEL!r} is not installed; available models: {model_names}"
    )
model_name = PREFERRED_MODEL  # the model used for the rest of the notebook
model_name

'llama3.1:8b'

In [28]:
from langchain_ollama import ChatOllama

# Chat model backed by the selected Ollama model.
# NOTE(review): temperature=0 is presumably chosen for reproducible answers — confirm.
llm = ChatOllama(model=model_name, temperature=0)

#### Tokenzählung für Schemainformationen durchführen

In [29]:
from litellm import token_counter

# Wrap the LangChain schema text in a single user message, the input shape
# token_counter is called with here.
messages = [dict(role="user", content=table_info_str)]

# NOTE(review): confirm which tokenizer litellm applies to "ollama/..." model
# names — the count may be an approximation for the actual model.
num_tokens = token_counter(messages=messages, model=f"ollama/{model_name}")

print(f"Tokenanzahl: {num_tokens}")

Tokenanzahl: 2337


#### Selbstgebaute Funktion zur Schemaextraktion

In [50]:
import sqlite3

def extract_schema(db_path, sample_rows=2, verbose=True):
    """
    Extract the schema of every table in a SQLite database.

    For each table listed in ``sqlite_master`` the function collects the
    column definitions, the foreign keys and a few sample rows. The collected
    dictionary is always returned; ``verbose`` only controls whether the same
    information is additionally printed while it is being read.

    Parameters:
      - db_path: Path to the SQLite database.
      - sample_rows: Number of sample rows requested per table (used as the
        LIMIT of the sample query).
      - verbose: If True, print the information for each table.

    Returns:
      A dictionary keyed by table name. Each value is a dictionary with:
        - columns: list of strings "name type", with " (PK)" appended for
          primary-key columns
        - foreign_keys: list of strings
          "column -> referenced_table.referenced_column"
        - sample_data: the fetched sample rows (list of tuples)
    """

    def _quote_identifier(name):
        # Table names cannot be bound as SQL parameters, so they are embedded
        # as double-quoted identifiers (inner quotes doubled). This keeps names
        # that are reserved words or contain spaces/quotes from breaking the
        # statements below.
        return '"' + name.replace('"', '""') + '"'

    conn = sqlite3.connect(db_path)
    try:
        cursor = conn.cursor()

        # Fetch all table names.
        cursor.execute("SELECT name FROM sqlite_master WHERE type='table';")
        table_names = [row[0] for row in cursor.fetchall()]

        schema = {}

        for table in table_names:
            quoted_table = _quote_identifier(table)

            if verbose:
                print(f"Schema for Table '{table}':")

            # Column information.
            # Each row: (cid, name, type, notnull, dflt_value, pk)
            cursor.execute(f"PRAGMA table_info({quoted_table});")
            columns = [
                f"{col[1]} {col[2]}{' (PK)' if col[5] != 0 else ''}"
                for col in cursor.fetchall()
            ]
            if verbose:
                print("Columns:", ", ".join(columns))

            # Foreign-key information.
            # Each row: (id, seq, table, from, to, on_update, on_delete, match)
            cursor.execute(f"PRAGMA foreign_key_list({quoted_table});")
            foreign_keys = [
                f"{fk[3]} -> {fk[2]}.{fk[4]}" for fk in cursor.fetchall()
            ]
            if verbose:
                if foreign_keys:
                    print("Foreign Keys:", ", ".join(foreign_keys))
                else:
                    print("Foreign Keys: none")

            # Sample rows; the row limit is bound as a parameter rather than
            # interpolated into the SQL text.
            cursor.execute(
                f"SELECT * FROM {quoted_table} LIMIT ?;", (sample_rows,)
            )
            sample_data = cursor.fetchall()
            if verbose:
                print("Sample data:", sample_data)
                print("-" * 80)

            schema[table] = {
                "columns": columns,
                "foreign_keys": foreign_keys,
                "sample_data": sample_data
            }
    finally:
        # Close the connection even when one of the queries above raises.
        conn.close()

    return schema



In [49]:
# Run the hand-written extractor on the same database file, requesting one
# sample row per table and printing each table's details as they are read.
schema = extract_schema(db_path, sample_rows=1, verbose=True)

Schema for Table 'Album':
Columns: AlbumId INTEGER (PK), Title NVARCHAR(160), ArtistId INTEGER
Foreign Keys: ArtistId -> Artist.ArtistId
Sample data: [(1, 'For Those About To Rock We Salute You', 1)]
Schema for Table 'Artist':
Columns: ArtistId INTEGER (PK), Name NVARCHAR(120)
Foreign Keys: none
Sample data: [(1, 'AC/DC')]
Schema for Table 'Customer':
Columns: CustomerId INTEGER (PK), FirstName NVARCHAR(40), LastName NVARCHAR(20), Company NVARCHAR(80), Address NVARCHAR(70), City NVARCHAR(40), State NVARCHAR(40), Country NVARCHAR(40), PostalCode NVARCHAR(10), Phone NVARCHAR(24), Fax NVARCHAR(24), Email NVARCHAR(60), SupportRepId INTEGER
Foreign Keys: SupportRepId -> Employee.EmployeeId
Sample data: [(1, 'Luís', 'Gonçalves', 'Embraer - Empresa Brasileira de Aeronáutica S.A.', 'Av. Brigadeiro Faria Lima, 2170', 'São José dos Campos', 'SP', 'Brazil', '12227-000', '+55 (12) 3923-5555', '+55 (12) 3923-5566', 'luisg@embraer.com.br', 3)]
Schema for Table 'Employee':
Columns: EmployeeId INTEGER

In [46]:
# Serialize the schema dictionary to a single string (its Python repr) so it
# can be passed to the token counter.
schema_info_str = str(schema)

In [47]:
from litellm import token_counter

# Same measurement as for the LangChain schema text, now applied to the
# string produced from the hand-written extractor's dictionary.
user_message = {"role": "user", "content": schema_info_str}
messages = [user_message]

litellm_model = f"ollama/{model_name}"
num_tokens = token_counter(model=litellm_model, messages=messages)

print(f"Tokenanzahl: {num_tokens}")

Tokenanzahl: 1086
