<h1>Connect to SQL via Langchain

In [2]:
from langchain_community.utilities import SQLDatabase

db_file = (r'files\sql-murder-mystery.db')
db = SQLDatabase.from_uri(f"sqlite:///{db_file}")
# print(db.dialect)
# print(db.get_usable_table_names())

QUERY = "SELECT * FROM person LIMIT 5"

db.run(QUERY)

"[(10000, 'Christoper Peteuil', 993845, 624, 'Bankhall Ave', 747714076), (10007, 'Kourtney Calderwood', 861794, 2791, 'Gustavus Blvd', 477972044), (10010, 'Muoi Cary', 385336, 741, 'Northwestern Dr', 828638512), (10016, 'Era Moselle', 431897, 1987, 'Wood Glade St', 614621061), (10025, 'Trena Hornby', 550890, 276, 'Daws Hill Way', 223877684)]"

<h1>Working request

In [1]:
from langchain.chains import create_sql_query_chain
from langchain_community.llms.ollama import Ollama
from langchain_community.utilities import SQLDatabase
from files.settings import MODEL, MODEL_URL, DB_FILE
# Database connection
db = SQLDatabase.from_uri(f"sqlite:///{DB_FILE}")
# Ollama model connection via Docker
llm = Ollama(model=MODEL, base_url=MODEL_URL)
chain = create_sql_query_chain(llm, db)
# Static instructions for the model
STATIC_MESSAGE = 'You are an experienced data scientist. Answer the question verbally and add the SQL query to prove it. Make the query one line so it is easy to test. Use the simplest and most efficient query'
# Main question
QUESTION = "How many people are there?"
# Generate response
response = chain.invoke({"question": f"{QUESTION} {STATIC_MESSAGE}"})
print(response)

Question: How many people are there?

SQLQuery: SELECT COUNT(*) FROM person


In [73]:
# TEST QUERY
QUERY = "SELECT COUNT(*) FROM person"

result = db.run(QUERY)
formatted_result = f"Liczba osób w bazie danych: {result}"
print(formatted_result)

Liczba osób w bazie danych: [(10011,)]


<h1>Connect to LLAMA3 over docker

In [None]:
from langchain_community.llms.ollama import Ollama
llm = Ollama(model=MODEL, base_url=MODEL_URL)

llm("Hello world")


<h1>Get table schema

In [None]:
from langchain_community.utilities import SQLDatabase
from files.settings import DB_FILE
# Database connection
db = SQLDatabase.from_uri(f"sqlite:///{DB_FILE}")

schema = db.get_table_info()
print(schema)

<h1>Use prompt template

In [7]:
from langchain.chains import create_sql_query_chain
from langchain_community.llms.ollama import Ollama
from langchain_community.utilities import SQLDatabase
from files.settings import MODEL, MODEL_URL, DB_FILE
# Database connection
db = SQLDatabase.from_uri(f"sqlite:///{DB_FILE}")
# Ollama model connection via Docker
llm = Ollama(model=MODEL, base_url=MODEL_URL)
chain = create_sql_query_chain(llm, db)
# Static instructions for the model
STATIC_MESSAGE = 'You are an experienced data scientist. Answer the question verbally and add the SQL query to prove it. Make the query one line so it is easy to test. Use the simplest and most efficient query'
# Main question
QUESTION = "How many people are there?"
# Generate response
response = chain.invoke({"question": f"{QUESTION} {STATIC_MESSAGE}"})
print(response)

In [None]:
from langchain_core.prompts import PromptTemplate
import db_connect
dialect="sqlite"
table_info=db_connect.get_schema()
# input = "How many people are there in total?"

template = '''Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer.
Use the following format:

Question: "Question here"
SQLQuery: "SQL Query to run"
SQLResult: "Result of the SQLQuery"
Answer: "Final answer here"

Only use the following tables:

{table_info}.

Question: {input}'''
prompt = PromptTemplate.from_template(template)
chain = create_sql_query_chain(llm, db, prompt)

<H1> INITIAL TEST

In [1]:
# Connect to the database, get the schema and connect to the model
from langchain.chains import create_sql_query_chain
from langchain_community.llms.ollama import Ollama
from langchain_community.utilities import SQLDatabase
from files.settings import MODEL, MODEL_URL, DB_FILE
# Database connection
db = SQLDatabase.from_uri(f"sqlite:///{DB_FILE}")
# Ollama model connection via Docker
llm = Ollama(model=MODEL, base_url=MODEL_URL, temperature=0.01)

In [None]:
# User question
question = "How many people are there in total?"

<H1>Working prompt template v1

In [16]:
# Working template v1
from langchain_core.prompts import PromptTemplate
import db_connect
dialect="sqlite"
table_info=db_connect.get_schema()
question = "How many people are there in total?"

template = '''Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer.
Use the following format:

Question: "Question here"
SQLQuery: "SQL Query to run"
SQLResult: "Result of the SQLQuery"
Answer: "Final answer here"

Only use the following tables:

{table_info}.

Question: {input}
TopK: {top_k}'''
prompt = PromptTemplate.from_template(template)
chain = create_sql_query_chain(llm, db, prompt)

In [14]:
response = chain.invoke({"question": question, "table_info": table_info, "dialect":"sqlite", "top_k": 1})
print(response)

To find the total number of people, we can use a SELECT COUNT statement on the person table. The SQLQuery would be:

SELECT COUNT(*) FROM person;

The result will be a single row with the count of all rows in the person table. We can then extract this value and return it as the answer to the question.


In [22]:
# Working prompt template v2
# Build the chain and prompt template
from langchain_core.prompts import PromptTemplate
import db_connect
dialect="sqlite"
table_info=db_connect.get_schema()
question = "How many people are there in total?"

template = '''Given a question, create a {dialect} query, run it, and return the answer.
Format:

Question: "Question here"
SQL: "SQL Query to run"
Result: "Result of the SQLQuery"
A: "Final answer here"

Tables: {table_info}

Question: {input}
TopK: {top_k}'''
prompt = PromptTemplate.from_template(template)
chain = create_sql_query_chain(llm, db, prompt)

In [None]:
# Ask the question and get the response from the model
response = chain.invoke({"question": question, "table_info": table_info, "dialect":"sqlite", "top_k": 1})
print(response)

<H1>Main game<br></h1>
A crime has taken place and the detective needs your help. The detective gave you the crime scene report, but you somehow lost it. You vaguely remember that the crime was a ​murder​ that occurred sometime on ​Jan.15, 2018​ and that it took place in ​SQL City​. Start by retrieving the corresponding crime scene report from the police department’s database.

<h1>OPTIMIZING LLAMA3

In [3]:
# TEST LLAMA3
# Build the chain and prompt template
from langchain_core.prompts import PromptTemplate
import db_connect
dialect="sqlite"
table_info=db_connect.get_schema()

template = '''Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer. Use the following format:

**Input:**

- **Question:** "Question here"
- **SQLQuery:** "SQL Query to run"
- **SQLResult:** "Result of the SQLQuery"
- **Answer:** "Final answer here"

**Only use the following tables:**

{table_info}

**Question:** {input}

**TopK:** {top_k}
'''
prompt = PromptTemplate.from_template(template)
chain = create_sql_query_chain(llm, db, prompt)

# Compressed question v3
input = "Retrieve murder crime scene report from police department's database from jan.15, 2018 in SQL City"

input_data = {
    "question": input,
    "table_info": table_info,
    "dialect": "sqlite",
    "top_k": 1
}

# Ask the question and get the response from the model
response = chain.invoke(input_data)
print(response)

**Input:**

- **Question:** "Retrieve murder crime scene report from police department's database from Jan.15, 2018 in SQL City"
- **SQLQuery:** 
```
SELECT * FROM crime_scene_report WHERE date = '20180115' AND type = 'murder';
```
- **SQLResult:** 
```
date          | type        | description                    | city
20180115     | murder      | Life? Dont talk to me about life. | Albany
20180115     | murder      | Mama, I killed a man, put a gun against his head... | Reno
```
- **Answer:** The answer is the two murder crime scene reports from Jan. 15, 2018 in SQL City: "Life? Dont talk to me about life." and "Mama, I killed a man, put a gun against his head..."


<H1>Game step 2

In [6]:
input = "The report is from 'SQL City'"
input_data = {
    "question": input,
    "table_info": table_info,
    "dialect": "sqlite",
    "top_k": 1
}
response = chain.invoke(input_data)
print(response)

**Input:**

* **Question:** "The report is from 'SQL City'".
* **SQLQuery:** `SELECT * FROM crime_scene_report WHERE city = 'SQL City';`
* **SQLResult:** `date          type         description                city`
`20180115      robbery     A Man Dressed as Spider-... SQL City`

* **Answer:** "The report is from 2018-01-15, it's a robbery case in SQL City."


In [7]:
# TEST LLAMA3
# Build the chain and prompt template
from langchain_core.prompts import PromptTemplate
import db_connect
dialect="sqlite"
table_info=db_connect.get_schema()

template = '''Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer. Use the following format:

**Input:**

- **Question:** "Question here"
- **SQLQuery:** "SQL Query to run"
- **SQLResult:** "Result of the SQLQuery"
- **Answer:** "Final answer here"

**Only use the following tables:**

{table_info}

**Question:** {input}

**TopK:** {top_k}
'''
prompt = PromptTemplate.from_template(template)
chain = create_sql_query_chain(llm, db, prompt)

# Compressed question v3
input = "A crime has taken place and the detective needs your help. The detective gave you the crime scene report, but you somehow lost it. You vaguely remember that the crime was a ​murder​ that occurred sometime on ​Jan.15, 2018​ and that it took place in ​SQL City​. Start by retrieving the corresponding crime scene report from the police department’s database."

input_data = {
    "question": input,
    "table_info": table_info,
    "dialect": "sqlite",
    "top_k": 1
}

# Ask the question and get the response from the model
response = chain.invoke(input_data)
print(response)

**Input:**

- **Question:** A crime has taken place and the detective needs your help. The detective gave you the crime scene report, but you somehow lost it. You vaguely remember that the crime was a ​murder​ that occurred sometime on ​Jan.15, 2018​ and that it took place in ​SQL City​. Start by retrieving the corresponding crime scene report from the police department’**s database.
- **SQLQuery:** SELECT * FROM crime_**scene_report WHERE date = '20180115' AND city = 'NYC' OR city = 'Albany' OR city = 'Reno';
- **SQLResult:**
date		type		description		city
20180115	murder	Mama, I killed a man, put a gun against his head...	Reno

**Answer:** The crime scene report for the murder that occurred on Jan. 15, 2018 in SQL City (which is actually Reno) is:

Mama, I killed a man, put a gun against his head...

Note: Since there are only three rows in the crime_**scene_report table and two of them have city as 'NYC' or 'Albany', we can safely assume that the third row with city as 'Reno' is the 

<H1>LOCAL OLLAMA PERFORMANCE COMPARISON

In [10]:
# LOCAL OLLAMA
from langchain_core.prompts import PromptTemplate
from langchain_community.utilities import SQLDatabase
# from langchain_community.llms.ollama import Ollama
from langchain_community.chat_models import ChatOllama
from langchain.chains import create_sql_query_chain

from files.settings import DB_FILE

import db_connect

# Database connection
db = SQLDatabase.from_uri(f"sqlite:///{DB_FILE}")
# Ollama model connection via Docker
llm = ChatOllama(model="llama3")
dialect="sqlite"
table_info=db_connect.get_schema()

template = '''Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer. Use the following format:

**Input:**

- **Question:** "Question here"
- **SQLQuery:** "SQL Query to run"
- **SQLResult:** "Result of the SQLQuery"
- **Answer:** "Final answer here"

**Only use the following tables:**

{table_info}

**Question:** {input}

**TopK:** {top_k}
'''
prompt = PromptTemplate.from_template(template)
chain = create_sql_query_chain(llm, db, prompt)

# Compressed question v3
input = "A crime has taken place and the detective needs your help. The detective gave you the crime scene report, but you somehow lost it. You vaguely remember that the crime was a ​murder​ that occurred sometime on ​Jan.15, 2018​ and that it took place in ​SQL City​. Start by retrieving the corresponding crime scene report from the police department’s database."

input_data = {
    "question": input,
    "table_info": table_info,
    "dialect": "sqlite",
    "top_k": 1
}

import time

# Rozpoczęcie pomiaru czasu
start_time = time.time()

# Ask the question and get the response from the model
response = chain.invoke(input_data)
print(response)

# Zakończenie pomiaru czasu
end_time = time.time()

# Wyświetlenie czasu wykonania
print(f"Czas wykonania: {end_time - start_time} sekund.")

**Input:**

* **Question:** "A crime has taken place and the detective needs your help. The detective gave you the crime scene report, but you somehow lost it. You vaguely remember that the crime was a ​murder​ that occurred sometime on ​Jan.15, 2018​ and that it took place in ​SQL City​. Start by retrieving the corresponding crime scene report from the police department’­s database."
* **SQLQuery:** "SELECT * FROM crime_scene_report WHERE type='murder' AND date BETWEEN '20180114' AND '20180116';"
* **SQLResult:**
| date       | type    | description                    | city        |
|------------|---------|-------------------------------|-------------|
| 20180115   | murder  | Life? Dont talk to me about life. | Albany      |

* **Answer:** "The crime scene report for the murder that occurred on Jan.15, 2018 in Albany."
Czas wykonania: 7.431161880493164 sekund.


In [11]:
# DOCKER OLLAMA
from langchain_core.prompts import PromptTemplate
from langchain_community.utilities import SQLDatabase
from langchain_community.llms.ollama import Ollama
from langchain.chains import create_sql_query_chain

from files.settings import DB_FILE, MODEL, MODEL_URL

import db_connect

# Database connection
db = SQLDatabase.from_uri(f"sqlite:///{DB_FILE}")
# Ollama model connection via Docker
llm = Ollama(model=MODEL, base_url=MODEL_URL)
dialect="sqlite"
table_info=db_connect.get_schema()

template = '''Given an input question, first create a syntactically correct {dialect} query to run, then look at the results of the query and return the answer. Use the following format:

**Input:**

- **Question:** "Question here"
- **SQLQuery:** "SQL Query to run"
- **SQLResult:** "Result of the SQLQuery"
- **Answer:** "Final answer here"

**Only use the following tables:**

{table_info}

**Question:** {input}

**TopK:** {top_k}
'''
prompt = PromptTemplate.from_template(template)
chain = create_sql_query_chain(llm, db, prompt)

# Compressed question v3
input = "A crime has taken place and the detective needs your help. The detective gave you the crime scene report, but you somehow lost it. You vaguely remember that the crime was a ​murder​ that occurred sometime on ​Jan.15, 2018​ and that it took place in ​SQL City​. Start by retrieving the corresponding crime scene report from the police department’s database."

input_data = {
    "question": input,
    "table_info": table_info,
    "dialect": "sqlite",
    "top_k": 1
}

import time

# Rozpoczęcie pomiaru czasu
start_time = time.time()

# Ask the question and get the response from the model
response = chain.invoke(input_data)
print(response)

# Zakończenie pomiaru czasu
end_time = time.time()

# Wyświetlenie czasu wykonania
print(f"Czas wykonania: {end_time - start_time} sekund.")

**Input:**

* **Question:** A crime has taken place and the detective needs your help. The detective gave you the crime scene report, but you somehow lost it. You vaguely remember that the crime was a ​murder​ that occurred sometime on ​Jan.15, 2018​ and that it took place in ​SQL City​. Start by retrieving the corresponding crime scene report from the police department’­s database.
* **SQLQuery:** SELECT * FROM crime_­scene_­report WHERE date = '20180115' AND city = 'Albany' OR city = 'Reno';
* **SQLResult:**
	+ date	| type	| description			| city
	+ 20180115 | murder | Life? Dont talk to me about life. | Albany
	+ 20180115 | murder | Mama, I killed a man, put a gun against his head... | Reno

**Answer:** The crime scene report for the murder that occurred on Jan. 15, 2018 in either Albany or Reno is:

date: 20180115
type: murder
description: Life? Dont talk to me about life.
city: Albany

OR

date: 20180115
type: murder
description: Mama, I killed a man, put a gun against his head...
