In [None]:
_ = !pip install pandas
_ = !pip install langchain

In [None]:
import getpass
import os
import sqlite3

import pandas as pd
from langchain import OpenAI, SQLDatabase, SQLDatabaseChain

### Create and populate local database

In [None]:
# source: https://gist.githubusercontent.com/seankross
data_path = 'data/mtcars.csv'
!curl 'https://gist.githubusercontent.com/seankross/a412dfbd88b3db70b74b/raw/5f23f993cd87c283ce766e7ac6b329ee7cc2e1d1/mtcars.csv' >> data/mtcars.csv

In [None]:
# Load the downloaded CSV, using its first line as the column header,
# then preview the first 15 rows.
df = pd.read_csv(filepath_or_buffer=data_path, header=0)
df.head(n=15)

In [None]:
# Keep only the three columns used by the demo (all rows), then preview 10 rows.
df_sub = df.loc[:, ['model', 'mpg', 'cyl']]
df_sub.head(n=10)

In [None]:
table_name = 'cars'

# Defensive clean-up before writing: if the CSV contains repeated header lines
# (e.g. because a download was appended to an existing file), those lines show
# up as rows whose 'model' value is the literal string 'model' and force the
# numeric columns to be read as text. Drop such rows and coerce the numeric
# columns so the database stores numbers rather than strings.
cars = (
    df_sub.loc[df_sub['model'] != 'model']
    .assign(
        mpg=lambda frame: pd.to_numeric(frame['mpg']),
        cyl=lambda frame: pd.to_numeric(frame['cyl']).astype('int64'),
    )
)

conn = sqlite3.connect('cars.sqlite')
try:
    # if_exists='replace' drops and recreates the table, so a separate
    # CREATE TABLE statement would be discarded; `dtype` pins the intended
    # column types (model TEXT, mpg REAL, cyl INTEGER) instead.
    cars.to_sql(
        table_name,
        conn,
        if_exists='replace',
        index=False,
        dtype={'model': 'TEXT', 'mpg': 'REAL', 'cyl': 'INTEGER'},
    )
    conn.commit()
finally:
    # Release the database file even if the write fails.
    conn.close()

In [None]:
# Sanity check: list the working directory and keep only entries matching "cars.sqlite".
!ls | grep cars.sqlite

In [None]:
def read_query(sql):
    """Run ``sql`` against the local ``cars.sqlite`` database and print every row.

    Args:
        sql: SQL statement to execute.

    Returns:
        None. Each fetched row is printed as a tuple, one row per line.

    Raises:
        sqlite3.Error: propagated unchanged if the statement fails; the
            connection is closed before the error reaches the caller.
    """
    conn = sqlite3.connect('cars.sqlite')
    try:
        cur = conn.cursor()
        cur.execute(sql)
        for row in cur.fetchall():
            print(row)
    finally:
        # Always release the connection, even when the query raises.
        conn.close()

In [None]:
# Preview up to 10 rows stored in the cars table.
read_query(sql='SELECT * FROM cars LIMIT 10;')

### Connect to OpenAI via LangChain

In [None]:
# Wrap the SQLite file in LangChain's SQLDatabase so the chain can query it.
# NOTE(review): the URI presumably resolves 'cars.sqlite' relative to the
# current working directory -- confirm if the notebook is run from elsewhere.
db = SQLDatabase.from_uri('sqlite:///cars.sqlite')

In [None]:
# to obtain an API key, create an account at https://openai.com/
# NOTE: if you create a paid account, charges will apply. Refer to pricing page on the website for details.
#
# Use the OPENAI_API_KEY environment variable when it is already set; otherwise
# ask for the key interactively. This avoids overwriting a real key with a
# placeholder and keeps the secret out of the saved notebook.
if not os.environ.get('OPENAI_API_KEY'):
    os.environ['OPENAI_API_KEY'] = getpass.getpass('OpenAI API key: ')

# temperature=0 is passed through to the OpenAI wrapper unchanged.
llm = OpenAI(temperature=0)

In [None]:
# Build the chain that ties the LLM to the cars database.
# NOTE(review): verbose=True presumably makes the chain print its intermediate
# steps (such as the generated SQL) -- confirm against the installed langchain version.
db_chain = SQLDatabaseChain(llm=llm, database=db, verbose=True)

In [None]:
# Ask the chain a natural-language question; the hand-written SQL in the next
# cell is the reference answer to compare against.
db_chain.run('Which model has the highest mpg?')

In [None]:
# Reference answer: the single row with the highest mpg.
read_query(sql='SELECT model, mpg FROM cars ORDER BY mpg DESC LIMIT 1;')

In [None]:
# Ask the chain for an aggregate per cylinder count; the next cell holds the
# hand-written SQL to compare against.
db_chain.run('What is the maximum mpg by number of cyl?')

In [None]:
# Reference answer: the maximum mpg within each cylinder count.
read_query(sql='SELECT cyl, MAX(mpg) FROM cars GROUP BY cyl;')

In [None]:
# Ask the chain a filtered question (6-cylinder cars only); the next cell lists
# the matching rows for comparison.
db_chain.run('What is the most efficient car that has 6 cylinders?')

In [None]:
# Reference answer: distinct 6-cylinder models ordered from highest to lowest mpg
# (up to 50 rows), so the top row is the most efficient one.
read_query(sql='SELECT DISTINCT model, mpg FROM cars WHERE cyl = 6 ORDER BY mpg DESC LIMIT 50;')

### Disclaimer:
Note: OpenAI provides a free API key for initial testing. Once you move to a paid subscription, calling the API in the way demonstrated in this example will incur monetary charges. Refer to OpenAI's pricing information for details.

Be aware that information, such as files and databases, that is shared with OpenAI's LLM in the way this demo demonstrates may be used for training and can become public. Refer to OpenAI's usage policy for details.

This demo is for educational purposes only and for demonstrating machine learning methods. The author makes no claims that the outcomes shown here or any outcomes that could be produced by this method are accurate or reliable.