In [1]:
# import dependencies
import os
import openai
import pandas as pd

In [2]:
# Read the OpenAI API key from the environment so it never appears in the notebook.
# `os.environ.get` yields None when OPENAI_API_KEY is unset (same as `os.getenv`).
openai.api_key = os.environ.get("OPENAI_API_KEY")

### Tabular Data Setup

In [3]:
# Load the locally saved copy of the Kaggle sample sales dataset
# (https://www.kaggle.com/datasets/kyanyoga/sample-sales-data).
df = pd.read_csv(
    'data/sales_data_sample.csv'
)

# Preview the first rows of the imported frame
df.head()

Unnamed: 0,ORDERNUMBER,QUANTITYORDERED,PRICEEACH,SALES,ORDERDATE,QTR_ID,MONTH_ID,YEAR_ID,PRODUCTLINE,PHONE,ADDRESSLINE1,CITY,STATE,POSTALCODE,COUNTRY,CONTACTLASTNAME,CONTACTFIRSTNAME
0,10107,30,95.7,2871.0,2/24/2003 0:00,1,2,2003,Motorcycles,2125557818,897 Long Airport Avenue,NYC,NY,10022.0,USA,Yu,Kwai
1,10121,34,81.35,2765.9,5/7/2003 0:00,2,5,2003,Motorcycles,26.47.1555,59 rue de l'Abbaye,Reims,,51100.0,France,Henriot,Paul
2,10134,41,94.74,3884.34,7/1/2003 0:00,3,7,2003,Motorcycles,+33 1 46 62 7555,27 rue du Colonel Pierre Avia,Paris,,75508.0,France,Da Cunha,Daniel
3,10145,45,83.26,3746.7,8/25/2003 0:00,3,8,2003,Motorcycles,6265557265,78934 Hillside Dr.,Pasadena,CA,90003.0,USA,Young,Julie
4,10159,49,100.0,5205.27,10/10/2003 0:00,4,10,2003,Motorcycles,6505551386,7734 Strong St.,San Francisco,CA,,USA,Brown,Julie


### Create a temporary DB in RAM

In [4]:
from sqlalchemy import create_engine
from sqlalchemy import text

In [5]:
# Build an in-memory SQLite engine; the database exists only in RAM.
# echo=True makes SQLAlchemy log the statements it runs.
temp_db = create_engine(
    "sqlite:///:memory:",
    echo=True,
)

In [7]:
# Push DataFrame to Temp DB as the table "Sales".
# if_exists='replace' keeps this cell re-runnable: with the default ('fail'),
# executing it a second time on the same kernel raises ValueError because the
# table already exists.
# The DataFrame index is still written (as the "index" column), as before.
# `data` receives to_sql's return value, not the table contents.
data = df.to_sql(name='Sales', con=temp_db, if_exists='replace')

2023-03-03 15:39:28,406 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-03-03 15:39:28,428 INFO sqlalchemy.engine.Engine PRAGMA main.table_info("Sales")
2023-03-03 15:39:28,447 INFO sqlalchemy.engine.Engine [raw sql] ()
2023-03-03 15:39:28,452 INFO sqlalchemy.engine.Engine PRAGMA temp.table_info("Sales")
2023-03-03 15:39:28,453 INFO sqlalchemy.engine.Engine [raw sql] ()
2023-03-03 15:39:28,455 INFO sqlalchemy.engine.Engine ROLLBACK
2023-03-03 15:39:28,457 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-03-03 15:39:28,459 INFO sqlalchemy.engine.Engine 
CREATE TABLE "Sales" (
	"index" BIGINT, 
	"ORDERNUMBER" BIGINT, 
	"QUANTITYORDERED" BIGINT, 
	"PRICEEACH" FLOAT, 
	"SALES" FLOAT, 
	"ORDERDATE" TEXT, 
	"QTR_ID" BIGINT, 
	"MONTH_ID" BIGINT, 
	"YEAR_ID" BIGINT, 
	"PRODUCTLINE" TEXT, 
	"PHONE" TEXT, 
	"ADDRESSLINE1" TEXT, 
	"CITY" TEXT, 
	"STATE" TEXT, 
	"POSTALCODE" TEXT, 
	"COUNTRY" TEXT, 
	"CONTACTLASTNAME" TEXT, 
	"CONTACTFIRSTNAME" TEXT
)


2023-03-03 15:39:28,461 INFO sqlalche

In [9]:
# Smoke-test the temp DB with a SQL query: total of the SALES column in Sales
sales_total_query = text("SELECT SUM(SALES) FROM Sales")
with temp_db.connect() as conn:
    result = conn.execute(sales_total_query)
# NOTE(review): `result` is read by a later cell, after this `with` block has
# released the connection. That worked in the recorded run, but fetching the
# rows inside the block would be more robust -- confirm before relying on it.

2023-03-03 15:41:40,873 INFO sqlalchemy.engine.Engine BEGIN (implicit)
2023-03-03 15:41:40,875 INFO sqlalchemy.engine.Engine SELECT SUM(SALES) FROM Sales
2023-03-03 15:41:40,877 INFO sqlalchemy.engine.Engine [generated in 0.00383s] ()
2023-03-03 15:41:40,879 INFO sqlalchemy.engine.Engine ROLLBACK


In [10]:
# Fetch all rows of the query result; the cell displays the returned list.
result.all()

[(10032628.85000001,)]