# Extracting data to SQLite...

### 1)  Load the SQLite package...

In [24]:
import sqlite3

print(sqlite3.version)
print(sqlite3.sqlite_version)

2.6.0
3.35.4


### 2) Connect to the database.  

In [25]:
slconn = sqlite3.connect('try.db') # permanent database

In [26]:
print(slconn)

<sqlite3.Connection object at 0x7fb27a92f300>


### 3)  Create a cursor to execute statements to SQLite.

In [27]:
cursor = slconn.cursor()

In [28]:
cursor

<sqlite3.Cursor at 0x7fb27a923110>

In [29]:
from sqlalchemy import create_engine

# connection string: driver://username:password@server/database
engine = create_engine('postgresql+psycopg2://postgres:bryan@localhost/try')

In [30]:
import pandas as pd

def get_columns(tablename):
    return pd.read_sql_query('''select ordinal_position, column_name, data_type  
                                from information_schema.columns 
                                where table_name = '{}'  
                             ''' .format(tablename)
                             ,engine)

In [31]:
get_columns('sales')

Unnamed: 0,ordinal_position,column_name,data_type
0,1,index,bigint
1,2,ProductKey,bigint
2,3,OrderDateKey,bigint
3,4,DueDateKey,bigint
4,5,ShipDateKey,bigint
5,6,CustomerKey,bigint
6,7,PromotionKey,bigint
7,8,CurrencyKey,bigint
8,9,SalesTerritoryKey,bigint
9,10,SalesOrderNumber,text


## Let's extract data from the backend databases, and do some data munging to add value.  

In [32]:
sql = '''
select c."TotalChildren" as numchildren, 
       c."EnglishEducation" as education, 
       c."MaritalStatus" as maritalstatus, 
       c."EnglishOccupation" as occupation, 
       DATE_PART('year', current_date::date) - DATE_PART('year', "BirthDate"::date) as age,
       "SalesAmount" as salesamount
from sales          s
join customer       c
on (s."CustomerKey" = c."CustomerKey" )
'''

In [33]:
pd.read_sql_query(sql, engine).head(2)

Unnamed: 0,numchildren,education,maritalstatus,occupation,age,salesamount
0,5,Bachelors,S,Management,69.0,3578.27
1,3,High School,S,Manual,51.0,3399.99


In [34]:
salesdatadf = pd.read_sql_query(sql, engine)

In [35]:
# Store the results in our SQLite data warehouse

# if_exists options are replace, append, 
salesdatadf.to_sql('salesdata', con=slconn, index=False, if_exists='replace')

#### Getting meta data...

In [36]:
salesdatadf.columns 

Index(['numchildren', 'education', 'maritalstatus', 'occupation', 'age',
       'salesamount'],
      dtype='object')

In [37]:
salesdatadf.dtypes

numchildren        int64
education         object
maritalstatus     object
occupation        object
age              float64
salesamount      float64
dtype: object

In [38]:
# Get table schema...
import pandas as pd 

pd.read_sql_query("""
PRAGMA table_info('salesdata');
""", slconn)

Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,numchildren,INTEGER,0,,0
1,1,education,TEXT,0,,0
2,2,maritalstatus,TEXT,0,,0
3,3,occupation,TEXT,0,,0
4,4,age,REAL,0,,0
5,5,salesamount,REAL,0,,0


### Querying data in SQLite...

In [39]:
import sqlite3
import pandas as pd

pd.read_sql_query("SELECT * FROM salesdata", slconn).head(4).round(0)

Unnamed: 0,numchildren,education,maritalstatus,occupation,age,salesamount
0,5,Bachelors,S,Management,69.0,3578.0
1,3,High School,S,Manual,51.0,3400.0
2,5,High School,S,Professional,69.0,3400.0
3,4,Graduate Degree,M,Management,78.0,699.0


In [49]:
# Formatting output in Post5greSQL -  https://www.postgresql.org/docs/8.3/functions-formatting.html
# Formatting numbers with SQLite - https://stackoverflow.com/questions/48716160/how-to-format-a-float-number-in-sqlite

pd.read_sql_query('''select education,age, printf('%,d', sum(salesamount)) as sales 
                     from salesdata 
                     group by education
                     order by age
                  ''', slconn, index_col='education')

Unnamed: 0_level_0,age,sales
education,Unnamed: 1_level_1,Unnamed: 2_level_1
High School,51.0,4638026
Partial High School,51.0,1636405
Partial College,61.0,7723542
Bachelors,69.0,9900142
Graduate Degree,78.0,5460560


# Let's close the connection. 

In [22]:
#pgconn.close()
engine.dispose()
slconn.close()