# DBInit

One-time database initialization that needs to be run before importing any data or performing any queries.

In [1]:
import os
import pandas as pd
import sqlite3 as sql

def initialize_database(db_path = '../products.sql', schema_script = 'sql_schema', verbosity = 1, force_delete = False) -> sql.Connection:
    """Open the product database, creating its schema on first use.

    Args:
        db_path: Path of the SQLite database file to open.
        schema_script: Path of a text file holding the SQL statements that
            create the schema. It is only read when no file exists at
            ``db_path`` (after the optional ``force_delete``).
        verbosity: Progress messages are printed when this is greater than 0.
        force_delete: When true, remove any existing file at ``db_path``
            first, so the schema is rebuilt from scratch.

    Returns:
        An open ``sqlite3`` connection to ``db_path``.

    Raises:
        OSError: If ``schema_script`` cannot be opened or read.
        sqlite3.Error: If the database cannot be opened or the schema script
            fails to execute.
    """
    if force_delete and os.path.exists(db_path):
        os.remove(db_path)
    db_exists = os.path.exists(db_path)
    conn = sql.connect(db_path)
    if not db_exists:
        # Brand-new database: create the tables and indexes from the schema script.
        if verbosity > 0:
            print('Creating new database schema...')
        try:
            with open(schema_script) as file:
                commands = file.read()
            conn.executescript(commands)
        except BaseException:
            # Do not leave an empty or half-built database file behind: the
            # next call would see the file, skip schema creation, and hand
            # back a database without the expected tables.
            conn.close()
            if os.path.exists(db_path):
                os.remove(db_path)
            raise
    if verbosity > 0:
        print(f'Connected to product database {db_path}')
    return conn

## Verification

Let's call the initialization function defined above (it only creates the schema if the database file does not exist yet) and verify the result by inspecting the overall database schema:

In [9]:
# Open the product database with the default settings; the schema is created
# only when the database file is not there yet.
conn = initialize_database()

# sqlite_master holds one row per table and index, including the SQL that created it.
pd.read_sql_query(
    sql="SELECT * FROM sqlite_master ORDER BY tbl_name",
    con=conn,
)

Connected to product database ../products.sql


Unnamed: 0,type,name,tbl_name,rootpage,sql
0,table,product,product,7,CREATE TABLE product(\n id VARCHAR(36) PRIM...
1,index,sqlite_autoindex_product_1,product,8,
2,index,product_category_id,product,4837,CREATE INDEX product_category_id ON product(ca...
3,table,review,review,2,"CREATE TABLE review(\n user_id VARCHAR(36),..."
4,index,review_product_id,review,910296,CREATE INDEX review_product_id ON review(produ...
5,index,review_user_id_product_id,review,516,CREATE INDEX review_user_id_product_id ON revi...
6,table,sqlite_stat1,sqlite_stat1,3145,"CREATE TABLE sqlite_stat1(tbl,idx,stat)"
7,table,sqlite_stat4,sqlite_stat4,3220,"CREATE TABLE sqlite_stat4(tbl,idx,neq,nlt,ndlt..."


In [4]:
# Per-table schema can be inspected too; here are the columns of the product table:
pd.read_sql_query(sql="PRAGMA table_info(product)", con=conn)

Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,id,VARCHAR(36),1,,1
1,1,title,TEXT,0,'',0
2,2,title_search,TEXT,0,'',0
3,3,creator,TEXT,0,'',0
4,4,creator_search,TEXT,0,'',0
5,5,publisher,TEXT,0,'',0
6,6,description,TEXT,0,'',0
7,7,category,TEXT,0,'',0
8,8,subcategory,TEXT,0,'',0
9,9,release_date,DATE,0,,0


In [10]:
# Same column-level inspection for the review table:
pd.read_sql_query(sql="PRAGMA table_info(review)", con=conn)

Unnamed: 0,cid,name,type,notnull,dflt_value,pk
0,0,user_id,VARCHAR(36),0,,0
1,1,product_id,VARCHAR(36),1,,0
2,2,title,TEXT,0,'',0
3,3,review,TEXT,0,'',0
4,4,rating,"DECIMAL(2,1)",1,,0
5,5,upvotes,INTEGER,1,,0
6,6,downvotes,INTEGER,0,0,0
7,7,timestamp,TIMESTAMP,1,,0
