In [113]:
import sqlite3
import pandas as pd

In [114]:
# Load your csv file
customer = pd.read_csv('../datasets/customer.csv')
shipment = pd.read_csv('../datasets/shipment.csv')
product = pd.read_csv('../datasets/product.csv')
order = pd.read_csv('../datasets/order.csv')
geolocation = pd.read_csv('../datasets/geolocation.csv')
sale = pd.read_csv('../datasets/sale.csv')


In [115]:
# Data clean up
customer.columns = customer.columns.str.strip()
shipment.columns = shipment.columns.str.strip()
product.columns = product.columns.str.strip()
order.columns = order.columns.str.strip()
geolocation.columns = geolocation.columns.str.strip()
sale.columns = sale.columns.str.strip()

In [116]:
# Create/connect to a SQLite database
connection = sqlite3.connect('superstore_db.db')

In [121]:
# Create the tables with primary keys and foreign keys
cursor = connection.cursor()
cursor.execute("PRAGMA foreign_keys = 1")

cursor.execute('''
    CREATE TABLE IF NOT EXISTS customer (
        customer_id TEXT PRIMARY KEY,
        customer_name TEXT,
        segment TEXT
    )
''')
               
cursor.execute('''
    CREATE TABLE IF NOT EXISTS product (
        product_id TEXT PRIMARY KEY,
        category TEXT,
        sub_category TEXT,
        product_name TEXT
    )
''')

cursor.execute('''
    CREATE TABLE IF NOT EXISTS sale (
        transaction_id TEXT PRIMARY KEY,
        price REAL,
        quantity INTEGER,
        discount REAL,
        profit REAL
    )
''')

cursor.execute('''
    CREATE TABLE IF NOT EXISTS geolocation (
        geolocation_id TEXT PRIMARY KEY,
        country TEXT,
        city TEXT,
        state TEXT,
        postal_code INTEGER,
        region TEXT
    )
''')

cursor.execute('''
    CREATE TABLE IF NOT EXISTS shipment (
        ship_id INTEGER PRIMARY KEY,
        ship_date TEXT,
        ship_mode TEXT,
        geolocation_number TEXT,
        FOREIGN KEY(geolocation_number) REFERENCES geolocation(geolocation_id)
    )
''')

cursor.execute('''
    CREATE TABLE IF NOT EXISTS "order" (
        order_id TEXT PRIMARY KEY,
        order_date TEXT,
        customer_number TEXT,
        product_number TEXT,
        transaction_number TEXT,
        ship_number TEXT,
        FOREIGN KEY(customer_number) REFERENCES customer(customer_id),
        FOREIGN KEY(product_number) REFERENCES product(product_id),
        FOREIGN KEY(transaction_number) REFERENCES sale(transaction_id),
        FOREIGN KEY(ship_number) REFERENCES shipment(ship_id)    
    )
''')

<sqlite3.Cursor at 0x7fe1f4ea4f10>

In [122]:
# Load datafiles to SQLite
# Insert values into the "product" table
product.to_sql('product', connection, if_exists='replace', index=False)

# Insert values into the "customer" table
customer.to_sql('customer', connection, if_exists='replace', index=False)

# Insert values into the "geolocation" table
geolocation.to_sql('geolocation', connection, if_exists='replace', index=False)

# Insert values into the "shipment" table
shipment.to_sql('shipment', connection, if_exists='replace', index=False)

# Insert values into the "order" table
order.to_sql('order', connection, if_exists='replace', index=False)

# Insert values into the "sale" table
sale.to_sql('sale', connection, if_exists='replace', index=False)


9994

In [123]:
# Commit the changes and close the connection
connection.commit()
connection.close()