#### Load the raw data and prepare the dataframe

In [5]:
import pandas as pd
import sqlite3

In [6]:
# Load the raw order-level CSV (decoded as latin1).
df = pd.read_csv("../data/DataCoSupplyChainDataset.csv", encoding="latin1")

# Parse both timestamp columns with identical settings instead of repeating
# the call; format="mixed" lets pandas infer the format value by value.
# NOTE(review): dayfirst=True resolves ambiguous values such as "01/02/2018"
# to 1 February -- confirm the CSV really stores dates day-first.
date_cols = ["order date (DateOrders)", "shipping date (DateOrders)"]
for date_col in date_cols:
    df[date_col] = pd.to_datetime(df[date_col], format="mixed", dayfirst=True)

# Fail loudly if parsing did not yield datetime columns. (A bare `.dtypes`
# expression in the middle of a cell is evaluated but never displayed.)
assert all(
    pd.api.types.is_datetime64_any_dtype(df[date_col]) for date_col in date_cols
), df[date_cols].dtypes

# delay_days: "real" shipping days minus "scheduled" shipping days.
df["delay_days"] = (
    df["Days for shipping (real)"] - df["Days for shipment (scheduled)"]
)
# is_delayed: 1 when delay_days is strictly positive, otherwise 0.
df["is_delayed"] = (df["delay_days"] > 0).astype(int)

#### Create the fact table dataframe

In [8]:
# Columns carried into the fact table; the list order is the column order
# of the resulting frame.
cols = [
    # order id and timestamps
    "Order Id", "order date (DateOrders)", "shipping date (DateOrders)",
    # shipping durations, delay metrics and risk flag
    "Days for shipping (real)", "Days for shipment (scheduled)",
    "delay_days", "is_delayed", "Late_delivery_risk",
    # sales, item total and profit
    "Sales", "Order Item Total", "Order Profit Per Order",
    # product, category and department
    "Product Card Id", "Product Name", "Category Name", "Department Name",
    # destination and shipping mode
    "Order Country", "Order Region", "Shipping Mode",
]

# Keep only the listed columns.
orders_fact = df[cols]

#### Create SQLite DB + insert data

In [10]:
# Open the SQLite database file (sqlite3 creates it if it does not exist).
conn = sqlite3.connect("../sql/supply_chain.db")

try:
    # Write the dataframe to the `orders_fact` table. if_exists="replace"
    # overwrites any existing table of that name, so re-running the cell does
    # not append duplicate rows; index=False skips the dataframe index.
    orders_fact.to_sql(
        name="orders_fact",
        con=conn,
        if_exists="replace",
        index=False,
    )
finally:
    # Close the connection even when the write raises, so the database file
    # handle is never leaked.
    conn.close()

#### Verify insertion

In [12]:
# Re-open the database and count the rows that landed in `orders_fact`.
conn = sqlite3.connect("../sql/supply_chain.db")

try:
    df_check = pd.read_sql(
        "SELECT COUNT(*) AS row_count FROM orders_fact",
        conn,
    )
finally:
    # Close the connection even if the query fails (e.g. the table is missing).
    conn.close()

# Last expression of the cell: display the one-row count frame.
df_check

Unnamed: 0,row_count
0,180519


In [2]:
import sqlite3
import pandas as pd

# Read the whole fact table back from SQLite and export it as a flat CSV.
conn = sqlite3.connect("../sql/supply_chain.db")

try:
    df_final = pd.read_sql("SELECT * FROM orders_fact", conn)
finally:
    # Close the connection even if the read fails, so the handle is not leaked.
    conn.close()

# index=False keeps the dataframe index out of the exported file.
df_final.to_csv("../data/orders_fact_final.csv", index=False)