In [1]:
from datetime import date
from urllib.parse import quote_plus

import numpy as np
import pandas as pd
import yaml
from sqlalchemy import create_engine

# Database connections

In [2]:
# Database connections
# Read the connection settings for the source ('Adventure_Works') and target
# ('ETL_PRO') databases from the YAML config file.
with open('../config_fill.yml', 'r') as f:
    config = yaml.safe_load(f)
    config_aw = config['Adventure_Works']
    config_etl = config['ETL_PRO']

# Source URL (mssql+pyodbc): no user/password is placed in the URL; the
# trusted_connection value from the config is forwarded as a query parameter.
# Spaces in the configured driver name are written as '+' for the query string.
url_aw = f"mssql+pyodbc://@{config_aw['host']}/{config_aw['dbname']}?driver={config_aw['driver'].replace(' ', '+')}&trusted_connection={config_aw['trusted_connection']}"

# Target URL: the credentials are embedded in the URL, so they are
# percent-encoded first. A raw '@', ':' or '/' in the user name or password
# would otherwise be read as a URL delimiter and produce a malformed URL.
# str() keeps values that YAML parsed as numbers working, as the f-string did.
# NOTE(review): assumes config_fill.yml stores raw (not already percent-encoded)
# credentials — confirm.
etl_user = quote_plus(str(config_etl['user']))
etl_password = quote_plus(str(config_etl['password']))
url_etl = f"{config_etl['drivername']}://{etl_user}:{etl_password}@{config_etl['host']}:{config_etl['port']}/{config_etl['dbname']}"

aw_engine = create_engine(url_aw)
etl_engine = create_engine(url_etl)

In [4]:
# Extract product attributes together with their subcategory and category
# names. Both joins are LEFT JOINs, so products without a subcategory are kept
# and simply come back with NULL subcategory/category names.
#
# The ORDER BY makes the row order deterministic: a later cell derives
# product_key from row position, and without an explicit ordering the database
# is free to return rows in a different order on another run, which would
# silently reassign the keys.
query_product = """
SELECT
    p.ProductID,
    p.Name AS ProductName,
    p.ProductNumber,
    p.Color,
    p.StandardCost,
    p.ListPrice,
    p.Size,
    p.Weight,
    p.ProductSubcategoryID,
    p.ProductModelID,
    p.SellStartDate,
    p.SellEndDate,
    ps.Name AS SubcategoryName,
    pc.Name AS CategoryName
FROM Production.Product p
LEFT JOIN Production.ProductSubcategory ps
    ON p.ProductSubcategoryID = ps.ProductSubcategoryID
LEFT JOIN Production.ProductCategory pc
    ON ps.ProductCategoryID = pc.ProductCategoryID
ORDER BY p.ProductID;
"""

# Run the query against the source engine and preview the first rows.
df_product = pd.read_sql(query_product, aw_engine)
df_product.head()


Unnamed: 0,ProductID,ProductName,ProductNumber,Color,StandardCost,ListPrice,Size,Weight,ProductSubcategoryID,ProductModelID,SellStartDate,SellEndDate,SubcategoryName,CategoryName
0,1,Adjustable Race,AR-5381,,0.0,0.0,,,,,2008-04-30,NaT,,
1,2,Bearing Ball,BA-8327,,0.0,0.0,,,,,2008-04-30,NaT,,
2,3,BB Ball Bearing,BE-2349,,0.0,0.0,,,,,2008-04-30,NaT,,
3,4,Headset Ball Bearings,BE-2908,,0.0,0.0,,,,,2008-04-30,NaT,,
4,316,Blade,BL-2036,,0.0,0.0,,,,,2008-04-30,NaT,,


In [5]:
# Replace missing values column by column: text attributes get the
# placeholder "Unknown", Weight gets 0. Columns not listed are left as they are.
df_product = df_product.fillna(
    value=dict(
        Color="Unknown",
        Size="Unknown",
        Weight=0,
        SubcategoryName="Unknown",
        CategoryName="Unknown",
    )
)


In [6]:
# Give the listed columns snake_case names for the dimension table.
# Columns that are not in the mapping keep their source names.
df_product = df_product.rename(
    columns={
        "ProductID": "product_id",
        "ProductName": "product_name",
        "ProductNumber": "product_number",
        "StandardCost": "standard_cost",
        "ListPrice": "list_price",
        "SubcategoryName": "subcategory",
        "CategoryName": "category",
    }
)


In [7]:
# Add a sequential surrogate key (1..n, following the current row order) as
# the first column.
# DataFrame.insert raises ValueError when the column already exists, so remove
# a key left over from an earlier run of this cell first; this keeps the cell
# safe to re-run without restarting the kernel.
if "product_key" in df_product.columns:
    del df_product["product_key"]
df_product.insert(0, "product_key", range(1, len(df_product) + 1))


In [8]:
# Only the last expression of a cell is rendered, so a bare head() followed by
# another expression is silently discarded. Print the shape explicitly and
# leave the preview as the final expression so both appear in the output.
print(df_product.shape)
df_product.head()


(504, 15)

In [9]:
# Load the frame into the "dim_product" table through the ETL engine.
# if_exists="replace" recreates the table on every run rather than appending;
# index=False keeps the DataFrame index out of the table.
# The call is the last expression, so its return value is shown as cell output.
df_product.to_sql(
    name="dim_product",
    con=etl_engine,
    index=False,
    if_exists="replace",
)


504