In [9]:
# ==========================
# 1. Import & Kết nối DB
# ==========================
import os

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine
from sqlalchemy.engine import URL

# Load environment variables (e.g. from a local .env file).
load_dotenv()

DB_USER   = os.getenv("DB_USER")
DB_PASS   = os.getenv("DB_PASS")
DB_HOST   = os.getenv("DB_HOST")
DB_PORT   = os.getenv("DB_PORT")
DB_SILVER = os.getenv("DB_SILVER")
DB_GOLD   = os.getenv("DB_GOLD")

# Fail fast with a readable message: os.getenv returns None for an unset
# variable, and None would otherwise end up inside the connection settings.
_missing_vars = [
    var_name
    for var_name, var_value in {
        "DB_USER": DB_USER,
        "DB_PASS": DB_PASS,
        "DB_HOST": DB_HOST,
        "DB_PORT": DB_PORT,
        "DB_SILVER": DB_SILVER,
        "DB_GOLD": DB_GOLD,
    }.items()
    if var_value is None
]
if _missing_vars:
    raise RuntimeError(
        "Missing required environment variable(s): " + ", ".join(_missing_vars)
    )


def _make_engine(database):
    """Return a mysql+pymysql engine for ``database`` using the shared credentials.

    The URL is assembled with ``URL.create`` from its raw components rather
    than by string formatting, so a password containing characters such as
    "@", ":" or "/" cannot corrupt the connection URL.

    NOTE(review): DB_PASS must therefore hold the plain password; a value that
    was percent-escaped by hand for the old f-string URL needs to be unescaped.
    """
    url = URL.create(
        drivername="mysql+pymysql",
        username=DB_USER,
        password=DB_PASS,
        host=DB_HOST,
        port=int(DB_PORT),  # env values are strings; URL.create expects an int
        database=database,
    )
    return create_engine(url)


engine_silver = _make_engine(DB_SILVER)
engine_gold = _make_engine(DB_GOLD)


In [10]:
# ==========================
# 2. Load order_items and orders from Silver
# ==========================
ITEMS_QUERY = "SELECT * FROM fact_order_items"
ORDERS_QUERY = "SELECT order_id, total_price_after_sub_discount, total_quantity FROM fact_orders"

# Both frames are read from the Silver engine.
df_items = pd.read_sql(sql=ITEMS_QUERY, con=engine_silver)
df_orders = pd.read_sql(sql=ORDERS_QUERY, con=engine_silver)

print("fact_order_items shape:", df_items.shape)
print("fact_orders shape:", df_orders.shape)

# Last expression of the cell: preview the first item rows.
df_items.head(5)


fact_order_items shape: (46611, 4)
fact_orders shape: (40236, 3)


Unnamed: 0,order_id,product_id,variation_id,quantity
0,40458,358156d8-2777-4aab-b584-a26813fb4b95,847cf093-8f90-4f01-85ae-800ec31432a1,1
1,40455,ea5b416d-511e-47d9-841a-957fb22e68a8,4e1af645-e80d-4e74-bcd1-e23c296999cf,1
2,40454,ea5b416d-511e-47d9-841a-957fb22e68a8,46ae8aed-2296-4f5e-9665-39fd2203be6a,1
3,40453,9764f7be-e5d1-4c68-92fe-067b2437342b,06573beb-4ce8-4585-b4c9-47a426fbbab1,1
4,40452,ea5b416d-511e-47d9-841a-957fb22e68a8,a7c57d7d-9e04-4f45-8455-e0e066f5b90a,1


In [13]:
# ==========================
# 3. Transform fact_order_items → gold_fact_order_items
# ==========================
def transform_order_items(df_items, df_orders=None):
    """Build the Gold order-item rows with order revenue allocated per line.

    When ``df_orders`` is given, each order's ``total_price_after_sub_discount``
    is spread over its item rows in proportion to quantity::

        line_revenue = total_price_after_sub_discount / total_quantity * quantity

    Parameters
    ----------
    df_items : pandas.DataFrame
        Item rows. Must contain ``order_id``, ``product_id``, ``variation_id``
        and ``quantity``.
    df_orders : pandas.DataFrame, optional
        Order rows. Must contain ``order_id``, ``total_price_after_sub_discount``
        and ``total_quantity``. When omitted, ``line_revenue`` is 0 on every row.

    Returns
    -------
    pandas.DataFrame
        A new frame with the columns ``order_id``, ``product_id``,
        ``variation_id``, ``quantity`` and ``line_revenue``. ``df_items`` is
        not modified.

    Notes
    -----
    ``line_revenue`` is 0 when the item has no matching order, when any input
    value is missing, or when the order's ``total_quantity`` is 0 (a plain
    division would produce ``inf`` there, which ``fillna`` does not replace).

    The join is a left merge on ``order_id``; duplicate ``order_id`` values in
    ``df_orders`` would repeat item rows. That is not checked here.
    """
    keep_cols = ['order_id', 'product_id', 'variation_id', 'quantity', 'line_revenue']

    if df_orders is None:
        # No order totals available: emit a float zero so the column dtype
        # matches the allocated-revenue branch.
        return df_items.assign(line_revenue=0.0)[keep_cols]

    orders = df_orders[['order_id', 'total_price_after_sub_discount', 'total_quantity']]
    merged = df_items.merge(orders, on='order_id', how='left')

    # Turn zero denominators into missing values so the division yields NaN
    # (later filled with 0) rather than +/-inf.
    safe_total_quantity = merged['total_quantity'].where(merged['total_quantity'] != 0)
    unit_revenue = merged['total_price_after_sub_discount'] / safe_total_quantity
    line_revenue = (unit_revenue * merged['quantity']).fillna(0)

    return merged.assign(line_revenue=line_revenue)[keep_cols]

df_items_gold = transform_order_items(df_items, df_orders)

# The transform left-joins items to orders, so it must return exactly one row
# per input item row. A different count means fact_orders holds duplicate
# order_id values, which would double-count revenue in Gold.
assert len(df_items_gold) == len(df_items), (
    "Row count changed during transform - check fact_orders for duplicate order_id"
)

# Last expression of the cell: preview the transformed rows.
df_items_gold.head(10)


Unnamed: 0,order_id,product_id,variation_id,quantity,line_revenue
0,40458,358156d8-2777-4aab-b584-a26813fb4b95,847cf093-8f90-4f01-85ae-800ec31432a1,1,359000.0
1,40455,ea5b416d-511e-47d9-841a-957fb22e68a8,4e1af645-e80d-4e74-bcd1-e23c296999cf,1,249000.0
2,40454,ea5b416d-511e-47d9-841a-957fb22e68a8,46ae8aed-2296-4f5e-9665-39fd2203be6a,1,299000.0
3,40453,9764f7be-e5d1-4c68-92fe-067b2437342b,06573beb-4ce8-4585-b4c9-47a426fbbab1,1,249000.0
4,40452,ea5b416d-511e-47d9-841a-957fb22e68a8,a7c57d7d-9e04-4f45-8455-e0e066f5b90a,1,225000.0
5,40451,9764f7be-e5d1-4c68-92fe-067b2437342b,d5323283-4cea-48bd-a643-b2ba7ba9d956,1,225000.0
6,40450,a2e62141-7107-46a5-afcb-d79e2a6fa5a3,fd3735f8-683e-41aa-afcf-4a95dd6cea69,1,120000.0
7,40450,a2e62141-7107-46a5-afcb-d79e2a6fa5a3,587773a9-e6f3-4201-8885-25347556a8a3,1,120000.0
8,40450,a2e62141-7107-46a5-afcb-d79e2a6fa5a3,05ef659a-9d66-424c-ab2f-619fe13b4bfd,1,120000.0
9,40447,7c6bbb64-b133-440b-8bcf-5342d5c49567,ed2de062-55ab-4778-ae10-1addfc7a3dab,1,350000.0


In [14]:
# ==========================
# 4. Check the data after the transform
# ==========================
print("Shape sau transform:", df_items_gold.shape)

# Revenue total as allocated to the item rows.
items_revenue_total = df_items_gold['line_revenue'].sum()
print("Tổng revenue từ order_items:", items_revenue_total)

# Compare with the revenue total taken directly from the orders table.
orders_revenue_total = df_orders['total_price_after_sub_discount'].sum()
print("Tổng revenue từ orders:", orders_revenue_total)


Shape sau transform: (46611, 5)
Tổng revenue từ order_items: 16545782490.0
Tổng revenue từ orders: 16545782490


In [15]:
# ==========================
# 5. Load into Gold
# ==========================
GOLD_TABLE = "gold_fact_order_items"

# if_exists="replace" makes pandas drop any existing table before inserting;
# index=False keeps the DataFrame index out of the table.
df_items_gold.to_sql(name=GOLD_TABLE, con=engine_gold, if_exists="replace", index=False)

print("✅ Đã load gold_fact_order_items vào schema Gold.")


✅ Đã load gold_fact_order_items vào schema Gold.
