In [1]:
# ==========================
# 1. Imports & database connection
# ==========================
import os
from urllib.parse import quote

import pandas as pd
from dotenv import load_dotenv
from sqlalchemy import create_engine

# Load environment variables (python-dotenv)
load_dotenv()

# Fail fast with a readable message: an unset variable would otherwise be
# formatted into the connection URL as the literal text "None".
_REQUIRED_DB_VARS = ("DB_USER", "DB_PASS", "DB_HOST", "DB_PORT", "DB_GOLD")
_missing_db_vars = [name for name in _REQUIRED_DB_VARS if os.getenv(name) is None]
if _missing_db_vars:
    raise RuntimeError(
        "Missing required environment variables: " + ", ".join(_missing_db_vars)
    )

DB_USER   = os.getenv("DB_USER")
DB_PASS   = os.getenv("DB_PASS")
DB_HOST   = os.getenv("DB_HOST")
DB_PORT   = os.getenv("DB_PORT")
DB_GOLD   = os.getenv("DB_GOLD")

# Percent-encode the credentials so that characters such as "@", ":", "/"
# or "%" in the user name or password are not misread as URL delimiters.
engine_gold = create_engine(
    f"mysql+pymysql://{quote(DB_USER, safe='')}:{quote(DB_PASS, safe='')}"
    f"@{DB_HOST}:{DB_PORT}/{DB_GOLD}"
)


In [2]:
# ==========================
# 2. Function that builds the dim_date table
# ==========================
def create_dim_date(start='2023-01-01', end='2026-12-31'):
    """Build a calendar (date dimension) DataFrame with one row per day.

    Parameters
    ----------
    start, end : str or datetime-like
        First and last calendar day, both inclusive; forwarded to
        ``pd.date_range`` with daily frequency.

    Returns
    -------
    pd.DataFrame
        Columns, in order: ``date``, ``date_id`` (integer YYYYMMDD),
        ``day``, ``day_of_week`` (1=Monday ... 7=Sunday), ``day_name``,
        ``week_of_year`` (ISO 8601 week number), ``month``, ``month_name``,
        ``quarter`` (string such as ``'2023Q1'``), ``year`` and
        ``is_weekend`` (1 for Saturday/Sunday, otherwise 0).

    Notes
    -----
    ``week_of_year`` follows the ISO calendar while ``year`` is the plain
    calendar year, so the two can disagree around New Year (2023-01-01 is
    ISO week 52 but ``year`` is 2023). Do not combine them as a key.
    """
    dates = pd.date_range(start=start, end=end, freq='D')
    df = pd.DataFrame({'date': dates})
    date_parts = df['date'].dt

    df['date_id'] = date_parts.strftime('%Y%m%d').astype(int)
    df['day'] = date_parts.day
    df['day_of_week'] = date_parts.dayofweek + 1  # pandas: 0=Mon, shift to 1=Mon
    df['day_name'] = date_parts.day_name()
    df['week_of_year'] = date_parts.isocalendar().week
    df['month'] = date_parts.month
    df['month_name'] = date_parts.month_name()
    df['quarter'] = date_parts.to_period('Q').astype(str)
    df['year'] = date_parts.year
    # Vectorized flag instead of a row-wise apply: 6=Saturday, 7=Sunday.
    df['is_weekend'] = (df['day_of_week'] >= 6).astype(int)

    return df


In [3]:
# ==========================
# 3. Generate the dim_date data
# ==========================
# One row per calendar day from 2023-01-01 through 2026-12-31.
df_dim_date = create_dim_date(start='2023-01-01', end='2026-12-31')

# Report the row count, then preview the first ten rows as the cell output.
row_count = df_dim_date.shape[0]
print("Số dòng:", row_count)
df_dim_date.head(10)


Số dòng: 1461


Unnamed: 0,date,date_id,day,day_of_week,day_name,week_of_year,month,month_name,quarter,year,is_weekend
0,2023-01-01,20230101,1,7,Sunday,52,1,January,2023Q1,2023,1
1,2023-01-02,20230102,2,1,Monday,1,1,January,2023Q1,2023,0
2,2023-01-03,20230103,3,2,Tuesday,1,1,January,2023Q1,2023,0
3,2023-01-04,20230104,4,3,Wednesday,1,1,January,2023Q1,2023,0
4,2023-01-05,20230105,5,4,Thursday,1,1,January,2023Q1,2023,0
5,2023-01-06,20230106,6,5,Friday,1,1,January,2023Q1,2023,0
6,2023-01-07,20230107,7,6,Saturday,1,1,January,2023Q1,2023,1
7,2023-01-08,20230108,8,7,Sunday,1,1,January,2023Q1,2023,1
8,2023-01-09,20230109,9,1,Monday,2,1,January,2023Q1,2023,0
9,2023-01-10,20230110,10,2,Tuesday,2,1,January,2023Q1,2023,0


In [4]:
# ==========================
# 4. Load dim_date into the Gold schema
# ==========================
# if_exists="replace": an existing gold_dim_date table is replaced by a new
# one built from this DataFrame.
# index=False: the DataFrame index is not written as a column.
df_dim_date.to_sql(
    name="gold_dim_date",
    con=engine_gold,
    index=False,
    if_exists="replace",
)

print("✅ Đã load gold_dim_date vào schema Gold.")


✅ Đã load gold_dim_date vào schema Gold.
