In [261]:
import math
import os
from datetime import datetime
from getpass import getpass
from urllib.parse import quote_plus

import pandas as pd
import pymysql
import pyodbc
from sqlalchemy import create_engine
from sqlalchemy import text

In [262]:
# === Connect to MSSQL ===
# Connection settings are read from environment variables so that no password
# is stored in the notebook. The non-secret settings default to the values this
# notebook was originally written with.
mssql_server = os.environ.get("MSSQL_SERVER", "LAPTOP-J1OAU7VN")
mssql_database = os.environ.get("MSSQL_DATABASE", "master")
mssql_user = os.environ.get("MSSQL_UID", "sa")
# Fall back to an interactive prompt when the variable is unset or empty.
mssql_password = os.environ.get("MSSQL_PWD") or getpass("MSSQL password: ")

mssql_conn = pyodbc.connect(
    'DRIVER={ODBC Driver 17 for SQL Server};'
    f'SERVER={mssql_server};'
    f'DATABASE={mssql_database};'
    f'UID={mssql_user};'
    f'PWD={mssql_password}'
)
print("✅ 已連接 MSSQL")

# === Connect to MariaDB ===
mariadb_user = os.environ.get("MARIADB_USER", "root")
mariadb_password = os.environ.get("MARIADB_PWD") or getpass("MariaDB password: ")
mariadb_host = os.environ.get("MARIADB_HOST", "127.0.0.1")
mariadb_port = os.environ.get("MARIADB_PORT", "3306")
mariadb_database = os.environ.get("MARIADB_DATABASE", "test")

# quote_plus keeps characters such as '@', ':' or '/' in the credentials from
# being parsed as URL delimiters.
mariadb_engine = create_engine(
    f'mysql+pymysql://{quote_plus(mariadb_user)}:{quote_plus(mariadb_password)}'
    f'@{mariadb_host}:{mariadb_port}/{mariadb_database}?charset=utf8mb4'
)
# NOTE(review): create_engine is documented as lazy, so this message likely
# only confirms the engine object was built, not that a connection succeeded.
print("✅ 已連接 MariaDB")

✅ 已連接 MSSQL
✅ 已連接 MariaDB


In [None]:
# Timestamp prefix used for console and log messages
def timestamp():
    """Return the current local time formatted as ``[YYYY-MM-DD HH:MM:SS]``."""
    now = datetime.now()
    return f"[{now:%Y-%m-%d %H:%M:%S}]"
# Path of the log file that receives progress and error messages
error_log_path = "error_log.txt"
# Append one timestamped entry to the log file
def write_log(msg):
    """Append ``msg`` to the file at ``error_log_path``.

    The entry is prefixed with the value returned by ``timestamp()`` and
    followed by a newline. The file is opened in append mode with UTF-8
    encoding on every call.
    """
    entry = f"{timestamp()} {msg}\n"
    with open(error_log_path, mode="a", encoding="utf-8") as log_file:
        log_file.write(entry)
# Look up every column name of a table; the transfer loop uses the result as
# the sort keys when it compares batches
def get_columns(table_name, engine):
    """Return the column names of ``table_name`` in ordinal-position order.

    The names are read from ``INFORMATION_SCHEMA.COLUMNS`` through
    ``pd.read_sql``. The query returns all columns, not only key columns, and
    filters on ``TABLE_NAME`` alone, so same-named tables in other schemas
    would contribute their columns as well.

    Parameters:
        table_name: table to inspect. It is interpolated into the SQL text
            as-is, so pass trusted names only.
        engine: connection object handed to ``pd.read_sql``.

    Returns:
        list: values of the ``COLUMN_NAME`` result column.
    """
    query = f"""SELECT COLUMN_NAME FROM INFORMATION_SCHEMA.COLUMNS WHERE TABLE_NAME = '{table_name}' ORDER BY ORDINAL_POSITION;"""
    return pd.read_sql(query, engine)['COLUMN_NAME'].tolist()

# Order in which the tables are transferred
tables = [
    'factory',
    'transgress_info',
    'penalty_info',
    'appeal',
    'inspection',
    'labor_law',
    'labor_transgress',
    'payment',
]
# Order in which the target tables are emptied before the transfer. This is a
# testing convenience so the tables need not be cleared by hand before each run.
safe_tables_order = [
    'appeal',
    'inspection',
    'labor_transgress',
    'labor_law',
    'payment',
    'penalty_info',
    'transgress_info',
    'factory',
]

total = len(tables)
write_log("✅ 資料轉移啟動...\n")
write_log(f"🔁 開始轉移共 {total} 張資料表...\n")
print(f"{timestamp()} 🔁 開始轉移共 {total} 張資料表...\n")

# Empty every target table, following the foreign-key-safe order above.
# Each table is deleted and committed on its own connection.
for table in safe_tables_order:
    delete_stmt = text(f"DELETE FROM {table}")
    with mariadb_engine.connect() as maria_conn:
        maria_conn.execute(delete_stmt)
        maria_conn.commit()

# Main loop: transfer every table in `tables` from MSSQL to MariaDB in batches,
# compare each batch after it is written, then compare the total row counts.

def _log_batch_mismatch(batch_no, mssql_batch_df, maria_batch_df):
    """Write the details of a mismatching batch to the log.

    Parameters:
        batch_no: 1-based batch number used in the log messages.
        mssql_batch_df: the batch as read back from MSSQL, already sorted and
            re-indexed.
        maria_batch_df: the batch as read back from MariaDB, already sorted and
            re-indexed.

    Logs the number of differing rows and the full content of up to three of
    them. When the two frames do not share the same index and columns, only
    their shapes are logged.
    """
    # DataFrame.compare raises ValueError unless both frames are identically
    # labeled. Without this guard a batch with a different row count would
    # raise inside the caller's try block and abort the rest of the table.
    same_labels = (
        mssql_batch_df.index.equals(maria_batch_df.index)
        and mssql_batch_df.columns.equals(maria_batch_df.columns)
    )
    if not same_labels:
        write_log(
            f"⚠️ 兩邊批次的列數或欄位不同，無法逐格比對："
            f"MSSQL={mssql_batch_df.shape}, MariaDB={maria_batch_df.shape}\n"
        )
        return

    # Cell-by-cell comparison. keep_shape=True keeps the original frame
    # structure; with keep_equal=False the cells that are equal become NaN.
    diff = mssql_batch_df.compare(maria_batch_df, keep_shape=True, keep_equal=False)

    # Rows that have at least one differing cell
    error_indices = diff.dropna(how='all').index.tolist()

    write_log(f"⚠️ 共發現 {len(error_indices)} 筆不一致\n")

    # Show the details of a few rows only (at most the first 3)
    for idx in error_indices[:3]:
        write_log(f" 第 {batch_no} 批，第 {idx} 筆資料不一致\n")
        # Full content of the mismatching row on each side
        write_log(f" MSSQL：{mssql_batch_df.iloc[idx].to_dict()}\n")
        write_log(f" MariaDB：{maria_batch_df.iloc[idx].to_dict()}\n")


# Number of rows written and verified per batch
batch_size = 50000

for i, table in enumerate(tables, 1):
    print(f"\n  正在轉移資料表 {table}...")
    write_log(f"\n  正在轉移資料表 {table}...")
    try:
        # All column names of the table; used as sort keys for the comparison
        table_sort_keys = get_columns(table, mssql_conn)
        # Load the whole source table from MSSQL
        df = pd.read_sql(f"SELECT * FROM {table}", mssql_conn)
        mssql_count = len(df)  # total number of rows in the source table
        # Number of batches needed for this table
        total_batches = math.ceil(mssql_count / batch_size)

        for j in range(total_batches):
            start = j * batch_size
            # Keep the last batch from running past the end of the data
            end = min(start + batch_size, mssql_count)
            chunk = df.iloc[start:end]
            # Append the chunk to the table of the same name in MariaDB
            chunk.to_sql(name=table, con=mariadb_engine, index=False, if_exists='append')

            # Verify this batch: read the same page back from both databases.
            # NOTE(review): neither query has a deterministic ORDER BY, so the
            # comparison assumes both servers return rows in the same order.
            # The scratch cells later in this notebook show the two servers
            # ordering the same ORDER BY differently — confirm before relying
            # on this check.
            mssql_batch_df = pd.read_sql(
                f"SELECT * FROM {table} ORDER BY (SELECT NULL) OFFSET {start} ROWS FETCH NEXT {batch_size} ROWS ONLY;",
                mssql_conn
            )
            maria_batch_df = pd.read_sql(
                f"SELECT * FROM {table} LIMIT {batch_size} OFFSET {start};",
                mariadb_engine
            )
            # Sort both frames by every column, then renumber the rows so the
            # two frames can be compared position by position
            mssql_batch_df = mssql_batch_df.sort_values(by=table_sort_keys).reset_index(drop=True)
            maria_batch_df = maria_batch_df.sort_values(by=table_sort_keys).reset_index(drop=True)

            # Compare the two frames
            if mssql_batch_df.equals(maria_batch_df):
                write_log(f"✅ 第 {j + 1} 批次比對一致（{start} ~ {end} 筆")
            else:
                write_log(f"❗第 {j + 1} 批次比對不一致\n")
                print(f"{timestamp()}❗第 {j + 1} 批次比對不一致\n")
                _log_batch_mismatch(j + 1, mssql_batch_df, maria_batch_df)

        # After the whole table is transferred, compare the total row counts
        maria_count_df = pd.read_sql(f"SELECT COUNT(*) AS total FROM {table}", mariadb_engine)
        maria_count = int(maria_count_df.iloc[0]['total'])

        if mssql_count == maria_count:
            print(f"{timestamp()} 📦 {i}/{total} {table}：{mssql_count} rows ✔ (總資料筆數一致)")
            write_log(f"📦 {i}/{total} {table}：{mssql_count} rows ✔ (總資料筆數一致)")
        else:
            print(f"{timestamp()} ⚠️ {i}/{total} {table}：MSSQL={mssql_count}, MariaDB={maria_count} (⚠️ 不一致)")
            write_log(f"❗ 總資料筆數不一致 - {table}：MSSQL={mssql_count}, MariaDB={maria_count}\n")

        # TODO: whole-table content verification is not implemented here

    # Any error raised while transferring this table is logged, and the loop
    # continues with the next table
    except Exception as e:
        print(f"{timestamp()} ❌ {i}/{total} {table} 發生錯誤：{e}")
        write_log(f"❌ 轉移失敗 - {table}：{str(e)}\n")

print(f"\n{timestamp()} ✅ 全部轉移處理完成！請查看 error_log.txt（如有）")

[2025-06-16 01:57:22] 🔁 開始轉移共 8 張資料表...

  正在轉移資料表 factory...



  df = pd.read_sql(sql, engine)
  df = pd.read_sql(f"SELECT * FROM {table}", mssql_conn)
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(


[2025-06-16 01:57:36] 📦 1/8 factory：111479 rows ✔ (一致)
  正在轉移資料表 transgress_info...



  df = pd.read_sql(sql, engine)
  df = pd.read_sql(f"SELECT * FROM {table}", mssql_conn)
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(


[2025-06-16 01:58:58] 📦 2/8 transgress_info：362895 rows ✔ (一致)
  正在轉移資料表 penalty_info...



  df = pd.read_sql(sql, engine)
  df = pd.read_sql(f"SELECT * FROM {table}", mssql_conn)
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(


[2025-06-16 02:00:05] 📦 3/8 penalty_info：362895 rows ✔ (一致)
  正在轉移資料表 appeal...



  df = pd.read_sql(sql, engine)
  df = pd.read_sql(f"SELECT * FROM {table}", mssql_conn)
  mssql_batch_df = pd.read_sql(


[2025-06-16 02:00:08] 📦 4/8 appeal：16785 rows ✔ (一致)
  正在轉移資料表 inspection...



  df = pd.read_sql(sql, engine)
  df = pd.read_sql(f"SELECT * FROM {table}", mssql_conn)
  mssql_batch_df = pd.read_sql(


[2025-06-16 02:00:08] 📦 5/8 inspection：3086 rows ✔ (一致)
  正在轉移資料表 labor_law...

[2025-06-16 02:00:08] 📦 6/8 labor_law：17 rows ✔ (一致)
  正在轉移資料表 labor_transgress...



  df = pd.read_sql(sql, engine)
  df = pd.read_sql(f"SELECT * FROM {table}", mssql_conn)
  mssql_batch_df = pd.read_sql(
  df = pd.read_sql(sql, engine)
  df = pd.read_sql(f"SELECT * FROM {table}", mssql_conn)
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(


[2025-06-16 02:02:45] 📦 7/8 labor_transgress：1020443 rows ✔ (一致)
  正在轉移資料表 payment...



  df = pd.read_sql(sql, engine)
  df = pd.read_sql(f"SELECT * FROM {table}", mssql_conn)
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(
  mssql_batch_df = pd.read_sql(


[2025-06-16 02:03:12] 📦 8/8 payment：362895 rows ✔ (一致)

[2025-06-16 02:03:12] ✅ 全部轉移處理完成！請查看 error_log.txt（如有）


In [257]:
# Ad-hoc check: list the column names get_columns() reports for the `appeal` table.
get_columns('appeal', mssql_conn)

  df = pd.read_sql(sql, engine)


['caseId',
 'document_no',
 'fac_uniformno',
 'fac_name',
 'appeal_or_rescind',
 'ispetition',
 'lawsuit_date_1',
 'petition_agency',
 'petition_results']

In [238]:
# Fetch rows 50000-99999 of `factory` from each database with the same
# unordered paging queries the transfer loop uses, for manual comparison.
mssql_batch_df = pd.read_sql(
    sql="SELECT * FROM factory ORDER BY (SELECT NULL) OFFSET 50000 ROWS FETCH NEXT 50000 ROWS ONLY;",
    con=mssql_conn,
)
maria_batch_df = pd.read_sql(
    sql="SELECT * FROM factory LIMIT 50000 OFFSET 50000;",
    con=mariadb_engine,
)

  mssql_batch_df = pd.read_sql(


In [239]:
# Display the batch currently held in maria_batch_df (the MariaDB side) for visual inspection.
maria_batch_df

Unnamed: 0,fac_uniformno,fac_name,fac_area_code,fac_address,ems_no
0,41583802,永建礦業股份有限公司,10002030,宜蘭縣蘇澳鎮蘇南路24之1號4樓,U0103375
1,41608901,大原工礦股份有限公司大原工礦股份有限公司申請核定變更核定及部分註銷和平事業區第85林班地礦業...,10002080,宜蘭縣冬山鄉成興路690巷1號1樓,G41097000054
2,41608901,大原工礦股份有限公司選碎礦場,10002120,宜蘭縣南澳鄉瀑布段一七地號,G4103321
3,41609107,光南冷凍廠股份有限公司,10002030,宜蘭縣蘇澳鎮華山路一四九號,G32097000171
4,41615109,同榮實業股份有限公司,10002080,宜蘭縣冬山鄉香和村香中路一號,G3700906
...,...,...,...,...,...
49995,96917698,宥誠科技股份有限公司,10004010,新竹縣竹北市十興里犁頭山下四七號,J5505521
49996,96917894,正辦實業有限公司,66000250,臺中市龍井區山腳里中山中路二段五四一巷六號,L0200820
49997,96918036,新福星營造有限公司,65000100,新北市淡水區民權路四二號五樓,F10097008738
49998,96918351,利政營造有限公司,10008030,南投縣草屯鎮富林路一段一六九號二樓,M35097003679


In [240]:
# Display the batch currently held in mssql_batch_df (the MSSQL side) for visual inspection.
mssql_batch_df

Unnamed: 0,fac_uniformno,fac_name,fac_area_code,fac_address,ems_no
0,41583802,永建礦業股份有限公司,10002030,宜蘭縣蘇澳鎮蘇南路24之1號4樓,U0103375
1,41608901,大原工礦股份有限公司大原工礦股份有限公司申請核定變更核定及部分註銷和平事業區第85林班地礦業...,10002080,宜蘭縣冬山鄉成興路690巷1號1樓,G41097000054
2,41608901,大原工礦股份有限公司選碎礦場,10002120,宜蘭縣南澳鄉瀑布段一七地號,G4103321
3,41609107,光南冷凍廠股份有限公司,10002030,宜蘭縣蘇澳鎮華山路一四九號,G32097000171
4,41615109,同榮實業股份有限公司,10002080,宜蘭縣冬山鄉香和村香中路一號,G3700906
...,...,...,...,...,...
49995,96917698,宥誠科技股份有限公司,10004010,新竹縣竹北市十興里犁頭山下四七號,J5505521
49996,96917894,正辦實業有限公司,66000250,臺中市龍井區山腳里中山中路二段五四一巷六號,L0200820
49997,96918036,新福星營造有限公司,65000100,新北市淡水區民權路四二號五樓,F10097008738
49998,96918351,利政營造有限公司,10008030,南投縣草屯鎮富林路一段一六九號二樓,M35097003679


In [156]:
# Fetch the page starting at row 100000 of `factory` from each database, this
# time with an explicit ORDER BY on four columns, for manual comparison.
mssql_batch_df = pd.read_sql(
    sql="SELECT * FROM factory ORDER BY fac_uniformno,fac_area_code,fac_address,ems_no OFFSET 100000 ROWS FETCH NEXT 50000 ROWS ONLY;",
    con=mssql_conn,
)
maria_batch_df = pd.read_sql(
    sql="SELECT * FROM factory ORDER BY fac_uniformno,fac_area_code,fac_address,ems_no LIMIT 50000 OFFSET 100000;",
    con=mariadb_engine,
)

  mssql_batch_df = pd.read_sql(


In [212]:
# Preview the first two rows of the MSSQL batch.
mssql_batch_df.head(2)

Unnamed: 0,fac_uniformno,fac_name,fac_area_code,fac_address,ems_no
0,96919128,戊艮營造股份有限公司,10009010,雲林縣斗六市榴南里南仁路九○之一五號一樓,P46097004479
1,96919302,尚谷營造有限公司,65000050,新北市新莊區新北大道七段四九八巷一八號一三樓,F06097018114


In [213]:
# Preview the first two rows of the MariaDB batch.
maria_batch_df.head(2)

Unnamed: 0,fac_uniformno,fac_name,fac_area_code,fac_address,ems_no
0,96919128,戊艮營造股份有限公司天上人間精品旅館新建工程,10009010,雲林縣斗六市海豐崙段朱丹灣小段六二之一三八地號,P46A5376
1,96919302,尚谷營造有限公司,65000050,新北市新莊區新北大道七段四九八巷一八號一三樓,F06097018114
