In [1]:
#coding=utf-8
import xlwings as xw
import re
import pandas as pd
import psycopg2
from modules.my_libs import han_ji_chu_im as ji

#==========================================================
# Excel 檔案
#==========================================================
# file_path = 'to-tek-keng.xlsx'
file_path = 'hoo-goa-chu-im.xlsx'
wb = xw.Book(file_path)

# source_sheet = wb.sheets['ji-im-tui-chiau']
source_sheet = wb.sheets['工作表1']
end_row = source_sheet.range('A' + str(source_sheet.cells.last_cell.row)).end('up').row
print(f'end_row = {end_row}')

#==========================================================
# 備妥程式需使用之工作表
#==========================================================
sheet_name_list = ["字庫表", "缺字表", "漢字注音表"]
#-----------------------------------------------------
# 刪除原有之工作表
for sheet_name in sheet_name_list:
    sheet = wb.sheets[sheet_name]
    try:
        sheet.select()
        sheet.delete()
    except: 
        # CommandError:
        continue

#-----------------------------------------------------
# 新增程式需使用之工作表
ji_khoo_sheet  = wb.sheets.add(name="字庫表")
khiam_ji_sheet  = wb.sheets.add(name="缺字表")
chu_im_sheet = wb.sheets.add(name="漢字注音表")


漢字：昧 ==> 注音碼：boenn2 ==> 十五音注音：糜二門
end_row = 2


In [2]:
#==========================================================
# 資料庫
#==========================================================
conn = psycopg2.connect(database="alanjui", user="alanjui", host="127.0.0.1", port="5432")
cur = conn.cursor()

row = 1     # index for source sheet
chu_im_index = 1
khiam_ji_index = 1
end_counter = end_row + 1

while row < end_counter:
    print(f'row = {row}')
    # 自 source_sheet 取待注音漢字
    han_ji = str(source_sheet.range('A' + str(row)).value)
    han_ji.strip()
    
    #=========================================================
    # 將【整段】讀入的漢字，轉成：每個儲存格，放一個漢字
    #=========================================================
    han_ji_list = ji.convert_string_to_list(han_ji)
    
    chu_im_sheet.range('A' + str(chu_im_index)) \
                .options(transpose=True).value = han_ji_list

    #=========================================================
    # 將漢字加注音
    #=========================================================
    i = chu_im_index
    # 取用每一個漢字，自【字庫】資料庫查找其【注音碼】
    for search_han_ji in han_ji_list:
        # 若取出之字為標點符號，則跳過，並繼續取下一個漢字。
        # piau_tiam = r"[，、：；。？！（）「」【】《》“]"
        piau_tiam = r"[；：？！\uFF0C\uFF08-\uFF09\u2013-\u2014\u2026\\u2018-\u201D\u3000\u3001-\u303F]"
        searchObj = re.search( piau_tiam, search_han_ji, re.M|re.I)
        if searchObj:
            i += 1
            continue
        
        # SQL 查詢指令：自字庫查找某漢字之注音碼
        #sql = f"select id, han_ji, chu_im, freq, siann, un, tiau from han_ji where han_ji='{search_han_ji}'"
        sql = "SELECT id, han_ji, chu_im, freq, siann, un, tiau "\
                "FROM han_ji "\
                f"WHERE han_ji='{search_han_ji}' "
        cur.execute(sql)
        query_rows = cur.fetchall()
        
        # 判斷是否發生：找不到漢字注音碼的問題
        if not query_rows:
            print(f"Can not find 【{search_han_ji}】in Han-Ji-Khoo!!")
            khiam_ji_sheet.range('A' + str(khiam_ji_index)).value = search_han_ji
            khiam_ji_index += 1
            i += 1
            continue
 
        # 將已找到注音碼的漢字，分解出：聲母、韻母、調號
        chu_im = query_rows[0][2]
        chu_im_code = ji.split_chu_im(chu_im)
        if chu_im_code[0] == "q":
            siann_bu = ""
        else:
            siann_bu = chu_im_code[0]
        un_bu = chu_im_code[1]
        tiau_ho = chu_im_code[2]
        
        # 取得聲母之聲母碼；及取得韻母之韻母碼；以利後續轉換：十五音、方音符號、
        # 白話字、羅馬拼音、閩拼時使用。
        if siann_bu != "":
            siann_index = ji.get_siann_idx(siann_bu)
        un_index = ji.get_un_idx(un_bu)
        if un_index == -1:
            # 記錄沒找到之韻母
            print(f"Can not find 【{search_han_ji}】in Han-Ji-Khoo!!")
            khiam_ji_sheet.range('A' + str(khiam_ji_index)).value = f"{search_han_ji}: (Un-Bu: {un_bu})"
            khiam_ji_index += 1
        sip_ngoo_im_index = ji.get_sip_ngoo_im_idx(un_index)
        
        #=========================================================
        # 使用注音碼，取得各式之〖 注音／拼音〗
        #=========================================================
        
        # 輸出十五音
        sip_ngoo_im_chu_im = ji.get_sip_ngoo_im_chu_im(siann_index, un_index, tiau_ho)
        
        # 輸出方音符號
        TPS_chu_im = ji.get_TPS_chu_im(siann_index, un_index, tiau_ho)
        
        #=========================================================
        # 將已注音之漢字加入【漢字注音表】
        #=========================================================
        chu_im_sheet.range('B' + str(i)).value = chu_im
        chu_im_sheet.range('C' + str(i)).value = siann_bu
        chu_im_sheet.range('D' + str(i)).value = un_bu
        chu_im_sheet.range('E' + str(i)).value = tiau_ho

        chu_im_sheet.range('F' + str(i)).value = siann_index + 1
        chu_im_sheet.range('G' + str(i)).value = un_index + 1
        chu_im_sheet.range('H' + str(i)).value = sip_ngoo_im_index
        
        
        chu_im_sheet.range('J' + str(i)).value = sip_ngoo_im_chu_im
        chu_im_sheet.range('K' + str(i)).value = TPS_chu_im
        
        i += 1

        #=========================================================
        # 將已注音之漢字加入【字庫表】
        #=========================================================

    #=========================================================
    # 調整讀取來源；寫入標的各手標
    #=========================================================
    chu_im_index += len(han_ji_list)
    row += 1
    
#==========================================================
# 關閉資料庫
#==========================================================
conn.close()

row = 1
row = 2
Can not find 【
】in Han-Ji-Khoo!!
Can not find 【軾】in Han-Ji-Khoo!!
Can not find 【
】in Han-Ji-Khoo!!
Can not find 【軾】in Han-Ji-Khoo!!
Can not find 【隋】in Han-Ji-Khoo!!
Can not find 【荊】in Han-Ji-Khoo!!
Can not find 【隴】in Han-Ji-Khoo!!
