In [1]:
#coding=utf-8
import xlwings as xw
import re
import pandas as pd
import psycopg2

from modules.my_libs import import_ipynb
from nbpackage import han_ji_chu_im as ji

#==========================================================
# Excel workbook
#==========================================================

# Workbook that supplies the text to be annotated
file_path = 'hoo-goa-chu-im.xlsx'
wb = xw.Book(file_path)

# Source worksheet, plus the row reached by starting at the bottom-most
# cell of column A and moving up with end('up')
source_sheet = wb.sheets['工作表1']
end_row = (
    source_sheet
    .range(f'A{source_sheet.cells.last_cell.row}')
    .end('up')
    .row
)
print(f'end_row = {end_row}')

#==========================================================
# Make sure the worksheets this program writes to exist
#==========================================================
sheet_name_list = ["字庫表", "缺字表", "漢字注音表"]
#-----------------------------------------------------
# Decide by sheet name instead of probing each sheet with select() inside a
# bare `except:`. A bare except also swallows unrelated failures (including
# KeyboardInterrupt), and a sheet lookup that raises immediately would not
# be protected by a try that only wraps select().
existing_sheet_names = {existing.name for existing in wb.sheets}
for sheet_name in sheet_name_list:
    if sheet_name not in existing_sheet_names:
        # The worksheet is missing: create it under the required name
        wb.sheets.add(name=sheet_name)

# Handles to the three working sheets
khiam_ji_sheet = wb.sheets["缺字表"]
ji_khoo_sheet  = wb.sheets["字庫表"]
chu_im_sheet   = wb.sheets["漢字注音表"]


importing Jupyter notebook from /Users/alanjui/workspace/rime/ho-lok-oe-chu-im/nbpackage/han_ji_chu_im.ipynb
漢字：雨 ==> 注音碼：qu2 ==> 注音符號： ú
漢字：狐 ==> 注音碼：hoo5 ==> 注音符號：hǒo
漢字：虺 ==> 注音碼：qui2 ==> 注音符號： úi
漢字：有 ==> 注音碼：qiu2 ==> 注音符號： íu
漢字：鏢 ==> 注音碼：pio1 ==> 注音符號：pio
漢字：語 ==> 注音碼：gi2 ==> 注音符號：gí
漢字：臺 ==> 注音碼：tai5 ==> 注音符號：tǎi
漢字：野 ==> 注音碼：qia2 ==> 注音符號： ía
漢字：欠 ==> 注音碼：khiam3 ==> 注音符號：khìam
漢字：德 ==> 注音碼：tek4 ==> 注音符號：tik
漢字：元 ==> 注音碼：goan5 ==> 注音符號：gǔan
漢字：字 ==> 注音碼：ji7 ==> 注音符號：jī
漢字：俗 ==> 注音碼：siok8 ==> 注音符號：si̍ok
漢字：聲 ==> 注音碼：siann1 ==> 注音符號：siann
漢字：生 ==> 注音碼：chhinn1 ==> 注音符號：tshinn
end_row = 250


In [2]:
#==========================================================
# Annotate every character of the source sheet with its reading, looked up
# in the PostgreSQL table `han_ji`.
#
# Sheets written:
#   chu_im_sheet   - column A: one character per row (a '\n' cell separates
#                    source rows); columns B-H: the highest-`freq` reading as
#                    chu_im, siann_bu, un_bu, tiau_ho, siann_index + 1,
#                    un_index + 1, sip_ngoo_im_index
#   ji_khoo_sheet  - one row per additional reading of a character:
#                    A: character, B-H as above, I: row in chu_im_sheet,
#                    J: database id
#   khiam_ji_sheet - characters with no database entry, and characters whose
#                    final (un_bu) has no index
#==========================================================


def _write_cells(sheet, row_no, columns, values):
    """Write each value into its own cell of row `row_no`, one column letter per value."""
    for column, value in zip(columns, values):
        sheet.range(f'{column}{row_no}').value = value


# Punctuation that is copied to chu_im_sheet but never looked up.
# Every escape is a single-backslash \uXXXX. A doubled backslash in front of
# "u2018" would turn the quote range into the range '8'..U+201D, which also
# matches Latin letters and the digits 8-9.
piau_tiam_pattern = re.compile(
    r"[；：？！\uFF0C\uFF08-\uFF09\u2013-\u2014\u2026\u2018-\u201D\u3000\u3001-\u303F]"
)

# The character is passed as a bound parameter, never formatted into the
# statement, so quotes or other special characters in the sheet cannot
# alter the SQL.
sql = (
    "SELECT id, han_ji, chu_im, freq, siann, un, tiau "
    "FROM han_ji "
    "WHERE han_ji = %s "
    "ORDER BY freq DESC;"
)

#==========================================================
# Database
#==========================================================
conn = psycopg2.connect(database="alanjui", user="alanjui", host="127.0.0.1", port="5432")
try:
    cur = conn.cursor()

    chu_im_index = 1      # next free row in chu_im_sheet
    ji_khoo_index = 1     # next free row in ji_khoo_sheet
    khiam_ji_index = 1    # next free row in khiam_ji_sheet
    end_counter = end_row + 1

    for row in range(1, end_counter):
        print(f'row = {row}')
        # Text of the source row; an empty cell is read back as None
        cell_value = source_sheet.range('A' + str(row)).value
        # str.strip() returns a new string, so the result must be kept
        han_ji = '' if cell_value is None else str(cell_value).strip()

        #=========================================================
        # Blank source row: emit a line-break marker and move on
        #=========================================================
        if not han_ji:
            chu_im_sheet.range('A' + str(chu_im_index)).value = '\n'
            chu_im_index += 1
            continue

        #=========================================================
        # Spread the row's text down column A of chu_im_sheet,
        # one character per cell
        #=========================================================
        han_ji_list = ji.convert_string_to_list(han_ji)
        chu_im_sheet.range('A' + str(chu_im_index)) \
                    .options(transpose=True).value = han_ji_list

        #=========================================================
        # Annotate the characters one by one; `i` is the chu_im_sheet
        # row that holds the current character
        #=========================================================
        for i, search_han_ji in enumerate(han_ji_list, start=chu_im_index):
            # Punctuation keeps its cell in column A but gets no reading
            if piau_tiam_pattern.search(search_han_ji):
                continue

            # All readings of the character, most frequent first
            cur.execute(sql, (search_han_ji,))
            query_rows = cur.fetchall()

            if not query_rows:
                # No reading in the database: record the character
                print(f"Can not find 【{search_han_ji}】in Han-Ji-Khoo!!")
                khiam_ji_sheet.range('A' + str(khiam_ji_index)).value = search_han_ji
                khiam_ji_index += 1
                continue

            for ji_found, query_row in enumerate(query_rows):
                han_ji_id = query_row[0]
                chu_im = query_row[2]

                # Split the reading into initial, final and tone number
                chu_im_code = ji.split_chu_im(chu_im)
                siann_bu = chu_im_code[0].strip()
                un_bu = chu_im_code[1]
                tiau_ho = chu_im_code[2]

                # Index of the initial. When the syllable has no initial
                # the index is set explicitly, so the writes below neither
                # raise NameError nor reuse the previous reading's index.
                # -1 matches the value this cell treats as "not found" for
                # get_un_idx(), which makes column F show 0.
                # NOTE(review): confirm 0 is the wanted marker for a
                # syllable without an initial.
                siann_index = ji.get_siann_idx(siann_bu) if siann_bu != "" else -1

                # Index of the final; -1 is handled as "not found"
                un_index = ji.get_un_idx(un_bu)
                if un_index == -1:
                    # Record the final that has no index
                    print(f"Can not find 【{search_han_ji}】in Han-Ji-Khoo!!")
                    khiam_ji_sheet.range('A' + str(khiam_ji_index)).value = f"{search_han_ji}: (Un-Bu: {un_bu})"
                    khiam_ji_index += 1

                # NOTE(review): this is still called when un_index is -1;
                # confirm get_sip_ngoo_im_idx() copes with that value.
                sip_ngoo_im_index = ji.get_sip_ngoo_im_idx(un_index)

                reading = [
                    chu_im, siann_bu, un_bu, tiau_ho,
                    siann_index + 1, un_index + 1, sip_ngoo_im_index,
                ]

                if ji_found == 0:
                    # First (highest freq) reading goes next to the character
                    _write_cells(chu_im_sheet, i, "BCDEFGH", reading)
                else:
                    # Further readings are listed in ji_khoo_sheet together
                    # with the chu_im_sheet row and the database id
                    _write_cells(
                        ji_khoo_sheet,
                        ji_khoo_index,
                        "ABCDEFGHIJ",
                        [search_han_ji, *reading, i, han_ji_id],
                    )
                    ji_khoo_index += 1

        #=========================================================
        # Advance past the characters just written and close the
        # source row with a line-break marker
        #=========================================================
        chu_im_index += len(han_ji_list)
        chu_im_sheet.range('A' + str(chu_im_index)).value = '\n'
        chu_im_index += 1
finally:
    #==========================================================
    # Close the database connection even when a row fails
    #==========================================================
    conn.close()

row = 1
row = 2
row = 3
Can not find 【眾】in Han-Ji-Khoo!!
row = 4
Can not find 【 】in Han-Ji-Khoo!!
row = 5
row = 6
row = 7
Can not find 【 】in Han-Ji-Khoo!!
row = 8
row = 9
row = 10
Can not find 【 】in Han-Ji-Khoo!!
row = 11
row = 12
row = 13
Can not find 【 】in Han-Ji-Khoo!!
row = 14
row = 15
Can not find 【﹖】in Han-Ji-Khoo!!
row = 16
Can not find 【 】in Han-Ji-Khoo!!
row = 17
row = 18
row = 19
Can not find 【 】in Han-Ji-Khoo!!
row = 20
row = 21
row = 22
Can not find 【 】in Han-Ji-Khoo!!
row = 23
row = 24
Can not find 【眾】in Han-Ji-Khoo!!
row = 25
Can not find 【 】in Han-Ji-Khoo!!
row = 26
row = 27
row = 28
Can not find 【 】in Han-Ji-Khoo!!
row = 29
row = 30
Can not find 【﹖】in Han-Ji-Khoo!!
Can not find 【﹖】in Han-Ji-Khoo!!
Can not find 【﹖】in Han-Ji-Khoo!!
Can not find 【﹖】in Han-Ji-Khoo!!
Can not find 【﹖】in Han-Ji-Khoo!!
Can not find 【﹖】in Han-Ji-Khoo!!
row = 31
Can not find 【 】in Han-Ji-Khoo!!
row = 32
Can not find 【 】in Han-Ji-Khoo!!
row = 33
row = 34
Can not find 【埏】in Han-Ji-Khoo!!
Can not fi