In [1]:
# %%
import io
import os
import sys
import types

from IPython import get_ipython
from nbformat import read
from IPython.core.interactiveshell import InteractiveShell

# %%
def find_notebook(fullname, path=None):
    """Locate the ``.ipynb`` file backing a (possibly dotted) module name.

    Only the last component of ``fullname`` is used as the file name, so
    "foo.bar" is looked up as "bar.ipynb" inside each directory of ``path``.
    When that file does not exist, the underscores of the file name are
    replaced by spaces, so ``import Foo_Bar`` can find "Foo Bar.ipynb".

    Args:
        fullname: fully qualified module name, e.g. "pkg.notebook".
        path: optional iterable of directories to search; when empty or
            None, the current working directory ('') is searched.

    Returns:
        The path of the first matching notebook, or None when there is none.
    """
    name = fullname.rsplit('.', 1)[-1]
    if not path:
        path = ['']

    # Candidate file names, in order of preference. The underscore -> space
    # rewrite is applied to the file name only: rewriting the joined path
    # would also mangle directories that legitimately contain underscores.
    candidates = [name + ".ipynb"]
    spaced_name = name.replace("_", " ") + ".ipynb"
    if spaced_name != candidates[0]:
        candidates.append(spaced_name)

    for d in path:
        for filename in candidates:
            nb_path = os.path.join(d, filename)
            if os.path.isfile(nb_path):
                return nb_path
    return None

# %%
class NotebookLoader(object):
    """Module loader that runs the code cells of a Jupyter notebook as a module."""

    def __init__(self, path=None):
        # The IPython shell is used to translate cell source (magics, shell
        # escapes, ...) into plain Python before it is executed.
        self.shell = InteractiveShell.instance()
        # Directories handed to find_notebook() when resolving a module name.
        self.path = path

    def load_module(self, fullname):
        """Import a notebook as a module.

        Args:
            fullname: fully qualified name of the module to load.

        Returns:
            The module object, already registered in ``sys.modules``.

        Raises:
            ImportError: when no notebook can be found for ``fullname``.
            Exception: whatever a notebook cell raises while executing; in
                that case ``sys.modules`` is restored to its previous state.
        """
        path = find_notebook(fullname, self.path)
        if path is None:
            # Without this guard io.open(None) fails with an unrelated TypeError.
            raise ImportError("No notebook found for module %r" % fullname,
                              name=fullname)

        print("importing Jupyter notebook from %s" % path)

        # load the notebook object
        with io.open(path, 'r', encoding='utf-8') as f:
            nb = read(f, 4)

        # create the module and add it to sys.modules; remember what was
        # registered before so a failed import can be rolled back
        previous = sys.modules.get(fullname)
        mod = types.ModuleType(fullname)
        mod.__file__ = path
        mod.__loader__ = self
        mod.__dict__['get_ipython'] = get_ipython
        sys.modules[fullname] = mod

        # extra work to ensure that magics that would affect the user_ns
        # actually affect the notebook module's ns
        save_user_ns = self.shell.user_ns
        self.shell.user_ns = mod.__dict__

        try:
            for cell in nb.cells:
                if cell.cell_type == 'code':
                    # transform the input to executable Python
                    code = self.shell.input_transformer_manager.transform_cell(cell.source)
                    # run the code in the module
                    exec(code, mod.__dict__)
        except BaseException:
            # A loader must not leave a half-initialised module importable.
            if previous is None:
                sys.modules.pop(fullname, None)
            else:
                sys.modules[fullname] = previous
            raise
        finally:
            self.shell.user_ns = save_user_ns
        return mod

# %%
class NotebookFinder(object):
    """Meta-path finder that locates Jupyter notebooks.

    Implements both the legacy ``find_module`` hook and ``find_spec``; the
    import system of Python 3.12+ only consults ``find_spec``.
    """

    def __init__(self):
        # Cache of NotebookLoader instances, one per distinct search path.
        self.loaders = {}

    def find_module(self, fullname, path=None):
        """Return a loader for ``fullname``, or None when no notebook matches.

        Args:
            fullname: fully qualified module name.
            path: optional iterable of directories to search.
        """
        nb_path = find_notebook(fullname, path)
        if not nb_path:
            return None

        # Lists are not hashable; a tuple is, and unlike a joined string it
        # cannot make two different search paths collide. Every empty path
        # (None, [], ...) shares the None key.
        key = tuple(path) if path else None

        if key not in self.loaders:
            self.loaders[key] = NotebookLoader(path)
        return self.loaders[key]

    def find_spec(self, fullname, path=None, target=None):
        """Return a ModuleSpec for ``fullname``, or None when no notebook matches.

        ``target`` is accepted for protocol compatibility and ignored.
        """
        # Local import keeps this class self-contained.
        import importlib.util

        loader = self.find_module(fullname, path)
        if loader is None:
            return None
        return importlib.util.spec_from_loader(fullname, loader)


# %%
# Register the notebook finder. Re-running this cell used to append one more
# finder each time; drop instances left over from earlier runs of this cell
# (same module and class name) so exactly one, up-to-date finder is installed.
sys.meta_path[:] = [
    finder for finder in sys.meta_path
    if (type(finder).__module__, type(finder).__name__)
    != (NotebookFinder.__module__, NotebookFinder.__name__)
]
sys.meta_path.append(NotebookFinder())

# %%
# ls nbpackage

# %%
# from nbpackage import han_ji_chu_im as ji


In [2]:
#coding=utf-8
import xlwings as xw
import re
import pandas as pd
import psycopg2
# from modules.my_libs import han_ji_chu_im_python as ji
# from modules.my_libs import import_ipynb
from nbpackage import han_ji_chu_im as ji

#==========================================================
# Excel workbook
#==========================================================

# Workbook that provides the source text
file_path = 'hoo-goa-chu-im.xlsx'
wb = xw.Book(file_path)

# Source worksheet and its total row count: start from the last cell of
# column A and jump upwards to find the row of the last filled cell
source_sheet = wb.sheets['工作表1']
end_row = source_sheet.range('A' + str(source_sheet.cells.last_cell.row)).end('up').row
print(f'end_row = {end_row}')

#==========================================================
# Prepare the worksheets this program writes to
#==========================================================
sheet_name_list = ["字庫表", "缺字表", "漢字注音表"]
#-----------------------------------------------------
# Clear each output worksheet that already exists and create the missing ones.
# Existence is checked explicitly instead of relying on an exception: whether
# wb.sheets[name] fails eagerly or only on first use differs by platform, and
# a bare `except:` would also hide unrelated errors.
existing_sheet_names = {existing.name for existing in wb.sheets}
for sheet_name in sheet_name_list:
    if sheet_name in existing_sheet_names:
        sheet = wb.sheets[sheet_name]
        sheet.select()
        sheet.clear()
    else:
        wb.sheets.add(name=sheet_name)

khiam_ji_sheet = wb.sheets["缺字表"]
ji_khoo_sheet  = wb.sheets["字庫表"]
chu_im_sheet   = wb.sheets["漢字注音表"]


importing Jupyter notebook from /Users/alanjui/workspace/rime/ho-lok-oe-chu-im/nbpackage/han_ji_chu_im.ipynb
end_row = 11


In [3]:
#==========================================================
# Database
#==========================================================
conn = psycopg2.connect(database="alanjui", user="alanjui", host="127.0.0.1", port="5432")
cur = conn.cursor()

row = 1     # index for source sheet
chu_im_index = 1    # next free row in the annotation sheet
ji_khoo_index = 1   # next free row in the alternate-readings sheet
khiam_ji_index = 1  # next free row in the missing-characters sheet
end_counter = end_row + 1

# Punctuation that must not be looked up. The quotation-mark range used to be
# written as `\\u2018-\u201D`, which in a raw string means "a backslash, the
# letters u/2/0/1 and everything from '8' to U+201D", so ASCII letters and the
# digits 8-9 were silently treated as punctuation.
piau_tiam = r"[﹐，；：？！\uFF0C\uFF08-\uFF09\u2013-\u2014\u2026\u2018-\u201D\u3000\u3001-\u303F]"
piau_tiam_re = re.compile(piau_tiam)    # compiled once, not once per character

# Look up every reading of one character, most frequent first. The character
# is passed as a bound parameter rather than interpolated into the SQL text.
sql = ("SELECT id, han_ji, chu_im, freq, siann, un, tiau "
       "FROM han_ji "
       "WHERE han_ji=%s "
       "ORDER BY freq DESC;")

try:
    for row in range(1, end_counter):
        print(f'row = {row}')
        # Fetch the text to annotate from the source sheet; an empty cell
        # comes back as None
        cell_value = source_sheet.range('A' + str(row)).value
        han_ji = '' if cell_value is None else str(cell_value).strip()

        #=========================================================
        # Blank cell or bare line break: emit a line break
        #=========================================================
        if not han_ji:
            chu_im_sheet.range('A' + str(chu_im_index)).value = '\n'
            chu_im_index += 1
            continue

        #=========================================================
        # Copy the whole paragraph to the annotation sheet,
        # one character per cell, going down column A
        #=========================================================
        han_ji_list = ji.convert_string_to_list(han_ji)

        chu_im_sheet.range('A' + str(chu_im_index)) \
                    .options(transpose=True).value = han_ji_list

        #=========================================================
        # Annotate the characters of the paragraph one by one
        #=========================================================
        i = chu_im_index    # annotation-sheet row of the current character
        for search_han_ji in han_ji_list:
            # Punctuation is skipped; move on to the next character.
            if piau_tiam_re.search(search_han_ji):
                i += 1
                continue

            cur.execute(sql, (search_han_ji,))
            query_rows = cur.fetchall()

            if not query_rows:
                # No reading found: record the character in the missing sheet
                print(f"Can not find 【{search_han_ji}】in Han-Ji-Khoo!!")
                khiam_ji_sheet.range('A' + str(khiam_ji_index)).value = search_han_ji
                khiam_ji_index += 1
                i += 1
                continue

            # Readings found; a character with several readings gets the
            # extra ones recorded in the alternate-readings sheet
            for ji_found, query_row in enumerate(query_rows):
                han_ji_id = query_row[0]
                # Split the reading into initial, final and tone number
                chu_im = query_row[2]
                chu_im_code = ji.split_chu_im(chu_im)
                siann_bu = chu_im_code[0].strip()
                un_bu = chu_im_code[1]
                tiau_ho = chu_im_code[2]

                # Code of the initial. It is only looked up for a non-empty
                # initial; previously an empty one reused the index of the
                # preceding reading (or raised NameError on the very first).
                # NOTE(review): column F is left blank in that case - confirm
                # whether a specific code is expected for an empty initial.
                siann_index = ji.get_siann_idx(siann_bu) if siann_bu != "" else None
                siann_code = siann_index + 1 if siann_index is not None else None

                # Code of the final
                un_index = ji.get_un_idx(un_bu)
                if un_index == -1:
                    # Record the final that could not be found
                    print(f"Can not find 【{search_han_ji}】in Han-Ji-Khoo!!")
                    khiam_ji_sheet.range('A' + str(khiam_ji_index)).value = f"{search_han_ji}: (Un-Bu: {un_bu})"
                    khiam_ji_index += 1

                # Index of the final in the Sip-Ngoo-Im scheme
                sip_ngoo_im_index = ji.get_sip_ngoo_im_idx(un_index)

                #=========================================================
                # Write the annotation; a 1-D list assigned to a single
                # cell fills the row to its right in one round trip
                #=========================================================
                chu_im_row = [chu_im, siann_bu, un_bu, tiau_ho,
                              siann_code, un_index + 1, sip_ngoo_im_index]
                if ji_found == 0:
                    # First (most frequent) reading: columns B..H next to the character
                    chu_im_sheet.range('B' + str(i)).value = chu_im_row
                else:
                    # Further readings: columns A..J of the alternate-readings
                    # sheet, with the annotation-sheet row and the database id
                    ji_khoo_sheet.range('A' + str(ji_khoo_index)).value = \
                        [search_han_ji] + chu_im_row + [i, han_ji_id]
                    ji_khoo_index += 1

            #=========================================================
            # One more character of the paragraph has been processed
            #=========================================================
            i += 1

        #=========================================================
        # Advance the write pointer past the paragraph and end it
        # with a line break
        #=========================================================
        chu_im_index += len(han_ji_list)
        chu_im_sheet.range('A' + str(chu_im_index)).value = '\n'
        chu_im_index += 1
finally:
    #==========================================================
    # Close the database, also when the loop fails half-way
    #==========================================================
    cur.close()
    conn.close()

row = 1
Un-bu: ik does not exist
Can not find 【昔】in Han-Ji-Khoo!!
Un-bu: siat does not exist
Can not find 【已】in Han-Ji-Khoo!!
Un-bu: ok does not exist
Can not find 【鶴】in Han-Ji-Khoo!!
row = 2
Un-bu: shu does not exist
Can not find 【此】in Han-Ji-Khoo!!
Un-bu: ok does not exist
Can not find 【鶴】in Han-Ji-Khoo!!
row = 3
row = 4
Un-bu: ok does not exist
Can not find 【鶴】in Han-Ji-Khoo!!
Un-bu: it does not exist
Can not find 【一】in Han-Ji-Khoo!!
Un-bu: ut does not exist
Can not find 【不】in Han-Ji-Khoo!!
Un-bu: ok does not exist
Can not find 【復】in Han-Ji-Khoo!!
Un-bu: ok does not exist
Can not find 【復】in Han-Ji-Khoo!!
row = 5
Un-bu: ik does not exist
Can not find 【白】in Han-Ji-Khoo!!
Un-bu: shian does not exist
Can not find 【千】in Han-Ji-Khoo!!
Un-bu: sai does not exist
Can not find 【載】in Han-Ji-Khoo!!
Un-bu: sai does not exist
Can not find 【載】in Han-Ji-Khoo!!
row = 6
row = 7
Un-bu: sing does not exist
Can not find 【晴】in Han-Ji-Khoo!!
Un-bu: shuan does not exist
Can not find 【川】in Han-Ji-Khoo!!
Un-