In [137]:
import base64
import json
import re
from hashlib import sha256
from pathlib import Path
from typing import Any
from urllib.parse import unquote

import lark_oapi as lark
import requests
from bs4 import BeautifulSoup, Tag
from bs4.element import Comment, NavigableString, PageElement
from lark_oapi.api.docx.v1 import (Block, Equation, GetDocumentRequest, GetDocumentResponse, ListDocumentBlockRequest,
                                   ListDocumentBlockResponse, Text, TextElement, TextElementStyle, TextRun)
from lark_oapi.api.drive.v1 import DownloadMediaRequest, DownloadMediaResponse
from lark_oapi.api.sheets.v3 import (GetSpreadsheetSheetRequest, GetSpreadsheetSheetResponse,
                                     GetSpreadsheetSheetResponseBody)

from secret import APP_ID, APP_SECRET

In [138]:
class MyLark:
    """Small convenience wrapper around the Feishu/Lark OpenAPI used by this notebook.

    Builds a ``lark.Client`` from the given app credentials, fetches a tenant
    access token at construction time, and exposes helpers for docx blocks,
    spreadsheet sheets and images. Sheets are cached as JSON under ``sheets/``
    and images as files under ``images/`` (both relative to the working directory).
    """

    def __init__(self, app_id: str, app_secret: str, log_level: lark.LogLevel):
        """Store the credentials, build the SDK client and fetch a tenant access token.

        Args:
            app_id: Application id used for both the SDK client and the token request.
            app_secret: Application secret paired with ``app_id``.
            log_level: Log level handed to the SDK client builder.
        """
        self.app_id: str = app_id
        self.app_secret: str = app_secret
        self.log_level: lark.LogLevel = log_level

        self.client: lark.Client = (lark.Client.builder()
                                    .app_id(app_id)
                                    .app_secret(app_secret)
                                    .log_level(log_level)
                                    .build())
        self.tenant_access_token: str = self.get_tenant_access_token()

    def get_tenant_access_token(self) -> str:
        """Request a tenant access token, store it on the instance and return it.

        Raises:
            KeyError: If the JSON reply has no ``tenant_access_token`` field.
        """
        # Use the credentials of *this* instance; the module-level APP_ID/APP_SECRET
        # would silently override whatever was passed to the constructor.
        with requests.post("https://open.feishu.cn/open-apis/auth/v3/tenant_access_token/internal", json={
            "app_id": self.app_id,
            "app_secret": self.app_secret
        }) as response:
            self.tenant_access_token = response.json()["tenant_access_token"]
        return self.tenant_access_token

    def get_docx_blocks(self, document_id: str, document_revision_id: int = -1) -> list[Block]:
        """Return every block of a docx document, following pagination until exhausted.

        Args:
            document_id: Id of the document to list.
            document_revision_id: Revision passed through to the API (default ``-1``).

        Raises:
            AssertionError: If any page request reports failure.
        """
        items: list[Block] = []
        page_token: str = ""
        while True:
            request: ListDocumentBlockRequest = (ListDocumentBlockRequest.builder()
                                                 .document_id(document_id)
                                                 .page_size(500)
                                                 .page_token(page_token)
                                                 .document_revision_id(document_revision_id)
                                                 .build())
            response: ListDocumentBlockResponse = self.client.docx.v1.document_block.list(request)
            assert response.success(), f"listing blocks of {document_id} failed: code={response.code}, msg={response.msg}"
            # ``items`` may be missing on an empty page; treat that as "no blocks".
            items.extend(response.data.items or [])
            if not response.data.has_more:
                break
            page_token = response.data.page_token
        return items

    def get_spreadsheet_sheet_info(self, spreadsheet_token: str, sheet_id: str) -> GetSpreadsheetSheetResponse:
        """Fetch the sheet metadata (the caller reads grid size and merges from it).

        The raw SDK response is returned unchecked; callers must test ``success()``.
        """
        request: GetSpreadsheetSheetRequest = GetSpreadsheetSheetRequest.builder().spreadsheet_token(spreadsheet_token).sheet_id(sheet_id).build()
        response: GetSpreadsheetSheetResponse = self.client.sheets.v3.spreadsheet_sheet.get(request)
        return response

    def get_spreadsheet_sheet_value(self, spreadsheet_token: str, sheet_id: str, *, valueRenderOption: str | None = None) -> dict[str, Any]:
        """Fetch the cell values of one sheet through the v2 REST endpoint.

        Args:
            spreadsheet_token: Token of the spreadsheet.
            sheet_id: Id of the sheet, used as the range of the request.
            valueRenderOption: Forwarded as the query parameter of the same name.

        Returns:
            The decoded JSON body of the reply.
        """
        url = f"https://open.feishu.cn/open-apis/sheets/v2/spreadsheets/{spreadsheet_token}/values/{sheet_id}"
        with requests.get(url=url, params={"valueRenderOption": valueRenderOption}, headers={"Authorization": f"Bearer {self.tenant_access_token}"}) as response:
            return response.json()

    def get_all_spreadsheet_sheets_with_cache(self, SpreadsheetToken_SheetID_list: list[str], *, use_cache: bool) -> tuple[dict[str, dict[str, Any]], dict[str, dict[str, Any]]]:
        """Load info and values for a list of sheets, using ``sheets/*.json`` as a cache.

        For every ``"<spreadsheet_token>_<sheet_id>"`` key, the info and value files
        are read from disk when they exist and ``use_cache`` is true; otherwise the
        data is downloaded and (re)written to disk.

        Args:
            SpreadsheetToken_SheetID_list: Keys of the form ``<spreadsheet_token>_<sheet_id>``.
            use_cache: When false, always download and overwrite the cached files.

        Returns:
            ``(sheet_info_dict, sheet_value_dict)``, both keyed by the input keys.

        Raises:
            RuntimeError: If the sheet-info request reports failure.
            ValueError: If a key contains no ``_`` separator.
        """
        path = Path("sheets")
        path.mkdir(parents=True, exist_ok=True)

        sheet_info_dict: dict[str, Any] = {}
        sheet_value_dict: dict[str, dict[str, Any]] = {}
        for SpreadsheetToken_SheetID in SpreadsheetToken_SheetID_list:
            # Split on the last underscore only, so an underscore inside the
            # spreadsheet token cannot break the two-way unpacking.
            spreadsheet_token, sheet_id = SpreadsheetToken_SheetID.rsplit("_", 1)
            info_path = path / f"{SpreadsheetToken_SheetID}_info.json"
            value_path = path / f"{SpreadsheetToken_SheetID}_value.json"

            if info_path.is_file() and use_cache:  # cached copy exists and may be used
                with open(info_path, "r", encoding="utf-8") as fp:
                    sheet_info_dict[SpreadsheetToken_SheetID] = json.load(fp)
            else:  # download and store
                response: GetSpreadsheetSheetResponse = self.get_spreadsheet_sheet_info(spreadsheet_token, sheet_id)
                if not response.success():
                    # Fail before writing, otherwise an empty payload would be
                    # cached and reused by later runs with use_cache=True.
                    raise RuntimeError(
                        f"fetching sheet info for {SpreadsheetToken_SheetID} failed: "
                        f"code={response.code}, msg={response.msg}"
                    )
                sheet_info_str: str = lark.JSON.marshal(response.data, 4)
                sheet_info: dict[str, Any] = json.loads(sheet_info_str)
                sheet_info_dict[SpreadsheetToken_SheetID] = sheet_info
                with open(info_path, "w", encoding="utf-8") as fp:
                    fp.write(sheet_info_str)

            if value_path.is_file() and use_cache:  # cached copy exists and may be used
                with open(value_path, "r", encoding="utf-8") as fp:
                    sheet_value_dict[SpreadsheetToken_SheetID] = json.load(fp)
            else:  # download and store
                obj = self.get_spreadsheet_sheet_value(spreadsheet_token, sheet_id, valueRenderOption="FormattedValue")
                sheet_value_dict[SpreadsheetToken_SheetID] = obj["data"]
                with open(value_path, "w", encoding="utf-8") as fp:
                    json.dump(obj["data"], fp, ensure_ascii=False, indent=4)

        return sheet_info_dict, sheet_value_dict

    def download_image(self, file_token: str) -> bytes:
        """Download a media file and return its raw bytes.

        Raises:
            RuntimeError: If the download request reports failure.
        """
        request: DownloadMediaRequest = (DownloadMediaRequest.builder()
                                         .file_token(file_token)
                                         .build())
        response: DownloadMediaResponse = self.client.drive.v1.media.download(request)
        if not response.success():
            raise RuntimeError(f"downloading media {file_token} failed: code={response.code}, msg={response.msg}")
        return response.file.read()

    def get_image_with_cache(self, file_token: str) -> bytes:
        """Return the image bytes, reading ``images/<file_token>.png`` when it exists.

        On a cache miss the image is downloaded and written to that path first.
        The ``.png`` suffix is applied regardless of the actual image format.
        """
        path = Path("images") / f"{file_token}.png"
        if path.is_file():
            return path.read_bytes()
        path.parent.mkdir(parents=True, exist_ok=True)
        image_data = self.download_image(file_token)
        path.write_bytes(image_data)
        return image_data


def image_to_base64(image: bytes) -> str:
    """Return the standard Base64 encoding of ``image`` as a text string."""
    encoded: bytes = base64.b64encode(image)
    return str(encoded, "utf-8")

In [139]:
# Document rendered by this notebook, and the block_type value that the code
# below treats as an embedded spreadsheet ("sheet") block.
DOCUMENT_ID = "JYQswCHz7ilaTykFqvOcWFc1nrg"
SHEET_BLOCK_TYPE = 30

my_lark = MyLark(APP_ID, APP_SECRET, lark.LogLevel.INFO)
blocks: list[Block] = my_lark.get_docx_blocks(DOCUMENT_ID)

# Each sheet block carries one "<spreadsheet_token>_<sheet_id>" key in sheet.token.
SpreadsheetToken_SheetID_list: list[str] = [
    block.sheet.token
    for block in blocks
    if block.block_type == SHEET_BLOCK_TYPE
]

# use_cache=False: always re-download and overwrite the files under sheets/.
sheet_info_dict, sheet_value_dict = my_lark.get_all_spreadsheet_sheets_with_cache(
    SpreadsheetToken_SheetID_list,
    use_cache=False,
)

In [141]:
def new_tag(*, name: str, attrs=None, contents: list | None = None) -> Tag:
    """Create a detached ``Tag`` and optionally fill it with child nodes.

    Args:
        name: Tag name, e.g. ``"span"``.
        attrs: Attribute mapping passed straight to ``Tag``.
        contents: Child nodes to add in order; ``None`` leaves the tag empty.
    """
    element = Tag(name=name, attrs=attrs)
    if contents is None:
        return element
    element.extend(contents)
    return element


def get_lang(char: str) -> str:
    """Classify a character as ``'en'`` or ``'zh'``.

    ASCII letters, digits and the punctuation ``, . : ? %`` count as ``'en'``;
    everything else (including spaces and the empty string) counts as ``'zh'``.
    """
    looks_english = re.match(r"[0-9a-zA-Z,.:?%]", char) is not None
    return 'en' if looks_english else 'zh'


def get_lang_content(string: str) -> list[PageElement]:
    """Wrap each run of same-language characters in a ``<span lang="...">`` tag.

    The language of every character comes from ``get_lang``; consecutive
    characters with the same language share one span. An empty string yields
    an empty list.
    """
    # Collect (language, characters) runs first, then turn them into tags.
    runs: list[tuple[str, list[str]]] = []
    for ch in string:
        ch_lang = get_lang(ch)
        if runs and runs[-1][0] == ch_lang:
            runs[-1][1].append(ch)
        else:
            runs.append((ch_lang, [ch]))

    return [
        new_tag(name="span", attrs={"lang": run_lang}, contents=[NavigableString(''.join(run_chars))])
        for run_lang, run_chars in runs
    ]


def get_string_content(string: str) -> list[PageElement]:
    """Convert text to nodes: language-tagged spans per line, ``<br>`` per newline.

    Empty lines contribute no spans, but every ``"\\n"`` still produces a ``<br>``.
    """
    lines = string.split("\n")
    last_index = len(lines) - 1
    nodes: list[PageElement] = []
    for index, line in enumerate(lines):
        if line:
            nodes += get_lang_content(line)
        # A line break follows every line except the final one.
        if index != last_index:
            nodes.append(Tag(name="br", can_be_empty_element=True))
    return nodes


def get_sheet_html(sheet_info: dict[str, Any], sheet_value: dict[str, Any]) -> Tag:
    """Render one spreadsheet sheet as an HTML ``<table>`` tag.

    Args:
        sheet_info: Mapping with ``["sheet"]["grid_properties"]`` (``row_count``,
            ``column_count``) and optional ``["sheet"]["merges"]``.
        sheet_value: Mapping whose ``["valueRange"]["values"]`` is a 2-D list of cells.

    Returns:
        A ``<table>`` with one ``<tr>`` per grid row. Cells covered by a merge are
        omitted and the merge's top-left cell carries ``rowspan``/``colspan``.
        Merge indices are treated as inclusive on both ends.
    """
    cell_rows = sheet_value["valueRange"]["values"]
    sheet_meta = sheet_info["sheet"]
    merged_regions = sheet_meta.get("merges", [])
    n_rows = sheet_meta["grid_properties"]["row_count"]
    n_cols = sheet_meta["grid_properties"]["column_count"]

    # covered[r][c] is True for cells hidden behind a merge's anchor cell;
    # spans maps each anchor (top-left) cell to its (rowspan, colspan).
    covered = [[False] * n_cols for _ in range(n_rows)]
    spans = {}
    for region in merged_regions:
        top, bottom = region["start_row_index"], region["end_row_index"]
        left, right = region["start_column_index"], region["end_column_index"]
        spans[(top, left)] = (bottom - top + 1, right - left + 1)
        for row_idx in range(top, bottom + 1):
            for col_idx in range(left, right + 1):
                if row_idx == top and col_idx == left:
                    continue
                covered[row_idx][col_idx] = True

    soup = BeautifulSoup("", "html.parser")
    table = soup.new_tag("table")
    for row_idx in range(n_rows):
        row_tag = soup.new_tag("tr", attrs={"class": [f"tr-row-{row_idx+1}"]})
        for col_idx in range(n_cols):
            if covered[row_idx][col_idx]:
                continue
            cell_tag = soup.new_tag("td", attrs={"class": [f"td-row-{row_idx+1}", f"td-col-{col_idx+1}", f"td-row-{row_idx+1}-col-{col_idx+1}"]})

            # Only spans larger than one cell are written as attributes.
            span = spans.get((row_idx, col_idx))
            if span is not None:
                n_span_rows, n_span_cols = span
                if n_span_rows > 1:
                    cell_tag["rowspan"] = n_span_rows
                if n_span_cols > 1:
                    cell_tag["colspan"] = n_span_cols

            # The value grid may be smaller than the declared grid size.
            try:
                raw = cell_rows[row_idx][col_idx]
                if raw is None or isinstance(raw, list | dict):
                    raw = ""
                cell_tag.extend(get_string_content(str(raw)))
            except IndexError:
                print(f"Cell ({row_idx}, {col_idx}) is out of bounds for the provided values.")
                cell_tag.string = ""
            row_tag.append(cell_tag)
        table.append(row_tag)
    return table


class Blocks:
    def __init__(self, blocks: list[Block]):
        self.blocks = blocks
        self.block_dict = {block.block_id: block for block in blocks}
        self.global_image_counter = 0
        self.global_spreadsheet_counter = 0

    def construct_text_elements(self, text: Text) -> list:
        contents: list[PageElement] = []
        text_color: int | None = None  # 记录当前文本颜色，使公式的颜色与之前的文本颜色一致

        for element in text.elements:
            # 文本
            if element.text_run is not None:
                text_element_style: TextElementStyle = element.text_run.text_element_style
                element_contents: list[PageElement] = get_string_content(element.text_run.content)
                if text_element_style.text_color is not None:
                    text_color = text_element_style.text_color  # 更新当前文本颜色
                    element_contents = [new_tag(
                        name="span",
                        attrs={"class": [f"text-color-{text_element_style.text_color}"]},
                        contents=element_contents,
                    )]
                else:
                    text_color = None  # 重置为 None
                if text_element_style.bold:
                    element_contents = [new_tag(name="strong", contents=element_contents)]
                if text_element_style.italic:
                    element_contents = [new_tag(name="em", contents=element_contents)]
                if text_element_style.underline:
                    element_contents = [new_tag(name="u", contents=element_contents)]
                if text_element_style.strikethrough:
                    element_contents = [new_tag(name="s", contents=element_contents)]
                if text_element_style.inline_code:
                    element_contents = [new_tag(name="code", contents=element_contents)]
                    element_contents = [new_tag(name="pre", contents=element_contents)]
                if (text_element_style.link is not None
                        or element.text_run.content.startswith(("http://", "https://"))):
                    attrs = {"class": ["url"]} if element.text_run.content.startswith(("http://", "https://")) else {}
                    element_contents = [new_tag(
                        name="a",
                        attrs=attrs | {"href": unquote(text_element_style.link.url) if text_element_style.link is not None else element.text_run.content,
                                       "target": "_blank"},
                        contents=element_contents,
                    )]
                    # for span in element_contents[0].find_all("span"):
                    #     span.attrs = {}
                if text_element_style.background_color is not None:
                    element_contents = [new_tag(
                        name="span",
                        attrs={"class": [f"bg-color-{text_element_style.background_color}"]},
                        contents=element_contents,
                    )]
                contents.extend(element_contents)

            # 公式
            if element.equation is not None:
                wrapped_string = rf"\({element.equation.content.strip()}\)"
                equation_tag = new_tag(
                    name="span",
                    attrs={"class": ["equation"] if text_color is None else ["equation", f"text-color-{text_color}"]},
                    contents=[NavigableString(wrapped_string)]
                )
                contents.append(equation_tag)

            # 引用文档
            if element.mention_doc is not None:
                svg = new_tag(name="span", attrs={"class": "doc-logo"}, contents=[BeautifulSoup(doc_svg_content, "html.parser").svg])
                tag = new_tag(
                    name="a",
                    attrs={"href": element.mention_doc.url, "target": "_blank"},
                    contents=[svg, *get_string_content(element.mention_doc.title)]
                )
                contents.append(tag)

        return contents

    def construct_tag_from_block(self, block_id: str) -> Tag:
        block: Block = self.block_dict[block_id]
        inner: Tag
        outer: Tag

        match block.block_type:
            case 1:  # page
                inner = outer = new_tag(name="main", attrs={"class": ["page"]})
            case 2:  # text
                inner = outer = new_tag(name="p", attrs={"class": ["text"]}, contents=self.construct_text_elements(block.text))
            case 3:  # heading1
                inner = outer = new_tag(name="h1", attrs={"class": ["h1", "auto-numbering"]}, contents=self.construct_text_elements(block.heading1))
            case 4:  # heading2
                inner = outer = new_tag(name="h2", attrs={"class": ["h2", "auto-numbering"]}, contents=self.construct_text_elements(block.heading2))
            case 5:  # heading3
                inner = outer = new_tag(name="h3", attrs={"class": ["h3", "auto-numbering"]}, contents=self.construct_text_elements(block.heading3))
            case 6:  # heading4
                inner = outer = new_tag(name="h4", attrs={"class": ["h4", "auto-numbering"]}, contents=self.construct_text_elements(block.heading4))
            case 7:  # heading5
                inner = outer = new_tag(name="h5", attrs={"class": ["h5", "auto-numbering"]}, contents=self.construct_text_elements(block.heading5))
            case 8:  # heading6
                inner = outer = new_tag(name="h6", attrs={"class": ["h6", "auto-numbering"]}, contents=self.construct_text_elements(block.heading6))
            case 12:  # bullet
                inner = outer = new_tag(name="li", attrs={"class": ["bullet"]}, contents=self.construct_text_elements(block.bullet))
            case 13:  # ordered
                inner = outer = new_tag(name="li", attrs={"class": ["ordered"]}, contents=self.construct_text_elements(block.ordered))
            case 14:  # code
                inner = outer = new_tag(name="pre",
                                        attrs={"class": ["code"]},
                                        contents=[new_tag(name="code", attrs={"class": ["code"]}, contents=self.construct_text_elements(block.code))])
            case 15:  # quote
                inner = outer = new_tag(name="blockquote", attrs={"class": ["quote"]})
            case 19:  # callout
                info_tag = new_tag(name="div", attrs={"class": ["callout-icon"]}, contents=[BeautifulSoup(info_svg_content, "html.parser").svg])
                inner = new_tag(name="div", attrs={"class": ["callout-inner"]})
                outer = new_tag(name="div", attrs={"class": ["callout-outer"]}, contents=[info_tag, inner])
            case 21:  # diagram
                inner = outer = new_tag(name="div", attrs={"class": ["diagram"]})
            case 22:  # divider
                inner = outer = new_tag(name="hr")
            case 24:  # grid
                inner = outer = new_tag(name="div", attrs={"class": ["grid-container"]})
            case 25:  # grid_column
                inner = outer = new_tag(name="div", attrs={"class": ["grid-column"], "style": f"flex: {block.grid_column.width_ratio}%;"})
            case 27:  # image
                image_token: str = block.image.token
                image_data: bytes = my_lark.get_image_with_cache(image_token)
                self.global_image_counter += 1
                image_caption_string = block.image.caption.content if block.image.caption is not None else ""
                image_caption_prefix = get_string_content(f"图 {self.global_image_counter}.　")
                img_tag = new_tag(
                    name="img",
                    attrs={
                        "class": ["image"],
                        "image-token": image_token,
                        "src": f"images/{image_token}.png",
                        "alt": image_caption_string,
                        # "style": f"max-width: min(100%, {block.image.width}px);",
                    },
                )
                if image_caption_string:
                    contents = [img_tag, new_tag(name="figcaption", contents=[*image_caption_prefix, NavigableString(image_caption_string)])]
                else:
                    contents = [img_tag]
                inner = outer = new_tag(
                    name="figure",
                    attrs={"class": ["figure-image"], "image-token": image_token},
                    contents=contents,
                )
            case 30:  # sheet
                SpreadsheetToken_SheetID: str = block.sheet.token
                if False and (table := template_soup.find("table", attrs={"spreadsheet-token_sheet-id": SpreadsheetToken_SheetID})) is not None:
                    # 如果模板中已经有了这个工作表的表格，则直接使用
                    inner = outer = table
                else:
                    sheet_image_path = Path("sheets") / f"{SpreadsheetToken_SheetID}.png"
                    if sheet_image_path.is_file():
                        # 如果本地有这个工作表的图片，则使用图片
                        self.global_spreadsheet_counter += 1
                        image_caption_string = f"表 {self.global_spreadsheet_counter + 1}.　{SpreadsheetToken_SheetID}"
                        img_tag = new_tag(
                            name="img",
                            attrs={
                                "src": sheet_image_path.as_posix(),
                                "alt": image_caption_string,
                                # "style": f"max-width: min(100%, {block.image.width}px);",
                            },
                        )
                        contents = [img_tag, new_tag(name="figcaption", contents=get_string_content(image_caption_string))]
                        inner = outer = new_tag(
                            name="figure",
                            attrs={"class": "sheet"},
                            contents=contents,
                        )
                    else:
                        # 如果本地没有这个工作表的图片，则获取工作表信息和内容
                        sheet_info: dict[str, Any] = sheet_info_dict[SpreadsheetToken_SheetID]
                        sheet_value: dict[str, Any] = sheet_value_dict[SpreadsheetToken_SheetID]
                        inner = get_sheet_html(sheet_info, sheet_value)
                        inner["class"] = ["sheet"]
                        inner["spreadsheet-token_sheet-id"] = SpreadsheetToken_SheetID
                        outer = new_tag(
                            name="figure",
                            attrs={"class": "figure-table", "spreadsheet-token_sheet-id": SpreadsheetToken_SheetID},
                            contents=[new_tag(name="div", attrs={"class": ["table-container"]}, contents=[inner]), new_tag(name="figcaption")],
                        )
            case 31:  # table
                row_size: int = block.table.property.row_size
                column_size: int = block.table.property.column_size
                inner = new_tag(name="table", contents=[
                    new_tag(name="tr", attrs={"class": [f"tr-row-{row+1}"]}) for row in range(row_size)
                ])
                if block.children is not None:
                    for i, child_block_id in enumerate(block.children):
                        child_tag: Tag = self.construct_tag_from_block(child_block_id)
                        row = i // column_size
                        inner.find_all("tr")[row].append(child_tag)
                outer = new_tag(
                    name="figure",
                    attrs={"class": "figure-table"},
                    contents=[new_tag(name="div", attrs={"id": block.block_id, "class": ["table-container"]}, contents=[inner]), new_tag(name="figcaption")],
                )
                return outer
            case 32:  # table_cell
                inner = outer = new_tag(name="td")
            case 34:  # quote_container
                inner = outer = new_tag(name="blockquote")
            case 43:  # board
                inner = new_tag(name="img", attrs={"class": "board", "src": f"boards/{block.block_id}.png", "alt": f"Board {block.block_id}"})
                outer = new_tag(name="figure", attrs={"class": "figure-board"}, contents=[inner])
            case _:
                inner = outer = new_tag(name="div")

        outer["id"] = block.block_id

        if block.children is not None:
            ul_tag: Tag | None = None
            ol_tag: Tag | None = None

            for child_block_id in block.children:
                child_block: Block = self.block_dict[child_block_id]

                if child_block.block_type == 12:  # bullet
                    if ul_tag is None:
                        ul_tag = new_tag(name="ul")
                        inner.append(ul_tag)
                else:
                    ul_tag = None

                if child_block.block_type == 13:  # ordered
                    if ol_tag is None:
                        ol_tag = new_tag(name="ol")
                        inner.append(ol_tag)
                else:
                    ol_tag = None

                child_tag: Tag = self.construct_tag_from_block(child_block_id)

                if ul_tag is not None:
                    ul_tag.append(child_tag)
                elif ol_tag is not None:
                    ol_tag.append(child_tag)
                else:
                    inner.append(child_tag)

        return outer


def construct_html_from_blocks(blocks: list[Block]) -> BeautifulSoup:
    """Build the full HTML document for ``blocks`` on a copy of the template.

    The first block is taken as the page block: its first text run becomes the
    ``<title>`` and its rendered tag tree is appended to ``<body>``.
    """
    document = template_soup.__copy__()
    root_block = blocks[0]
    page_title = root_block.page.elements[0].text_run.content
    page_tag = Blocks(blocks).construct_tag_from_block(root_block.block_id)

    document.html.head.title.string = page_title
    document.html.body.append(page_tag)
    return document


def post_processing(soup: BeautifulSoup) -> BeautifulSoup:
    """Apply document-specific fix-ups to the rendered HTML and return a new soup.

    Steps, in order:
      1. Re-parse the serialised document.
      2. In element ``#UoNVdN7D6oDOupxmsghcW4sbnEb``, prefix every numeric cell
         with a tick / warning / cross icon chosen by its value.
      3. Fill the ``<figcaption>`` of each ``figure-table`` with "表 N." plus a
         hard-coded caption (one caption per table, in document order).
      4. Replace element ``#MwESdByLuouUXCxx4Q9c0ZQ5nHb`` with the contents of
         ``code.html``, after mapping its inline styles to CSS classes.
    """
    soup = BeautifulSoup(str(soup), "html.parser")

    # --- 2. value icons -------------------------------------------------------
    rated_table = soup.select_one('#UoNVdN7D6oDOupxmsghcW4sbnEb')
    if rated_table:
        for cell in rated_table.find_all('td'):
            try:
                number = float(cell.get_text(strip=True))
                if number >= 0.0441:
                    icon_class, icon_markup = "tick", tick_svg_content
                elif number < 0.0321:
                    icon_class, icon_markup = "cross", cross_svg_content
                else:
                    icon_class, icon_markup = "warning", warning_svg_content
                icon = new_tag(name="div", attrs={"class": icon_class}, contents=[BeautifulSoup(icon_markup, "html.parser").svg])
                cell.insert(0, icon)
            except ValueError:
                # Non-numeric cell: leave it untouched.
                continue

    # --- 3. table captions ----------------------------------------------------
    # (caption text, optional raw suffix appended without language spans)
    caption_specs = [
        ("一个信用交易所，部分商品已购买", None),
        ("每日可获得的信用（「相见欢」版本以前）", None),
        ("信用交易所商品信息", None),
        ("商品的高低阶关系", None),
        ("2025−07−07 明日方舟一图流 − 物品价值表和信用交易所性价比", None),
        ("一个信用交易所", None),
        ("一个信用交易所，部分商品已购买", None),
        ("平均每日能够购买的物品数量 ", r"\((C = 813)\)"),
        ("平均每日能够购买的物品数量 ", r"\((C = 942)\)"),
        ("假设信用交易所每次刷新只有这一种可能", None),
        ("不同折扣下商品的性价比", None),
    ]
    captions = []
    for caption_text, raw_suffix in caption_specs:
        nodes = get_string_content(caption_text)
        if raw_suffix is not None:
            nodes = [*nodes, NavigableString(raw_suffix)]
        captions.append(nodes)

    for position, figure in enumerate(soup.find_all(attrs={"class": ["figure-table"]})):
        numbered_caption = [*get_string_content(f"表 {position+1}.　"), *captions[position]]
        figure.find("figcaption").extend(numbered_caption)

    # --- 4. code block replacement -------------------------------------------
    placeholder = soup.select_one("#MwESdByLuouUXCxx4Q9c0ZQ5nHb")
    assert isinstance(placeholder, Tag)
    with open("code.html", "r", encoding="utf-8") as code_file:
        code_html = code_file.read()

    # Applied strictly in this order, each as a plain substring replacement.
    style_to_class = [
        ('style="background-color: rgb(255, 255, 255);"', 'class="container"'),
        ('style="line-height: 22px; font-family: &quot;JetBrains Mono&quot;, 思源黑体, Consolas, Consolas, &quot;Courier New&quot;, monospace; font-size: 16px;"', 'class="line"'),
        ('style="line-height: 22px; font-family: &quot;JetBrains Mono&quot;, 思源黑体, Consolas, Consolas, &quot;Courier New&quot;, monospace; font-size: 16px; color: rgb(59, 59, 59);"', 'class="text-color-gray line"'),
        ('style="line-height: 22px; font-family: &quot;JetBrains Mono&quot;, 思源黑体, Consolas, Consolas, &quot;Courier New&quot;, monospace; font-size: 16px; color: rgb(163, 21, 21);"', 'class="text-color-red line"'),
        ('style="color: rgb(175, 0, 219);"', 'class="text-color-magenta"'),
        ('style="color: rgb(59, 59, 59);"', 'class="text-color-gray"'),
        ('style="color: rgb(38, 127, 153);"', 'class="text-color-lightblue"'),
        ('style="color: rgb(0, 0, 255);"', 'class="text-color-blue"'),
        ('style="color: rgb(121, 94, 38);"', 'class="text-color-brown"'),
        ('style="color: rgb(0, 16, 128);"', 'class="text-color-darkblue"'),
        ('style="color: rgb(163, 21, 21);"', 'class="text-color-red"'),
        ('style="color: rgb(0, 128, 0);"', 'class="text-color-green"'),
        ('style="color: rgb(9, 134, 88);"', 'class="text-color-lightgreen"'),
    ]
    for inline_style, css_class in style_to_class:
        code_html = code_html.replace(inline_style, css_class)

    code_soup = BeautifulSoup(code_html, "html.parser")
    container = soup.new_tag("div", attrs={"class": ["code-container"], "id": "MwESdByLuouUXCxx4Q9c0ZQ5nHb"})
    container.extend(code_soup)
    placeholder.replace_with(container)
    return soup


# Static assets used by the rendering functions above (read as UTF-8 text).
with open("template.html", "r", encoding="utf-8") as template_file:
    template_soup = BeautifulSoup(template_file, "html.parser")
doc_svg_content = Path("svgs/doc.svg").read_text(encoding="utf-8")
info_svg_content = Path("svgs/info.svg").read_text(encoding="utf-8")
tick_svg_content = Path("svgs/tick.svg").read_text(encoding="utf-8")
warning_svg_content = Path("svgs/warning.svg").read_text(encoding="utf-8")
cross_svg_content = Path("svgs/cross.svg").read_text(encoding="utf-8")


# Render the document, apply the document-specific fix-ups, and save the result.
soup = post_processing(construct_html_from_blocks(blocks))
Path("saved_soup.html").write_text(str(soup), encoding="utf-8")