In [None]:
import os
from pathlib import Path
from typing import List
from PIL import Image, ImageDraw, ImageFont
import freetype

class FontGlyphExtractor:
    """Render every character-mapped glyph of one font file to a PNG image.

    Images are written to ``<output_dir>/<font file stem>/U+XXXX/U+XXXX.png``:
    one sub-directory per code point, each holding a single grayscale image
    (black glyph on a white square canvas).
    """

    def __init__(self, font_path: str, output_dir: str,
                 font_size: int = 64, image_size: int = 80):
        """Open the font and collect the code points it maps to glyphs.

        Args:
            font_path: Path to the font file.
            output_dir: Root directory under which a per-font folder is created.
            font_size: Size passed to ``ImageFont.truetype`` (default 64).
            image_size: Side length in pixels of the square canvas (default 80).
        """
        self.font_path = font_path
        self.output_dir = output_dir
        self.font_size = font_size
        self.image_size = image_size
        self.font_name = Path(font_path).stem
        self.face = freetype.Face(font_path)
        # The PIL font is the same for every glyph, so load it once here
        # instead of re-opening the font file on each render call.
        self.font = ImageFont.truetype(font_path, size=font_size)
        self.glyphs = self._get_glyphs()

    def _get_glyphs(self) -> List[int]:
        """Return all character codes in the face's charmap that have a glyph.

        Walks the charmap with ``get_first_char`` / ``get_next_char``; a glyph
        index of 0 marks the end of the iteration.
        """
        char_codes = []
        char_code, glyph_index = self.face.get_first_char()
        while glyph_index != 0:
            char_codes.append(char_code)
            char_code, glyph_index = self.face.get_next_char(char_code, glyph_index)
        return char_codes

    def _render_glyph(self, char_code: int) -> "Image.Image":
        """Render a single character centred on a white square canvas.

        Uses ``ImageDraw.textbbox`` because ``ImageDraw.textsize`` no longer
        exists in Pillow 10+. The ink bounding box is centred, so the bbox
        offset (left/top) is subtracted from the drawing origin.

        Args:
            char_code: Code point of the character to draw.

        Returns:
            A mode ``'L'`` image of ``image_size`` x ``image_size`` pixels.
        """
        char = chr(char_code)
        size = self.image_size
        image = Image.new('L', (size, size), color=255)
        draw = ImageDraw.Draw(image)
        left, top, right, bottom = draw.textbbox((0, 0), char, font=self.font)
        x = (size - (right - left)) / 2 - left
        y = (size - (bottom - top)) / 2 - top
        draw.text((x, y), char, font=self.font, fill=0)
        return image

    def extract_glyphs(self):
        """Render every collected glyph and save it as a PNG file."""
        output_path = Path(self.output_dir) / self.font_name
        output_path.mkdir(parents=True, exist_ok=True)
        for char_code in self.glyphs:
            glyph_name = f"U+{char_code:04X}"
            # Each code point gets its own folder named after it.
            glyph_dir = output_path / glyph_name
            glyph_dir.mkdir(exist_ok=True)
            image = self._render_glyph(char_code)
            image_path = glyph_dir / f"{glyph_name}.png"
            image.save(image_path)
            print(f"Saved glyph {glyph_name} to {image_path}")

def main():
    """Extract glyph images from each of the listed font files.

    All fonts share one output root; every font gets its own sub-folder
    created by ``FontGlyphExtractor``.
    """
    output_dir = 'output_glyphs'
    font_files = [
        '中华书局宋体字库/FZSONG_ZhongHuaSongPlane00_2021051420210514150927.TTF',
        '中华书局宋体字库/FZSONG_ZhongHuaSongPlane02_2021012120210122112919.TTF',
        '中华书局宋体字库/FZSONG_ZhongHuaSongPlane15_2021051420210514150947.TTF'
    ]
    for path in font_files:
        FontGlyphExtractor(path, output_dir).extract_glyphs()

# Run the extraction when this cell/script is executed directly.
if __name__ == "__main__":
    main()


In [None]:
import os
from pathlib import Path
from PIL import Image, ImageDraw, ImageFont
import freetype

class FontProcessor:
    """Render the character-mapped glyphs of one font file to PNG images.

    Images are written to ``<output_dir>/<font file stem>/U+XXXX/U+XXXX.png``.
    """

    def __init__(self, font_path: str, output_dir: str,
                 font_size: int = 64, image_size: int = 80):
        """Open the font with FreeType and with PIL.

        Args:
            font_path: Path to the font file.
            output_dir: Root directory under which a per-font folder is created.
            font_size: Size passed to ``ImageFont.truetype`` (default 64).
            image_size: Side length in pixels of the square canvas (default 80).
        """
        self.font_path = font_path
        self.output_dir = output_dir
        self.font_size = font_size
        self.image_size = image_size
        self.font_name = Path(font_path).stem
        self.face = freetype.Face(font_path)
        # Loaded once: the PIL font does not change between glyphs.
        self.font = ImageFont.truetype(font_path, size=font_size)

    def get_glyphs(self) -> list:
        """Return all character codes in the face's charmap that have a glyph.

        Iteration ends when FreeType reports glyph index 0.
        """
        char_codes = []
        char_code, glyph_index = self.face.get_first_char()
        while glyph_index != 0:
            char_codes.append(char_code)
            char_code, glyph_index = self.face.get_next_char(char_code, glyph_index)
        return char_codes

    def render_glyph(self, char_code: int) -> "Image.Image":
        """Render a single character centred on a white square canvas.

        ``ImageDraw.textsize`` was removed in Pillow 10, so the text extent is
        measured with ``ImageDraw.textbbox`` and the ink box is centred by
        compensating for the bbox's left/top offset.

        Args:
            char_code: Code point of the character to draw.

        Returns:
            A mode ``'L'`` image of ``image_size`` x ``image_size`` pixels.
        """
        char = chr(char_code)
        size = self.image_size
        image = Image.new('L', (size, size), color=255)  # white background
        draw = ImageDraw.Draw(image)
        left, top, right, bottom = draw.textbbox((0, 0), char, font=self.font)
        x = (size - (right - left)) / 2 - left
        y = (size - (bottom - top)) / 2 - top
        draw.text((x, y), char, font=self.font, fill=0)  # black text
        return image

    def save_glyph_images(self):
        """Save every glyph as an image, organised in one folder per code point."""
        output_path = Path(self.output_dir) / self.font_name
        output_path.mkdir(parents=True, exist_ok=True)

        for char_code in self.get_glyphs():
            glyph_name = f"U+{char_code:04X}"
            glyph_dir = output_path / glyph_name
            glyph_dir.mkdir(parents=True, exist_ok=True)

            image = self.render_glyph(char_code)
            image_path = glyph_dir / f"{glyph_name}.png"
            image.save(image_path)
            print(f"Saved glyph {glyph_name} at {image_path}")

class FontDatasetBuilder:
    """Run glyph extraction over a collection of font files."""

    def __init__(self, font_files: list, output_dir: str):
        """Remember the font paths and the shared output root."""
        self.output_dir = output_dir
        self.font_files = font_files

    def process_fonts(self):
        """Create a ``FontProcessor`` for each font and save its glyph images."""
        for path in self.font_files:
            FontProcessor(path, self.output_dir).save_glyph_images()

def main():
    """Build the glyph image dataset from the listed font files."""
    output_dir = 'glyph_output'
    font_files = [
        '中华书局宋体字库 2024/中华书局宋体02平面_20231010.TTF',
        '中华书局宋体字库 2024/中华书局宋体15平面_20240514.TTF',
        '中华书局宋体字库 2024/中华书局宋体16平面_20240906.TTF'
    ]

    FontDatasetBuilder(font_files, output_dir).process_fonts()

# Run the dataset build when this cell/script is executed directly.
if __name__ == "__main__":
    main()

In [None]:
import os
from pathlib import Path
from typing import List
from PIL import Image, ImageDraw, ImageFont
import freetype

class FontGlyphExtractor:
    """Render every character-mapped glyph of one font file to a PNG image.

    Images are written flat into ``<output_dir>/<font file stem>/U+XXXX.png``
    as grayscale pictures (black glyph on a white square canvas).
    """

    def __init__(self, font_path: str, output_dir: str,
                 font_size: int = 62, image_size: int = 64):
        """Open the font and collect the code points it maps to glyphs.

        Args:
            font_path: Path to the font file.
            output_dir: Root directory under which a per-font folder is created.
            font_size: Size passed to ``ImageFont.truetype`` (default 62).
            image_size: Side length in pixels of the square canvas (default 64).
        """
        self.font_path = font_path
        self.output_dir = output_dir
        self.font_size = font_size
        self.image_size = image_size
        self.font_name = Path(font_path).stem
        self.face = freetype.Face(font_path)
        # Load the PIL font a single time rather than once per rendered glyph.
        self.font = ImageFont.truetype(font_path, size=font_size)
        self.glyphs = self._get_glyphs()

    def _get_glyphs(self) -> List[int]:
        """Return all character codes in the face's charmap that have a glyph.

        The walk stops as soon as FreeType returns glyph index 0.
        """
        char_codes = []
        char_code, glyph_index = self.face.get_first_char()
        while glyph_index != 0:
            char_codes.append(char_code)
            char_code, glyph_index = self.face.get_next_char(char_code, glyph_index)
        return char_codes

    def _render_glyph(self, char_code: int) -> "Image.Image":
        """Render a single character centred on a white square canvas.

        The text extent comes from ``ImageDraw.textbbox`` (``textsize`` was
        removed in Pillow 10). The bbox's left/top offset is subtracted so the
        visible ink, not the text origin, ends up centred.

        Args:
            char_code: Code point of the character to draw.

        Returns:
            A mode ``'L'`` image of ``image_size`` x ``image_size`` pixels.
        """
        char = chr(char_code)
        size = self.image_size
        image = Image.new('L', (size, size), color=255)
        draw = ImageDraw.Draw(image)
        left, top, right, bottom = draw.textbbox((0, 0), char, font=self.font)
        x = (size - (right - left)) / 2 - left
        y = (size - (bottom - top)) / 2 - top
        draw.text((x, y), char, font=self.font, fill=0)
        return image

    def extract_glyphs(self):
        """Render every collected glyph and save it as a PNG in the font folder."""
        output_path = Path(self.output_dir) / self.font_name
        output_path.mkdir(parents=True, exist_ok=True)
        for char_code in self.glyphs:
            glyph_name = f"U+{char_code:04X}"
            image = self._render_glyph(char_code)
            image_path = output_path / f"{glyph_name}.png"
            image.save(image_path)
            print(f"Saved glyph {glyph_name} to {image_path}")

def main():
    """Extract flat per-font glyph image folders for the listed font files."""
    output_dir = 'output_glyphs'
    font_files = [
        '中华书局宋体字库 2024/中华书局宋体02平面_20231010.TTF',
        '中华书局宋体字库 2024/中华书局宋体15平面_20240514.TTF',
        '中华书局宋体字库 2024/中华书局宋体16平面_20240906.TTF'
    ]
    for path in font_files:
        FontGlyphExtractor(path, output_dir).extract_glyphs()

# Run the extraction when this cell/script is executed directly.
if __name__ == "__main__":
    main()


In [None]:
from fontTools.ttLib import TTFont
from tkinter import Tk, Label, Entry, Button

def char_code_to_text(char_code, font_path):
    """
    Convert a character code into display text using a font file.

    The code is first looked up in the font's Unicode cmap subtables; if it is
    mapped there, the corresponding character is returned. Otherwise, when the
    font has a ``glyf`` table, the code is interpreted as a glyph ID and the
    glyph's name is returned.

    :param char_code: character code (int)
    :param font_path: path to the font file (str)
    :return: the character, or a glyph name, or '' when nothing matches or
             any error occurs (the error is printed, not raised)
    """
    try:
        # The context manager closes the underlying font file on exit,
        # including on the early returns below.
        with TTFont(font_path) as font:
            for table in font['cmap'].tables:
                if table.isUnicode() and char_code in table.cmap:
                    return chr(char_code)
            # Not in any Unicode cmap: fall back to treating the value as a
            # glyph ID. Index the glyph order directly (bounds-checked)
            # instead of scanning every glyph name.
            if 'glyf' in font:
                glyph_order = font.getGlyphOrder()
                if 0 <= char_code < len(glyph_order):
                    return glyph_order[char_code]
        return ''
    except Exception as e:
        # Best-effort lookup: report the problem and return an empty result.
        print(f"Error: {e}")
        return ''

def display_and_edit_result(char_code, font_path):
    """
    Open a small Tk window showing the recognised text in an editable field.

    The field is pre-filled with the text resolved for ``char_code``; pressing
    the save button prints the (possibly edited) text and closes the window.

    :param char_code: character code (int)
    :param font_path: path to the font file (str)
    """
    initial_text = char_code_to_text(char_code, font_path)

    window = Tk()
    window.title("识别结果编辑")

    caption = Label(window, text="识别结果：")
    caption.grid(row=0, column=0)

    text_box = Entry(window)
    text_box.grid(row=0, column=1)
    text_box.insert(0, initial_text)

    def on_save():
        # Report whatever is currently in the entry, then tear the window down.
        edited = text_box.get()
        print(f"Edited Text: {edited}")
        window.destroy()

    save_button = Button(window, text="保存", command=on_save)
    save_button.grid(row=1, column=0, columnspan=2)

    window.mainloop()

# Example usage
if __name__ == "__main__":
    # Character code to look up: U+2308C (the value assigned below)
    char_code = 0x2308C
    font_path = '中华书局宋体字库 2024/中华书局宋体02平面_20231010.TTF'

    # Resolve the character code to its text and print it
    char_text = char_code_to_text(char_code, font_path)
    print(f"识别出的字符文本：{char_text}")

    # Show the result in an editable window (this repeats the lookup internally)
    display_and_edit_result(char_code, font_path)


识别出的字符文本：𣂌
