In [None]:
import json
import os
import csv

import numpy as np
import matplotlib.pyplot as plt
from fontTools.ttLib import TTFont
from PIL import Image, ImageDraw, ImageFont, ImageChops
from tqdm import tqdm

import logging
logging.getLogger("fontTools").setLevel(logging.ERROR) # 只是想消除 '2 extra bytes in post.stringData array'

# The project root is taken to be the parent of the current working directory.
CUR_DIR = os.getcwd()
ROOT_DIR = os.path.abspath(os.path.join(CUR_DIR, os.pardir))

# Character-frequency table, read once when this cell runs.
CHAR_FREQ_FILE = os.path.join(ROOT_DIR, 'resources', 'char_freq.json')
with open(CHAR_FREQ_FILE, "r", encoding="utf-8") as freq_file:
    CHAR_FREQ_DB = json.load(freq_file)

# Output folder (created on demand) and the two CSV reports written into it.
OUT_FOLDER = os.path.join(ROOT_DIR, 'output', 'vector_test')
os.makedirs(OUT_FOLDER, exist_ok=True)
ERRORS_CSV = os.path.join(OUT_FOLDER, 'vector_errors.csv')
CORRECT_CSV = os.path.join(OUT_FOLDER, 'vector_correct.csv')


TODO: Fix error

In [None]:
def compare_char_render(char_1: str, char_2: str, font_path1: str, font_path2: str, show_img: bool = False, similarity_threshold: float = 0.975) -> bool:
    """
    Render one character with each of two fonts and report whether the renders differ.

    Each character is drawn in black, centered on a white 64x64 grayscale canvas
    at font size 48. Similarity is ``1 - mean(abs pixel difference) / 255``.

    Parameters:
      char_1: The character to render with the first font.
      char_2: The character to render with the second font.
      font_path1: File path for the first font.
      font_path2: File path for the second font.
      show_img: If True, display the two renders side by side using matplotlib.
      similarity_threshold: Similarity threshold (0 - 1); renders whose similarity
        is at or above this value are considered 'the same'.

    Returns:
      bool: True if the two renders are sufficiently different, or if exactly one
      font supports its character. False if the renders are similar, or if
      neither font supports its character.

    Raises:
      Whatever ``ImageFont.truetype`` raises when a font file cannot be opened
      for rendering (cmap-reading failures are only printed, not raised).
    """
    canvas_size = 64   # Side length of the square canvas, in pixels
    font_size = 48     # Size passed to ImageFont.truetype

    def load_supported_chars(font_path):
        """Return the characters listed in the font's best cmap; empty set on any failure."""
        try:
            # The context manager closes the font file as soon as the cmap has
            # been read, so repeated calls do not leak open file handles.
            with TTFont(font_path) as ttf:
                cmap = ttf.getBestCmap()
                return {chr(code) for code in cmap.keys()}
        except Exception as e:
            print(f"Failed to load font {font_path}: {e}")
            return set()

    def render_char(font, char):
        """Draw ``char`` in black, centered on a white canvas, and return the image."""
        img = Image.new("L", (canvas_size, canvas_size), color=255)
        draw = ImageDraw.Draw(img)
        # The bounding box may not start at (0, 0); subtract its origin so the
        # inked area (not the text origin) ends up centered.
        bbox = draw.textbbox((0, 0), char, font=font)
        w, h = bbox[2] - bbox[0], bbox[3] - bbox[1]
        x = (canvas_size - w) // 2 - bbox[0]
        y = (canvas_size - h) // 2 - bbox[1]
        draw.text((x, y), char, fill=0, font=font)
        return img

    # Load the cmap and the rendering font for each path (font 1 first).
    supported1 = load_supported_chars(font_path1)
    font1 = ImageFont.truetype(font_path1, font_size)
    supported2 = load_supported_chars(font_path2)
    font2 = ImageFont.truetype(font_path2, font_size)

    # Render only when the font's cmap lists the character; None marks "unsupported".
    image1 = render_char(font1, char_1) if char_1 in supported1 else None
    image2 = render_char(font2, char_2) if char_2 in supported2 else None

    if show_img:
        fig, axs = plt.subplots(1, 2, figsize=(6, 3))
        for ax, image, title in zip(axs, (image1, image2), ("Font1", "Font2")):
            if image is not None:
                ax.imshow(image, cmap="gray")
            else:
                ax.text(0.5, 0.5, "Not Supported", ha="center", va="center")
            ax.set_title(title)
            ax.axis("off")
        plt.tight_layout()
        plt.show()

    if image1 is None and image2 is None:
        return False
    if image1 is None or image2 is None:
        return True

    # Per-pixel absolute difference, averaged over the canvas (range 0 - 255).
    diff = ImageChops.difference(image1, image2)
    avg_diff = np.mean(np.array(diff))
    # Similarity of 1 means the two renders are pixel-identical.
    similarity = 1 - (avg_diff / 255)
    # Convert from numpy.bool_ so the return value matches the annotation.
    return bool(similarity < similarity_threshold)


In [None]:
if __name__ == "__main__":
    # Demo: render one character with the reference font and another with the
    # web font, show both renders, and print whether they are judged different.
    SHS_CN = 'SourceHanSansCN-Regular.woff2'
    SHS_SC = 'SourceHanSansSC-Regular.woff2'
    # Alternative web font file that can be used here: 'fixed.m8p286k7.woff2'
    web_font = 'randomFont_test.ttf'
    font1_path = os.path.join(ROOT_DIR, "fonts", SHS_SC)
    font2_path = os.path.join(ROOT_DIR, "temp", web_font)
    test_char_1, test_char_2 = "和", "㑋"
    result = compare_char_render(
        char_1=test_char_1,
        char_2=test_char_2,
        font_path1=font1_path,
        font_path2=font2_path,
        show_img=True,
        similarity_threshold=0.975,
    )
    print(f"渲染 '{test_char_1}' 是否不一样：{result}")


In [None]:

def load_replace_map(path):
    """Load the character replacement mapping from a JSON file.

    Returns the decoded JSON content, or an empty dict when nothing exists at ``path``.
    """
    if not os.path.exists(path):
        return {}
    with open(path, "r", encoding="utf-8") as mapping_file:
        return json.load(mapping_file)

def get_supported_chars(font_path):
    """Return the set of characters listed in the best cmap of the given font file.

    Any failure while opening or reading the font is printed and results in an
    empty set rather than an exception.
    """
    try:
        # The context manager closes the font file once the cmap has been read,
        # so the file handle is not leaked.
        with TTFont(font_path) as ttf_temp:
            cmap_temp = ttf_temp.getBestCmap()
            return {chr(code) for code in cmap_temp.keys()}
    except Exception as e:
        print(f"Failed to load font {font_path}: {e}")
        return set()

def compare_char_render_v2(char_1: str, char_2: str,
                           font1, supported_chars1: set,
                           font2, supported_chars2: set,
                           similarity_threshold: float = 0.975) -> bool:
    """
    Render one character with each of two pre-loaded fonts and report whether the renders differ.

    Each character is drawn in black, centered on a white 64x64 grayscale canvas.
    Similarity is ``1 - mean(abs pixel difference) / 255``.

    Parameters:
      char_1: The character to render with the first font.
      char_2: The character to render with the second font.
      font1: Pre-initialized font object used to draw ``char_1``.
      supported_chars1: Characters the first font supports.
      font2: Pre-initialized font object used to draw ``char_2``.
      supported_chars2: Characters the second font supports.
      similarity_threshold: Similarity threshold (0 - 1); renders whose similarity
        is at or above this value are considered 'the same'.

    Returns:
      bool: True if the two renders are sufficiently different, or if exactly one
      font supports its character. False if the renders are similar, or if
      neither font supports its character.
    """
    canvas_size = 64   # Side length of the square canvas, in pixels

    # Decide the support-only cases first: when at most one side is supported
    # the answer is already known, so no glyph needs to be rendered.
    has_char_1 = char_1 in supported_chars1
    has_char_2 = char_2 in supported_chars2
    if not has_char_1 and not has_char_2:
        return False
    if not (has_char_1 and has_char_2):
        return True

    def render_char(font, char):
        """Draw ``char`` in black, centered on a white canvas, and return the image."""
        img = Image.new("L", (canvas_size, canvas_size), color=255)
        draw = ImageDraw.Draw(img)
        # The bounding box may not start at (0, 0); subtract its origin so the
        # inked area (not the text origin) ends up centered.
        bbox = draw.textbbox((0, 0), char, font=font)
        w, h = bbox[2] - bbox[0], bbox[3] - bbox[1]
        x = (canvas_size - w) // 2 - bbox[0]
        y = (canvas_size - h) // 2 - bbox[1]
        draw.text((x, y), char, fill=0, font=font)
        return img

    image1 = render_char(font1, char_1)
    image2 = render_char(font2, char_2)

    # Per-pixel absolute difference, averaged over the canvas (range 0 - 255).
    diff = ImageChops.difference(image1, image2)
    avg_diff = np.mean(np.array(diff))
    similarity = 1 - (avg_diff / 255)
    # Convert from numpy.bool_ so the return value matches the annotation.
    return bool(similarity < similarity_threshold)


def process_font(supported_chars, comp_font, description, base_font, supported_base, replace_map, similarity_threshold):
    """
    Compare every character supported by a font against the base font's render.

    For each character ``c`` in ``supported_chars``, ``replace_map.get(c, c)`` is
    rendered with ``base_font`` and ``c`` itself is rendered with ``comp_font``;
    the pair is classified with ``compare_char_render_v2``.

    Parameters:
        supported_chars: Set of characters supported by the font being compared.
        comp_font: Pre-initialized font object of the font being compared.
        description: Description text shown on the tqdm progress bar.
        base_font: Pre-initialized font object used as the reference.
        supported_base: Set of characters supported by the reference font.
        replace_map: Dict mapping a character to its replacement.
        similarity_threshold: Render-similarity threshold passed to the comparison.

    Returns:
        total: Number of characters processed.
        diff: Number of characters whose renders were judged different.
        errors: List of [correct_label, predicted_label, char_freq] rows judged different.
        corrects: List of [correct_label, predicted_label, char_freq] rows judged the same.
    """
    errors = []
    corrects = []
    total = 0
    diff = 0
    # A set of strings iterates in an order that can change between interpreter
    # runs; sorting makes the returned row order (and the CSVs built from it)
    # reproducible.
    for original_char in tqdm(sorted(supported_chars), desc=description):
        char_1 = replace_map.get(original_char, original_char)
        char_2 = original_char
        # Falls back to 5 when the character has no entry in CHAR_FREQ_DB.
        char_freq = CHAR_FREQ_DB.get(char_1, 5)
        result = compare_char_render_v2(
            char_1, char_2,
            base_font, supported_base,
            comp_font, supported_chars,
            similarity_threshold=similarity_threshold
        )
        total += 1
        if result:
            diff += 1
            errors.append([char_1, char_2, char_freq])
        else:
            corrects.append([char_1, char_2, char_freq])
    return total, diff, errors, corrects

if __name__ == "__main__":
    # Font file names and input paths
    SHS_SC = 'SourceHanSansSC-Regular.woff2'
    web_randfont = 'randomFont_test.ttf'
    web_fixfont = 'fixed.m8p286k7.woff2'
    font1_path = os.path.join(ROOT_DIR, "fonts", SHS_SC)
    web_randfont_path = os.path.join(ROOT_DIR, "temp", web_randfont)
    web_fixfont_path = os.path.join(ROOT_DIR, "temp", web_fixfont)
    replace_map_path = os.path.join(ROOT_DIR, "output", "829661174", "font_mapping.json")

    # Replacement mapping
    replace_map = load_replace_map(replace_map_path)

    # Character sets supported by each font
    supported_base = get_supported_chars(font1_path)
    supported_rand = get_supported_chars(web_randfont_path)
    supported_fix = get_supported_chars(web_fixfont_path)

    # Font objects used for rendering, all at the same size
    font_size = 48
    font_base = ImageFont.truetype(font1_path, font_size)
    font_rand = ImageFont.truetype(web_randfont_path, font_size)
    font_fix = ImageFont.truetype(web_fixfont_path, font_size)

    # Running totals accumulated over all font tasks
    similarity_threshold = 0.95
    total_count = 0
    diff_count = 0
    all_errors = []
    all_corrects = []

    # Each task is (supported character set, font object to compare, progress description)
    font_tasks = [
        (supported_rand, font_rand, "Processing random font (U+3400-U+4DB5)"),
        (supported_fix, font_fix, "Processing fixed font (U+4E00-U+9FA5)")
    ]

    # Compare every task font against the base font
    for supported_chars, comp_font, desc in font_tasks:
        t, d, errors, corrects = process_font(
            supported_chars=supported_chars,
            comp_font=comp_font,
            description=desc,
            base_font=font_base,
            supported_base=supported_base,
            replace_map=replace_map,
            similarity_threshold=similarity_threshold,
        )
        total_count += t
        diff_count += d
        all_errors += errors
        all_corrects += corrects

    # Summary statistics
    print(f"Total characters processed: {total_count}")
    print(f"Different renders count: {diff_count}")
    if total_count > 0:
        print(f"Difference ratio: {diff_count/total_count:.2%}")

    # Write the error samples first, then the correct samples, each to its own CSV
    csv_header = ['correct_label', 'predicted_label', 'char_freq']
    csv_outputs = (
        (ERRORS_CSV, all_errors, f"\n错误样本已保存到: {ERRORS_CSV}"),
        (CORRECT_CSV, all_corrects, f"正确样本保存至: {CORRECT_CSV}"),
    )
    for csv_path, rows, saved_message in csv_outputs:
        with open(csv_path, 'w', newline='', encoding='utf-8-sig') as f:
            writer = csv.writer(f)
            writer.writerow(csv_header)
            writer.writerows(rows)
        print(saved_message)
