In [None]:
from fontTools.ttLib import TTFont

def get_supported_chars(font_path):
    """Return the set of characters that a font file maps to glyphs.

    Args:
        font_path: Path of a font file readable by fontTools.

    Returns:
        set[str]: One single-character string for every code point listed in
        any Unicode subtable of the font's ``cmap`` table.
    """
    supported_chars = set()

    # Open the font as a context manager so the underlying file is closed as
    # soon as the character map has been read.
    with TTFont(font_path) as font:
        # A font may carry several cmap subtables; take the union of them.
        for table in font['cmap'].tables:
            # Keys of non-Unicode subtables (e.g. Mac Roman) are legacy
            # encoding values, not code points, so chr() on them would report
            # the wrong characters.
            if not table.isUnicode():
                continue
            supported_chars.update(chr(code) for code in table.cmap)

    return supported_chars

# Collect every character covered by the font and report how many there are.
char_table = get_supported_chars(font_path='xxxxxx/font/NotoSansHans-Medium.otf')
print(len(char_table))

In [None]:
import os
import numpy as np
import sys
import random
import urllib
from PIL import Image,ImageFont,ImageDraw
import cv2

# Path of the font file that synthesis_content_img passes to pil_content_render.
BASE_FONT = 'xxxxxx/font/NotoSansHans-Medium.otf'


# for Pillow==10.3.0
def pil_content_render(content, font_size, font_path, font_color=(0, 0, 0)):
    """Draw ``content`` on a white canvas and return it as a BGR numpy array.

    Args:
        content: Text to draw.
        font_size: Size handed to ``ImageFont.truetype``.
        font_path: Path of the font file to load.
        font_color: Fill colour passed to ``ImageDraw.text`` (default black).

    Returns:
        numpy.ndarray: uint8 array of shape ``(bottom, right, 3)``, where
        ``right``/``bottom`` are the last two values of the text bounding box,
        converted from RGB to BGR channel order.
    """
    typeface = ImageFont.truetype(font_path, size=font_size)

    # The canvas spans from the drawing origin (0, 0) to the right/bottom edge
    # of the text bounding box.
    _, _, right, bottom = typeface.getbbox(content)

    # White background, built as an array and wrapped as a PIL image.
    canvas = Image.fromarray(np.full((bottom, right, 3), 255, dtype=np.uint8))
    ImageDraw.Draw(canvas).text((0, 0), content, font=typeface, fill=font_color)

    # Hand the result back in OpenCV's channel order.
    return cv2.cvtColor(np.asarray(canvas), cv2.COLOR_RGB2BGR)


def synthesis_content_img(content, text_size, font_path=None):
    """Render ``content`` in black and centre it on a white canvas.

    The text is rendered with ``pil_content_render`` at a font size equal to
    the canvas height. A rendering that exceeds the canvas in either dimension
    is shrunk, keeping its aspect ratio, until it fits; a rendering that
    already fits is pasted at its native size.

    Args:
        content: Text to render.
        text_size: ``(height, width)`` of the output canvas in pixels.
        font_path: Font file to render with. ``None`` (the default) selects
            the module-level ``BASE_FONT``.

    Returns:
        tuple: ``(bg_img, real_text_pos)``. ``bg_img`` is a uint8 array of
        shape ``(height, width, 3)``; ``real_text_pos`` is ``[x0, y0, x1, y1]``,
        the canvas region covered by the pasted text image.

    Raises:
        ValueError: If the rendered text image has zero width or height.
    """
    st_h, st_w = text_size
    if font_path is None:
        font_path = BASE_FONT

    # Content text, black on white.
    ct_img = pil_content_render(content=content,
                                font_size=st_h,
                                font_path=font_path,
                                font_color=(0, 0, 0))
    ct_h, ct_w = ct_img.shape[:2]

    # Fail with a clear message instead of a ZeroDivisionError or an opaque
    # cv2 error further down.
    if ct_h == 0 or ct_w == 0:
        raise ValueError(
            f'rendered text image is empty ({ct_h}x{ct_w}) for content {content!r}')

    # Shrink, without changing the aspect ratio, to what the canvas can hold.
    new_w, new_h = ct_w, ct_h
    if ct_h > st_h or ct_w > st_w:
        if (ct_h / ct_w) > (st_h / st_w):
            # Relatively taller than the canvas: height is the limiting side.
            new_h = st_h
            new_w = max(1, int((st_h / ct_h) * ct_w))
        else:
            # Relatively wider than the canvas: width is the limiting side.
            new_w = st_w
            new_h = max(1, int((st_w / ct_w) * ct_h))
        # max(1, ...) keeps extreme aspect ratios from rounding a side to 0,
        # which cv2.resize rejects.

    resized_ct_img = cv2.resize(ct_img,
                                (new_w, new_h),
                                interpolation=cv2.INTER_CUBIC)

    # Centre paste.
    x0 = (st_w - new_w) // 2
    y0 = (st_h - new_h) // 2
    x1 = x0 + new_w
    y1 = y0 + new_h
    real_text_pos = [x0, y0, x1, y1]

    bg_img = np.full((st_h, st_w, 3), 255, dtype=np.uint8)
    bg_img[y0:y1, x0:x1, :] = resized_ct_img

    return bg_img, real_text_pos


In [None]:
import matplotlib.pyplot as plt
# Canvas size in pixels for rendered content images: height H, width 2 * H.
H = 48
W = 2 * H

# Smoke test: render one character (black text on a white background) and
# check the result by eye before processing the whole font.
content_im, _ = synthesis_content_img('三', (H, W))

plt.imshow(content_im)
plt.axis('off')
plt.show()
print(content_im.shape)

In [None]:
from importlib import reload  # reload() is not a builtin in Python 3

import torch  # imported here so this cell runs on a fresh kernel
from easydict import EasyDict as edict

# Reload the module before importing from it so that edits made to it while
# the kernel is running are picked up.
import model.recognizer
reload(model.recognizer)
from model.recognizer import TextRecognizer, create_predictor


device = torch.device('cuda')
rec_model_dir = "./ocr_weights/ppv3_rec.pth"
text_predictor = create_predictor(rec_model_dir).eval().to(device=device)

# Options object handed to TextRecognizer.
orc_args = edict()
orc_args.rec_image_shape = f"3, {H}, {W}"
orc_args.rec_batch_num = 6
orc_args.rec_char_dict_path = './ocr_recog/ppocr_keys_v1.txt'
orc_args.use_fp16 = False
cn_recognizer = TextRecognizer(orc_args, text_predictor)

In [None]:
import torch

# Check image and feature shapes: run the sample image through the recognizer
# and print the shape of the input tensor and of each returned tensor.

# HWC uint8 image -> CHW float tensor scaled to [-1, 1].
# The tensor gets its own name: rebinding `content_im` would make this cell
# fail when re-run, because torch.from_numpy() only accepts numpy arrays.
content_tensor = torch.from_numpy(content_im).to(device=device).permute(2, 0, 1) / 127.5 - 1.0
print(content_tensor.shape)
with torch.no_grad():  # inference only, no autograd graph needed
    preds, preds_neck, preds_bc = cn_recognizer.pred_imglist([content_tensor], show_debug=False)
print(preds_neck.shape)
print(preds.shape)
print(preds_bc.shape)

In [None]:
import torch
from tqdm import tqdm


# Sort so the processing order, and the key order of the resulting dict, is
# the same on every run; the iteration order of a set of strings is not.
char_table = sorted(char_table)
char2feat_ocr = {}
with torch.no_grad():
    for char in tqdm(char_table):
        try:
            content_im, _ = synthesis_content_img(char, (H, W))
        except Exception as err:
            # Rendering can fail for some characters: report why through
            # tqdm.write (keeps the progress bar intact) and skip the character.
            tqdm.write(f'render error: {char} ({type(err).__name__}: {err})')
            continue

        # HWC uint8 image -> CHW float tensor scaled to [-1, 1].
        content_tensor = torch.from_numpy(content_im).to(device=device).permute(2, 0, 1) / 127.5 - 1.0

        # NOTE: recognizer.pred_imglist (taken from AnyText) has to be modified
        # so that it returns these three values.
        _, preds_neck, _ = cn_recognizer.pred_imglist([content_tensor], show_debug=False)

        # OCR feature: mean-pool the neck output of the single image over its
        # first axis (presumably the sequence axis; confirm against the model).
        feat = torch.mean(preds_neck[0], dim=0)

        char2feat_ocr[char] = feat.cpu().clone()


In [None]:
# Write the character -> OCR feature dictionary to disk.
torch.save(obj=char2feat_ocr, f='xxxxxx/char2feat_ocr.pth')