In [1]:
import onnxruntime as ort

en_sess = ort.InferenceSession("./onnx/ocr_encoder.onnx")
de_sess = ort.InferenceSession("./onnx/ocr_decoder.onnx")

In [2]:
from vietocr.tool.config import Cfg
from vietocr.tool.translate import process_image

config = Cfg.load_config_from_name("vgg_transformer")
config['device'] = 'cpu'
config

  from .autonotebook import tqdm as notebook_tqdm


{'vocab': 'aAàÀảẢãÃáÁạẠăĂằẰẳẲẵẴắẮặẶâÂầẦẩẨẫẪấẤậẬbBcCdDđĐeEèÈẻẺẽẼéÉẹẸêÊềỀểỂễỄếẾệỆfFgGhHiIìÌỉỈĩĨíÍịỊjJkKlLmMnNoOòÒỏỎõÕóÓọỌôÔồỒổỔỗỖốỐộỘơƠờỜởỞỡỠớỚợỢpPqQrRsStTuUùÙủỦũŨúÚụỤưƯừỪửỬữỮứỨựỰvVwWxXyYỳỲỷỶỹỸýÝỵỴzZ0123456789!"#$%&\'()*+,-./:;<=>?@[\\]^_`{|}~ ',
 'device': 'cpu',
 'seq_modeling': 'transformer',
 'transformer': {'d_model': 256,
  'nhead': 8,
  'num_encoder_layers': 6,
  'num_decoder_layers': 6,
  'dim_feedforward': 2048,
  'max_seq_length': 1024,
  'pos_dropout': 0.1,
  'trans_dropout': 0.1},
 'optimizer': {'max_lr': 0.0003, 'pct_start': 0.1},
 'trainer': {'batch_size': 32,
  'print_every': 200,
  'valid_every': 4000,
  'iters': 100000,
  'export': './weights/transformerocr.pth',
  'checkpoint': './checkpoint/transformerocr_checkpoint.pth',
  'log': './train.log',
  'metrics': None},
 'dataset': {'name': 'data',
  'data_root': './img/',
  'train_annotation': 'annotation_train.txt',
  'valid_annotation': 'annotation_val_small.txt',
  'image_height': 32,
  'image_min_width': 32,
  'image_m

In [3]:
import numpy as np

def preprocess_batch(list_img):
    total_img = len(list_img)
    # Get max shape
    batch_width = 0
    batch_list = []
    for idx, img in enumerate(list_img):
        img = process_image(img, config['dataset']['image_height'], 
                config['dataset']['image_min_width'], config['dataset']['image_max_width'])
        im_width = img.shape[2]
        if im_width > batch_width:
            batch_width = im_width
        batch_list.append(img) 
    # Create batch
    batch = np.ones((total_img, 3, config['dataset']['image_height'], batch_width))
    for idx, single in enumerate(batch_list):
        _, height, width = single.shape
        batch[idx, :, :, :width] = single
    return batch

In [4]:
import utility
import os

img_dir = "./imgs"
fnames = os.listdir(img_dir)
fpaths = [os.path.join(img_dir, fname) for fname in fnames if fname.endswith('png')]
fpaths

['./imgs/test.png', './imgs/test_2.png', './imgs/test_3.png']

In [5]:
from PIL import Image

list_img = [Image.open(fpath) for fpath in fpaths]
batch = preprocess_batch(list_img)
batch

array([[[[0.99215686, 0.98431373, 0.97647059, ..., 1.        ,
          1.        , 1.        ],
         [0.99215686, 0.98431373, 0.97647059, ..., 1.        ,
          1.        , 1.        ],
         [0.99215686, 0.98823529, 0.98431373, ..., 1.        ,
          1.        , 1.        ],
         ...,
         [0.96470588, 0.98039216, 0.96078431, ..., 1.        ,
          1.        , 1.        ],
         [0.96862745, 0.96862745, 0.97254902, ..., 1.        ,
          1.        , 1.        ],
         [0.97254902, 0.95294118, 0.96078431, ..., 1.        ,
          1.        , 1.        ]],

        [[0.99215686, 0.98431373, 0.97647059, ..., 1.        ,
          1.        , 1.        ],
         [0.99215686, 0.98431373, 0.97647059, ..., 1.        ,
          1.        , 1.        ],
         [0.99215686, 0.98823529, 0.98431373, ..., 1.        ,
          1.        , 1.        ],
         ...,
         [0.96470588, 0.97647059, 0.95686275, ..., 1.        ,
          1.        , 1. 

In [6]:
batch = batch.copy().astype('float32')

In [7]:
inp = {en_sess.get_inputs()[0].name: batch}
res = en_sess.run(None, inp)
print(res[0].shape)

(170, 3, 256)


In [8]:
from vietocr.tool.translate import translate_onnx
sents, probs= translate_onnx(batch, encoder_sess=en_sess, decoder_sess=de_sess)
for sen, prob in zip(sents, probs):
    print(sen, prob)

[0.9317984122496384, 0.9224570412789622, 0.9078623235225678]
[  1 171  78  60  96 232  44   4 232  92  76  12  42   2 232 144  76  12
  42   2 232 144  76  12 100   2  98  76   2   2   2  96] 0.9317984122496384
[  1 145  76   4  98  76 232 175 146  28  98 232 212 232  77   6 232  99
 122  78 232 212 232 171  78  70 144 232  99   4  96   2] 0.9224570412789622
[  1 191 198 194 194 193 191 195 191 195   2 195   2   2   2   2   2   2
   2   2   2   2   2   2   2   2   2 190 190 190 190 190] 0.9078623235225678


In [9]:
from vietocr.tool.translate import build_model

model, vocab = build_model(config)

In [11]:
for i, s in enumerate(sents):
    s = sents[i].tolist()
    s = vocab.decode(s)
    print(s)

Viêm da khác
Thanh Xuân - Hà Nội - Việt Nam
184431515


In [5]:
import argparse

def str2bool(v):
    return v.lower() in ("true", "t", "1")


parser = argparse.ArgumentParser()
parser.add_argument("--use_gpu", type=str2bool, default=False)
parser.add_argument("--use_onnx", type=str2bool, default=False)

# params for text detector
parser.add_argument("--det_algorithm", type=str, default='DB')
parser.add_argument("--det_model_dir", type=str)
parser.add_argument("--det_limit_side_len", type=float, default=960)
parser.add_argument("--det_limit_type", type=str, default='max')
parser.add_argument("--det_box_type", type=str, default='quad')

# DB parmas
parser.add_argument("--det_db_thresh", type=float, default=0.3)
parser.add_argument("--det_db_box_thresh", type=float, default=0.6)
parser.add_argument("--det_db_unclip_ratio", type=float, default=1.5)
parser.add_argument("--max_batch_size", type=int, default=10)
parser.add_argument("--use_dilation", type=str2bool, default=False)
parser.add_argument("--det_db_score_mode", type=str, default="fast")

args = parser.parse_args()
args.use_gpu

usage: ipykernel_launcher.py [-h] [--use_gpu USE_GPU] [--use_onnx USE_ONNX]
                             [--det_algorithm DET_ALGORITHM]
                             [--det_model_dir DET_MODEL_DIR]
                             [--det_limit_side_len DET_LIMIT_SIDE_LEN]
                             [--det_limit_type DET_LIMIT_TYPE]
                             [--det_box_type DET_BOX_TYPE]
                             [--det_db_thresh DET_DB_THRESH]
                             [--det_db_box_thresh DET_DB_BOX_THRESH]
                             [--det_db_unclip_ratio DET_DB_UNCLIP_RATIO]
                             [--max_batch_size MAX_BATCH_SIZE]
                             [--use_dilation USE_DILATION]
                             [--det_db_score_mode DET_DB_SCORE_MODE]
ipykernel_launcher.py: error: unrecognized arguments: --ip=127.0.0.1 --stdin=9016 --control=9014 --hb=9013 --Session.signature_scheme="hmac-sha256" --Session.key=b"37f2dcfb-70f5-419f-9813-e199d3436d88" --shell=9015 

SystemExit: 2

In [3]:
import paddleocr.tools.infer.utility as utility
from paddleocr.tools.infer.predict_det import TextDetector

args = utility.parse_args()
text_det = TextDetector(args)

usage: ipykernel_launcher.py [-h] [--use_gpu USE_GPU] [--use_xpu USE_XPU]
                             [--use_npu USE_NPU] [--ir_optim IR_OPTIM]
                             [--use_tensorrt USE_TENSORRT]
                             [--min_subgraph_size MIN_SUBGRAPH_SIZE]
                             [--precision PRECISION] [--gpu_mem GPU_MEM]
                             [--image_dir IMAGE_DIR] [--page_num PAGE_NUM]
                             [--det_algorithm DET_ALGORITHM]
                             [--det_model_dir DET_MODEL_DIR]
                             [--det_limit_side_len DET_LIMIT_SIDE_LEN]
                             [--det_limit_type DET_LIMIT_TYPE]
                             [--det_box_type DET_BOX_TYPE]
                             [--det_db_thresh DET_DB_THRESH]
                             [--det_db_box_thresh DET_DB_BOX_THRESH]
                             [--det_db_unclip_ratio DET_DB_UNCLIP_RATIO]
                             [--max_batch_size MAX_BATCH_SIZE

AttributeError: 'tuple' object has no attribute 'tb_frame'