In [1]:
import os
import cv2
import matplotlib.pyplot as plt
from PIL import Image
from vietocr.tool.predictor import Predictor
from vietocr.tool.config import Cfg
from paddleocr import PaddleOCR, draw_ocr

In [2]:
# Path to the TrueType font file used by this notebook.
# The default is the original machine-specific location; set the
# OCR_FONT_PATH environment variable to run the notebook elsewhere
# without editing this cell.
FONT = os.environ.get(
    "OCR_FONT_PATH",
    "C:\\Users\\ADMIN\\Desktop\\Slide_School\\SlideKy7\\PBL6\\Preparation\\latin.ttf",
)

In [3]:
# Text detector: PaddleOCR on CPU, Vietnamese language setting, with the
# angle classifier switched off.
detector = PaddleOCR(
    use_angle_cls=False,
    lang="vi",
    use_gpu=False,
)

[2024/10/23 19:54:36] ppocr DEBUG: Namespace(help='==SUPPRESS==', use_gpu=False, use_xpu=False, use_npu=False, use_mlu=False, ir_optim=True, use_tensorrt=False, min_subgraph_size=15, precision='fp32', gpu_mem=500, gpu_id=0, image_dir=None, page_num=0, det_algorithm='DB', det_model_dir='C:\\Users\\ADMIN/.paddleocr/whl\\det\\en\\en_PP-OCRv3_det_infer', det_limit_side_len=960, det_limit_type='max', det_box_type='quad', det_db_thresh=0.3, det_db_box_thresh=0.6, det_db_unclip_ratio=1.5, max_batch_size=10, use_dilation=False, det_db_score_mode='fast', det_east_score_thresh=0.8, det_east_cover_thresh=0.1, det_east_nms_thresh=0.2, det_sast_score_thresh=0.5, det_sast_nms_thresh=0.2, det_pse_thresh=0, det_pse_box_thresh=0.85, det_pse_min_area=16, det_pse_scale=1, scales=[8, 16, 32], alpha=1.0, beta=1.0, fourier_degree=5, rec_algorithm='SVTR_LCNet', rec_model_dir='C:\\Users\\ADMIN/.paddleocr/whl\\rec\\latin\\latin_PP-OCRv3_rec_infer', rec_image_inverse=True, rec_image_shape='3, 48, 320', rec_batc

In [4]:
# Start from VietOCR's stock 'vgg_transformer' configuration and adjust it.
config = Cfg.load_config_from_name('vgg_transformer')
config['device'] = 'cpu'  # the original note lists 'mps' as the alternative
config['predictor']['beamsearch'] = True
config['cnn']['pretrained'] = True

# Text recognizer built from the adjusted configuration.
recognitor = Predictor(config)

Model weight C:\Users\ADMIN\AppData\Local\Temp\vgg_transformer.pth exsits. Ignore download!


## `Extract Bounding Boxes`

In [5]:
def extract_text_boxes(detector, img_path, save_path, padding=4):
    """Detect text regions in an image and save each padded crop as a PNG.

    Args:
        detector: Object exposing ``ocr(img_path, cls=..., det=..., rec=...)``;
            it is called in detection-only mode (``det=True, rec=False``).
        img_path: Path of the image to process.
        save_path: Directory that receives the crops; created if missing.
        padding: Number of pixels added on every side of each detected box.

    Returns:
        A list of ``[[x_min, y_min], [x_max, y_max]]`` boxes, padded and
        clamped to the image bounds, in the reverse of the detector's order.
        The list is empty when the image cannot be read or no text is found.
        Crops are written as ``<image stem>_box_<i>.png`` where ``i`` is the
        index into the returned list.
    """
    img = cv2.imread(img_path)
    if img is None:
        # cv2.imread signals an unreadable/missing file by returning None.
        print(f"Could not read image: {img_path}")
        return []

    result = detector.ocr(img_path, cls=False, det=True, rec=False)
    # The first entry holds the detections for this image; it is None (or
    # empty) when nothing was detected.
    detections = result[0] if result else None
    if detections is None or len(detections) == 0:
        print(f"No text boxes detected in: {img_path}")
        return []

    height, width = img.shape[:2]

    boxes = []
    for quad in detections:
        # Use the extremes over every corner so rotated quadrilaterals are
        # fully enclosed instead of being clipped to two of their corners.
        xs = [int(point[0]) for point in quad]
        ys = [int(point[1]) for point in quad]
        # Clamp after padding: a negative start index would make numpy slice
        # from the far edge of the image and crop the wrong region.
        boxes.append([
            [max(min(xs) - padding, 0), max(min(ys) - padding, 0)],
            [min(max(xs) + padding, width), min(max(ys) + padding, height)],
        ])
    boxes.reverse()

    os.makedirs(save_path, exist_ok=True)
    # splitext keeps dots inside the name, so 'scan.1.jpg' and 'scan.2.jpg'
    # no longer map to the same output files.
    stem = os.path.splitext(os.path.basename(img_path))[0]

    # Extract and save each cropped image
    for i, ((x_min, y_min), (x_max, y_max)) in enumerate(boxes):
        cropped_img = img[y_min:y_max, x_min:x_max]
        cropped_img_path = os.path.join(save_path, f"{stem}_box_{i}.png")

        if cropped_img.size == 0:  # Ensure the cropped image is not empty
            print(f"Skipped empty image for box {i}")
            continue

        # Saving is best-effort: report the failure and move on to the next box.
        try:
            saved = cv2.imwrite(cropped_img_path, cropped_img)
        except Exception as e:
            print(f"Error saving box {i}: {e}")
            continue

        # cv2.imwrite reports most failures through its boolean return value.
        if saved:
            print(f"Saved: {cropped_img_path}")
        else:
            print(f"Error saving box {i}: could not write {cropped_img_path}")

    return boxes

def process_image_folder(detector, input_folder, output_folder, padding=4):
    """Run ``extract_text_boxes`` on every image file found in a folder.

    Args:
        detector: Detector object forwarded to ``extract_text_boxes``.
        input_folder: Directory whose entries are scanned (not recursively).
        output_folder: Directory that receives the crops; created if missing.
        padding: Padding in pixels forwarded to ``extract_text_boxes``.
    """
    image_suffixes = ('.png', '.jpg', '.jpeg')

    # Make sure there is somewhere to write before any work starts.
    os.makedirs(output_folder, exist_ok=True)

    for entry in os.listdir(input_folder):
        # Skip anything that does not look like an image (case-insensitive).
        if not entry.lower().endswith(image_suffixes):
            continue
        source_path = os.path.join(input_folder, entry)
        print(f"Processing: {entry}")
        extract_text_boxes(detector, source_path, output_folder, padding)

In [6]:
# Source images (machine-specific absolute path) and destination for the crops.
input_folder = "C:\\Users\\ADMIN\\Desktop\\Slide_School\\SlideKy7\\PBL6\\image_21_10"
output_folder = "output_boxes_23.10"

# Create the destination up front; a no-op when it already exists.
os.makedirs(output_folder, exist_ok=True)

process_image_folder(detector, input_folder, output_folder, padding=2)

Processing: cccd_640.jpg
Saved: output_boxes_23.10\cccd_640_box_0.png
Saved: output_boxes_23.10\cccd_640_box_1.png
Saved: output_boxes_23.10\cccd_640_box_2.png
Saved: output_boxes_23.10\cccd_640_box_3.png
Saved: output_boxes_23.10\cccd_640_box_4.png
Saved: output_boxes_23.10\cccd_640_box_5.png
Saved: output_boxes_23.10\cccd_640_box_6.png
Saved: output_boxes_23.10\cccd_640_box_7.png
Saved: output_boxes_23.10\cccd_640_box_8.png
Saved: output_boxes_23.10\cccd_640_box_9.png
Saved: output_boxes_23.10\cccd_640_box_10.png
Saved: output_boxes_23.10\cccd_640_box_11.png
Saved: output_boxes_23.10\cccd_640_box_12.png
Saved: output_boxes_23.10\cccd_640_box_13.png
Saved: output_boxes_23.10\cccd_640_box_14.png
Saved: output_boxes_23.10\cccd_640_box_15.png
Saved: output_boxes_23.10\cccd_640_box_16.png
Processing: image109.jpg
Saved: output_boxes_23.10\image109_box_0.png
Saved: output_boxes_23.10\image109_box_1.png
Saved: output_boxes_23.10\image109_box_2.png
Saved: output_boxes_23.10\image109_box_3.pn