In [2]:
import base64
import json
import os

import cv2
import fitz

In [16]:
def _css_length(value):
    """Return the numeric part of a CSS-style length such as ``'12px'`` or ``'12.5px'``.

    Any trailing unit letters (or ``%``) are dropped and the remaining number is
    returned as a float. No unit conversion is performed.
    """
    text = str(value).strip()
    number = text.rstrip('abcdefghijklmnopqrstuvwxyzABCDEFGHIJKLMNOPQRSTUVWXYZ%')
    return float(number)


def get_rect_box(data):
    """Compute the rectangle that one form item occupies on its page.

    Args:
        data: dict describing the item. Keys read here:
            ``type`` (``'text'`` or ``'img'``), ``left`` and ``top``, plus
            for text: ``boxWidth``/``boxHeight`` when present, otherwise
            ``text`` and ``fontSize``; for images: ``width`` and ``height``.
            Lengths are strings with a unit suffix such as ``'120px'``.

    Returns:
        Tuple ``(x1, y1, x2, y2)`` of floats.

    Raises:
        ValueError: if ``data['type']`` is neither ``'text'`` nor ``'img'``.
    """
    kind = data['type']
    if kind == 'text':
        if 'boxWidth' in data:
            width = _css_length(data['boxWidth'])
            height = _css_length(data['boxHeight'])
        else:
            # No explicit box: size it from the rendered text length and the
            # font size, with 2 units of slack in each direction.
            font_size = _css_length(data['fontSize'])
            width = fitz.getTextlength(text=data['text'], fontsize=font_size) + 2
            height = font_size + 2
    elif kind == 'img':
        width = _css_length(data['width'])
        height = _css_length(data['height'])
    else:
        # Without this branch width/height would be unbound below.
        raise ValueError('unsupported item type: {!r}'.format(kind))

    # NOTE(review): the fixed +3 horizontal shift presumably compensates for
    # padding in the UI that produced the coordinates -- confirm.
    rect_x1 = _css_length(data['left']) + 3
    rect_y1 = _css_length(data['top'])
    rect_x2 = rect_x1 + width
    rect_y2 = rect_y1 + height

    return rect_x1, rect_y1, rect_x2, rect_y2

In [17]:
def fill_pdf(input_data_file, original_file, filled_result_dir):
    """Overlay form data onto a PDF or image form and save it as ``filled.pdf``.

    Args:
        input_data_file: path to a JSON file containing a list of items. Each
            item has a 1-based ``page`` and a ``type`` of ``'text'`` or
            ``'img'``; text items carry ``text``, ``fontSize`` and optionally
            ``textAlign``; image items carry ``id`` and a base64 PNG data URI
            in ``img``. Position/size keys are consumed by ``get_rect_box``.
        original_file: path to the blank form. A ``.pdf`` extension is opened
            directly; anything else is read as an image and wrapped in a
            single-page PDF.
        filled_result_dir: directory that receives ``filled.pdf`` and the
            intermediate files (``resize.png``, ``<page>-<id>.png``).

    Raises:
        ValueError: if ``original_file`` is an image that cannot be read.
    """
    # read input data and original pdf
    with open(input_data_file, 'r') as input_file:
        input_data = json.load(input_file)

    if original_file.split('.')[-1].lower() == 'pdf':
        # for pdf form, directly load it
        doc = fitz.open(original_file)
    else:
        # for image form, convert it to a single-page pdf
        img = cv2.imread(original_file)
        if img is None:
            # cv2.imread signals failure by returning None instead of raising.
            raise ValueError('cannot read image form: {!r}'.format(original_file))
        if img.shape[0] > 1200:
            # NOTE(review): resize_img_by_height is not defined in the visible
            # cells; it must already exist in the kernel -- confirm.
            img = resize_img_by_height(img, 900)
            original_file = os.path.join(filled_result_dir, 'resize.png')
            cv2.imwrite(original_file, img)
        image_doc = fitz.open(original_file)
        try:
            src = fitz.open('pdf', image_doc.convertToPDF())
        finally:
            image_doc.close()
        doc = fitz.open()
        page = doc.newPage(width=img.shape[1], height=img.shape[0])
        page.showPDFpage(page.rect, src)

    # Alignment codes passed to insertTextbox; missing or unrecognised values
    # fall back to left alignment.
    align_map = {'left': 0, 'center': 1, 'right': 2, '': 0}

    try:
        # fill data
        for data in input_data:
            page = doc[int(data['page']) - 1]
            if data['type'] == 'text':
                page.insertTextbox(
                    get_rect_box(data),
                    data['text'],
                    fontsize=float(data['fontSize'][:-2]),
                    color=(0, 0, 0),
                    align=align_map.get(data.get('textAlign', ''), 0),
                )
            elif data['type'] == 'img':
                sig_img = os.path.join(
                    filled_result_dir,
                    '{}-{}.png'.format(data['page'], data['id']),
                )
                encoded = data['img'].replace('data:image/png;base64,', '')
                # Close the file before insertImage reads it back from disk.
                with open(sig_img, 'wb') as sig_file:
                    sig_file.write(base64.b64decode(encoded))
                page.insertImage(get_rect_box(data), sig_img)
        doc.save(os.path.join(filled_result_dir, 'filled.pdf'))
    finally:
        doc.close()

In [18]:
# Fill the sample form stored in `3-pdf/`; outputs are written to the same directory.
input_dir = '3-pdf/'
fill_pdf(
    input_data_file=input_dir + 'input.json',
    # input_dir already ends with '/', so no extra separator is added here.
    original_file=input_dir + '1.pdf',
    filled_result_dir=input_dir,
)