# Init

In [347]:
import fitz
import lorem
import json
import random
import re
import os

# Global PyMuPDF switches, set once before any page is read or written.
# NOTE(review): per the PyMuPDF docs this makes glyph/span bbox heights follow
# the font size instead of the full ascender/descender box -- confirm that this
# is what the search/clip rectangles below rely on.
fitz.TOOLS.set_small_glyph_heights(True)
# NOTE(review): presumably keeps very thin drawn lines visible; confirm
# against the installed PyMuPDF version's Tools documentation.
fitz.TOOLS.set_graphics_min_line_width(True)

# Characters substituted for spaces/tabs in the generated text and later
# searched for on every page.
UNUSUAL_CHARS = ["-","~","*"]
# Font used to lay out the generated body text.
# NOTE(review): `ordering=0` selects one of PyMuPDF's built-in CJK fonts -- verify.
FONT_TYPE = fitz.Font(ordering=0)
# Font size used both for the body text and for the re-written spans.
FONT_SIZE = 12
# Colour of the visible body text.
COLOR_BLACK = (0, 0, 0)
# Colour of the text written back on top of redacted areas.
COLOR_WHITE = (1, 1, 1)
# Root directory (relative to the working directory) for generated test files.
TEST_DOCUMENT_DIR = "../test-document-v2"

# Illusificate PDF

In [348]:
def illusificate_entity(page: fitz.Page, clip: fitz.Rect, tw: fitz.TextWriter, curr_page: int):
    """Process a single search hit on ``page``.

    The first text span found inside ``clip`` is queued on ``tw`` (same origin
    and text, ``FONT_SIZE``), ``clip`` is registered as a redaction area, and a
    filled rectangle is drawn over ``clip``.

    Args:
        page: Page the hit was found on.
        clip: Rectangle of the hit.
        tw: Text writer collecting the spans to be written back later by the
            caller.
        curr_page: Index of ``page``; only copied into the result.

    Returns:
        A dict with the span's ``origin`` and ``text`` plus the ``page`` index.

    Raises:
        IndexError: If no text span is found inside ``clip``.
    """
    text_dict = page.get_text("dict", clip=clip)
    first_span = text_dict["blocks"][0]["lines"][0]["spans"][0]
    origin = first_span["origin"]
    text = first_span["text"]

    # Queue the span on the writer; the writer is flushed by the caller
    # after the redactions have been applied.
    tw.append(
        origin,
        text,
        fontsize=FONT_SIZE,
        font=fitz.Font(first_span["font"], ordering=1),
    )
    page.add_redact_annot(clip)

    # Draw a rectangle over the hit: stroke (1, 0, 1), fill (1, 1, 0).
    marker = page.new_shape()
    marker.draw_rect(clip)
    marker.finish(color=(1, 0, 1), fill=(1, 1, 0))
    marker.commit()

    return {
        "origin": origin,
        "text": text,
        "page": curr_page,
    }

def illusificate_doc(doc: fitz.Document):
    """Process every occurrence of the unusual characters in ``doc``.

    For each page, every hit of each character in ``UNUSUAL_CHARS`` is handed
    to ``illusificate_entity``. The page's redactions are then applied and the
    collected spans are written back using ``COLOR_WHITE``.

    Args:
        doc: Document to modify in place.

    Returns:
        A list with one record (as returned by ``illusificate_entity``) per
        hit, across all pages. The list is empty when nothing was found or the
        document has no pages.
    """
    res = []
    for curr_page, page in enumerate(doc):
        # One writer per page: it collects the spans to re-insert once the
        # page's redactions have been applied.
        tw = fitz.TextWriter(page.rect, color=COLOR_WHITE)
        for char in UNUSUAL_CHARS:
            for clip in page.search_for(char):
                res.append(illusificate_entity(page, clip, tw, curr_page))

        # images=0 is passed through unchanged.
        # NOTE(review): presumably "leave images untouched" -- confirm.
        page.apply_redactions(images=0)
        tw.write_text(page)

    # Return only after ALL pages were handled; returning from inside the
    # loop would stop after the first page.
    return res

In [349]:
def generate_illusive_document(filename: str, doc_text: str):
    """Create one test PDF and its JSON ground-truth file.

    A new single-page document is created and ``doc_text`` is laid out inside
    the page with 72 units of padding on every side. The page is then run
    through ``illusificate_doc``. The records it returns are written to
    ``<filename>.json`` and the document is saved as ``<filename>.pdf``.

    Args:
        filename: Output path without extension.
        doc_text: Text to place on the page.
    """
    doc = fitz.open()
    try:
        page = doc.new_page()

        page_padding = 72
        page_rect = page.rect
        fill_rect = fitz.Rect(
            page_padding,
            page_padding,
            page_rect.width - page_padding,
            page_rect.height - page_padding,
        )

        writer = fitz.TextWriter(page_rect, color=COLOR_BLACK)
        # NOTE(review): warn=True presumably reports text that does not fit
        # instead of raising -- confirm against TextWriter.fill_textbox docs.
        writer.fill_textbox(
            fill_rect,
            doc_text,
            align=fitz.TEXT_ALIGN_LEFT,
            warn=True,
            fontsize=FONT_SIZE,
            font=FONT_TYPE,
        )
        writer.write_text(page)

        results = illusificate_doc(doc)

        # The ground truth is written before the PDF, as before.
        with open(f"{filename}.json", "w", encoding="utf-8") as outfile:
            json.dump(results, outfile, indent=2)

        doc.save(f"{filename}.pdf")
    finally:
        # Always release the document, even if layout or saving failed.
        doc.close()

# Random Text Generation

In [350]:
def generate_document_text(num_paragraph: int = 4):
    """Return ``num_paragraph`` random lorem paragraphs separated by blank lines.

    Args:
        num_paragraph: How many paragraphs to generate.

    Returns:
        The paragraphs joined with ``"\\n\\n"``.
    """
    paragraphs = []
    for _ in range(num_paragraph):
        paragraphs.append(lorem.paragraph())
    return "\n\n".join(paragraphs)

# Generate Illusive Document

In [351]:
def random_unusual_char(match):
    """Pick a random replacement character from ``UNUSUAL_CHARS``.

    ``match`` is accepted so the function can serve as an ``re.sub``
    replacement callback; its content is not used.
    """
    replacement = random.choice(UNUSUAL_CHARS)
    return replacement

def generate_illusive_docs(doc_amount: int):
    """Generate ``doc_amount`` test documents in the ``spaces`` sub-directory.

    Each document gets fresh random text in which every space or tab is
    replaced by a randomly chosen unusual character. Files are named
    ``TEST1`` .. ``TEST<doc_amount>`` (extensions are added by
    ``generate_illusive_document``).

    Args:
        doc_amount: Number of documents to create; must be at least 1.
    """
    assert doc_amount >= 1

    blank_pattern = re.compile(r"[ \t]")
    dir_name = f"{TEST_DOCUMENT_DIR}/spaces"
    os.makedirs(dir_name, exist_ok=True)

    for i in range(1, doc_amount + 1):
        # Text is generated first, then its blanks are swapped out.
        scrambled_text = blank_pattern.sub(
            random_unusual_char, generate_document_text()
        )
        generate_illusive_document(f"{dir_name}/TEST{i}", scrambled_text)

# Main Process

In [352]:
# Entry point of the notebook: create ten test documents (TEST1..TEST10, each
# as a .pdf plus a .json record file) under TEST_DOCUMENT_DIR/spaces.
generate_illusive_docs(10)