In [1]:
from transformers import LayoutLMv3Config, LayoutLMv3Model
import json

# Initializing a LayoutLMv3 microsoft/layoutlmv3-base style configuration
with open("/home/luckagianvechio/Documents/Material Estudo TCC/code/layoutlmv3/config.json", "r") as jeiso:
    configuration = LayoutLMv3Config(**json.load(jeiso))

# Initializing a model (with random weights) from the microsoft/layoutlmv3-base style configuration
model = LayoutLMv3Model(configuration)

# Accessing the model configuration
configuration = model.config

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from transformers import LayoutLMv3Processor, LayoutLMv3ImageProcessor
from transformers import BertTokenizer


tokenizer = BertTokenizer.from_pretrained('neuralmind/bert-base-portuguese-cased', do_lower_case=False)
image_processor = LayoutLMv3ImageProcessor.from_pretrained("microsoft/layoutlmv3-base")

In [3]:
from transformers import LayoutLMv3Processor

lmv3_processor = LayoutLMv3Processor.from_pretrained("microsoft/layoutlmv3-base")
lmv3_processor.feature_extractor.apply_ocr = False



In [4]:
from pathlib import Path
from os import listdir

main_path = Path("/home/luckagianvechio/Documents/Material Estudo TCC/IIT CDIP/images.a.a/imagesa/a/a")
a_path = Path("a/")
a_img_folder_path = [main_path / a_path / Path(pt) for pt in listdir(main_path / a_path)]
a_img_path = []
for pt in a_img_folder_path:
    files = listdir(pt)
    for file in files:
        if not file.split(".")[1] == "xml":
            a_img_path.append(pt / file)

In [5]:
from ocr_tools import get_ocr_word_box_list, preprocess_image, resize_image, read_image, normalize_bbox

image = read_image(a_img_path[0])
text_boxes, shape = get_ocr_word_box_list(a_img_path[0])
words = [k["text"] for k in text_boxes]
boxes = [normalize_bbox(k["bbox"], shape[0], shape[1]) for k in text_boxes]

In [6]:
processed = lmv3_processor(
    image,
    words,
    boxes=boxes,
    max_length=512,
    padding="max_length",
    truncation=True,
    return_tensors="pt"
)

In [7]:
model(**processed)

LayoutLMv3TextEmbeddings(
  (word_embeddings): Embedding(50265, 768, padding_idx=1)
  (token_type_embeddings): Embedding(1, 768)
  (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  (dropout): Dropout(p=0.1, inplace=False)
  (position_embeddings): Embedding(514, 768, padding_idx=1)
  (x_position_embeddings): Embedding(1024, 128)
  (y_position_embeddings): Embedding(1024, 128)
  (h_position_embeddings): Embedding(1024, 128)
  (w_position_embeddings): Embedding(1024, 128)
) tensor([[    0,  7330, 15318,   109,   242,  2841,  1046,    17,    27,   262,
         24303, 37317,     4,  4979, 13895,   565, 14718, 10566,    35,   719,
           753,     6,  7528, 33355, 10566,    35,  5457,   759,   733,     6,
          7528, 40950,  1499,  4177, 10486,    35, 30627,     6,   886, 40950,
          1499, 11185,   337,  1872,    44,   711,   713, 11020, 20026, 13133,
             9,   143,  1203,     6, 11566,     6, 15155,  1120,    50, 11267,
          2332,    14, 26238,  



BaseModelOutput(last_hidden_state=tensor([[[ 0.2020, -1.0938, -0.1600,  ...,  1.3920,  0.7154,  0.8923],
         [ 0.9497,  0.1321, -1.1830,  ..., -0.3233, -0.1729,  1.4400],
         [ 1.1455,  1.7027, -1.3415,  ...,  0.9101,  0.1392, -0.0035],
         ...,
         [ 0.9331, -0.2009, -0.5256,  ...,  0.4081,  0.5432,  0.5877],
         [ 0.5243, -0.5275, -0.1765,  ..., -0.9974,  0.4886,  0.8465],
         [ 0.4995, -0.8935,  0.4980,  ..., -0.0478,  0.8550,  0.7672]]],
       grad_fn=<NativeLayerNormBackward0>), hidden_states=None, attentions=None)

In [8]:
from transformers import LayoutLMv3Config, LayoutLMv3Model
import json

# Initializing a LayoutLMv3 microsoft/layoutlmv3-base style configuration
with open("/home/luckagianvechio/Documents/Material Estudo TCC/code/layoutlmv3/config_alterations.json", "r") as jeiso:
    configuration = LayoutLMv3Config(**json.load(jeiso))

# Initializing a model (with random weights) from the microsoft/layoutlmv3-base style configuration
model = LayoutLMv3Model(configuration)

# Accessing the model configuration
configuration = model.config

In [9]:
from transformers import BertModel  # or BertModel, for BERT without pretraining heads

bertimbau_model = BertModel.from_pretrained('/home/luckagianvechio/Documents/Material Estudo TCC/code/bertimbau')

model.embeddings.word_embeddings = bertimbau_model.embeddings.word_embeddings

In [10]:
from new_processor import tokenize_with_bbox, pad_tokenized

new_processed = tokenize_with_bbox(
    words=words,
    bboxs=boxes,
    tokenizer=tokenizer,
    max_length=512
)
padded_new_processed = pad_tokenized(new_processed, max_length=512)

In [11]:
from torch import Tensor

processed_new_tokenizer = processed.copy()
processed_new_tokenizer["input_ids"][0] = Tensor(padded_new_processed["input_ids"])
processed_new_tokenizer["bbox"][0] = Tensor(padded_new_processed["bbox"])

In [12]:
model(**processed_new_tokenizer)

LayoutLMv3TextEmbeddings(
  (word_embeddings): Embedding(29794, 768, padding_idx=0)
  (token_type_embeddings): Embedding(2, 768)
  (LayerNorm): LayerNorm((768,), eps=1e-05, elementwise_affine=True)
  (dropout): Dropout(p=0.1, inplace=False)
  (position_embeddings): Embedding(512, 768, padding_idx=0)
  (x_position_embeddings): Embedding(1024, 128)
  (y_position_embeddings): Embedding(1024, 128)
  (h_position_embeddings): Embedding(1024, 128)
  (w_position_embeddings): Embedding(1024, 128)
) tensor([[  476,  2097,  5131,   171, 22279,   173,  9174, 22361,   977,   197,
         19647,   119,   257, 22309,   192, 20257,  1043, 22282, 22286,  1292,
           185,   131,  4534,   210,   604,   275,   117,  6827,   192, 14782,
          5965,  1086,  1292,   185,   131,   134,  4424,   210,   604,  2250,
           117,  6827,  4151,  9581,  1131,  1992,  4009,  9558,   131,  2174,
           897,   117, 19921,  4151,  9581,  1131,   540,   477,  1355,  8586,
         16989,  4189,   387,  

BaseModelOutput(last_hidden_state=tensor([[[ 1.6519,  1.2543,  0.0391,  ...,  0.8546,  0.1847,  0.5731],
         [ 1.0682, -0.2072,  0.6367,  ...,  0.0131,  0.8519,  0.4590],
         [ 0.7802,  1.2017, -0.1197,  ...,  0.1721, -0.2817,  2.4566],
         ...,
         [ 0.0327, -0.4726, -0.4509,  ...,  0.3562,  0.6582,  0.8056],
         [ 1.3668,  0.3628, -0.2070,  ...,  0.2546, -0.2959,  1.2254],
         [ 0.9313, -0.3510,  0.0607,  ...,  0.5033,  0.2006,  0.9044]]],
       grad_fn=<NativeLayerNormBackward0>), hidden_states=None, attentions=None)