In [1]:
from detectron2.modeling import build_model

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
from dataclasses import dataclass
from detectron2.config import get_cfg
from config import get_settings
from ai.ditod import add_vit_config
@dataclass
class Arguments:
    """Names and paths used to set up the model in this notebook.

    The attributes carry type annotations so that ``@dataclass`` turns them
    into real fields: each one can be overridden by keyword, e.g.
    ``Arguments(dataset="other")``, and shows up in the generated repr.
    """

    # Dataset name.
    dataset: str = "doclaynet"
    # YAML file merged into the detectron2 config via ``cfg.merge_from_file``.
    config_file_path: str = "ai/Configs/doclaynet_VGT_cascade_PTM.yaml"
    # Path to the model checkpoint.
    model_weights_path: str = "./models/model_final.pth"
    # Directory handed to ``WordnnEmbedding`` as ``bros_embedding_path``.
    embedding_weights: str = "/home/saeed/Walnut/Quantization/models/layoutlm/"


# Build the detectron2 config: start from get_cfg(), let add_vit_config
# modify it (presumably registering the ViT/VGT-specific keys - confirm in
# ai.ditod), then overlay the YAML file named in Arguments.
# NOTE(review): args.model_weights_path is not applied to cfg in the visible
# cells - confirm whether the checkpoint is meant to be loaded here.
args = Arguments()
cfg = get_cfg()
add_vit_config(cfg)
cfg.merge_from_file(args.config_file_path)


In [3]:
model = build_model(cfg)

NotImplementedError: wrap must be called at the top level of a module

In [None]:
import numpy as np
import torch
from torch import nn
import numpy as np
from ai.ditod.tokenization_bros import BrosTokenizer


def _init_weights(m):
    if isinstance(m, nn.Linear):
        # we use xavier_uniform following official JAX ViT:
        torch.nn.init.xavier_uniform_(m.weight)
        if isinstance(m, nn.Linear) and m.bias is not None:
            nn.init.constant_(m.bias, 0)
    elif isinstance(m, nn.LayerNorm):
        nn.init.constant_(m.bias, 0)
        nn.init.constant_(m.weight, 1.0)


class WordnnEmbedding(nn.Module):
    """Generate chargrid embedding feature map.

    Each word's token id is painted into its bounding box on an integer grid
    with the image's spatial size, and the grid is then looked up in an
    ``nn.Embedding`` table.

    NOTE(review): the checkpoint keys listed later in this notebook include
    ``Wordgrid_embedding.embedding_proj.weight``, but this class defines no
    ``embedding_proj`` layer and never uses ``embedding_dim``; the output
    width is therefore the embedding-table width. Confirm whether the
    projection was dropped on purpose.
    """

    # (substring of the checkpoint path, state-dict key of its word-embedding
    # matrix); the first matching substring wins.
    _WORD_EMBEDDING_KEYS = (
        ("bert", "bert.embeddings.word_embeddings.weight"),
        ("bros", "embeddings.word_embeddings.weight"),
        ("layoutlm", "layoutlm.embeddings.word_embeddings.weight"),
    )

    # Id painted into every box when ``use_UNK_text`` is set.
    # Presumably the tokenizer's [UNK] id - TODO confirm against the vocab.
    _UNK_TOKEN_ID = 100

    def __init__(
        self,
        vocab_size=30552,
        hidden_size=768,
        embedding_dim=64,
        bros_embedding_path="/bros-base-uncased/",
        use_pretrain_weight=True,
        use_UNK_text=False,
    ):
        """
        Args:
            vocab_size (int): number of rows of the randomly initialised
                embedding table (the table is replaced when pretrained
                weights are loaded).
            hidden_size (int): width of each row of that table.
            embedding_dim (int): currently unused; kept so existing callers
                keep working.
            bros_embedding_path (str): directory containing
                ``pytorch_model.bin``.
            use_pretrain_weight (bool): replace the table with the word
                embeddings stored in that checkpoint.
            use_UNK_text (bool): paint every box with a single fixed id
                instead of the word's own id.
        """
        super().__init__()

        self.embedding = nn.Embedding(vocab_size, hidden_size)
        self.use_pretrain_weight = use_pretrain_weight
        self.use_UNK_text = use_UNK_text

        self.init_weights(bros_embedding_path)
        # _init_weights only touches nn.Linear / nn.LayerNorm modules, so the
        # embedding table set up above is not modified by this call.
        self.apply(_init_weights)

    def init_weights(self, bros_embedding_path):
        """Replace ``self.embedding`` with pretrained word embeddings.

        Does nothing unless ``self.use_pretrain_weight`` is true.

        Args:
            bros_embedding_path (str): directory containing
                ``pytorch_model.bin``; it must contain one of the substrings
                "bert", "bros" or "layoutlm", which selects the state-dict
                key to read.

        Raises:
            ValueError: if the path matches none of the known substrings.
                This is checked before the checkpoint is read, so a bad path
                fails fast instead of crashing later on an unbound variable.
        """
        if not self.use_pretrain_weight:
            return

        import os

        state_dict_key = next(
            (
                key
                for marker, key in self._WORD_EMBEDDING_KEYS
                if marker in bros_embedding_path
            ),
            None,
        )
        if state_dict_key is None:
            raise ValueError(
                "Wrong bros_embedding_path! Expected the path to contain one of "
                "'bert', 'bros' or 'layoutlm', got: {!r}".format(bros_embedding_path)
            )

        # NOTE: torch.load unpickles the file; only load checkpoints from a
        # trusted source.
        state_dict = torch.load(
            os.path.join(bros_embedding_path, "pytorch_model.bin"),
            map_location="cpu",
        )
        # NOTE(review): from_pretrained is used with its default arguments -
        # check whether the table is meant to stay frozen.
        self.embedding = nn.Embedding.from_pretrained(state_dict[state_dict_key])
        print("use_pretrain_weight: load model from:", bros_embedding_path)

    def forward(self, img, batched_inputs, stride=1):
        """Paint token ids into a grid and embed it.

        Args:
            img (Tensor): in shape of [B x C x H x W]; only its size and
                device are used.
            batched_inputs (list[dict]): one dict per image providing
                ``"input_ids"`` and ``"bbox"``. Each box is unpacked as
                (w_start, h_start, w_end, h_end) and must expose tensor
                methods (``.cpu()``).
            stride (int): factor by which the grid size and the box
                coordinates are divided.

        Returns:
            Tensor: in shape of [B x H // stride x W // stride x E], where E
            is the width of the embedding table. Grid cells covered by no
            box keep id 0; where boxes overlap, the later word wins.
        """
        batch_b, _, batch_h, batch_w = img.size()

        chargrid_map = torch.zeros(
            (batch_b, batch_h // stride, batch_w // stride),
            dtype=torch.int64,
            device=img.device,
        )
        for iter_b in range(batch_b):
            per_input_ids = batched_inputs[iter_b]["input_ids"]
            per_input_bbox = batched_inputs[iter_b]["bbox"]

            # Only words that have both an id and a box are painted.
            short_length_w = min(len(per_input_ids), len(per_input_bbox))

            for word_idx in range(short_length_w):
                bbox = per_input_bbox[word_idx] / stride
                w_start, h_start, w_end, h_end = (
                    bbox.cpu().detach().numpy().round().astype(int).tolist()
                )
                if self.use_UNK_text:
                    fill_id = self._UNK_TOKEN_ID
                else:
                    fill_id = per_input_ids[word_idx]
                chargrid_map[iter_b, h_start:h_end, w_start:w_end] = fill_id

        return self.embedding(chargrid_map)


In [None]:
gird_embedding = WordnnEmbedding(bros_embedding_path=args.embedding_weights)


use_pretrain_weight: load model from: /home/saeed/Walnut/Quantization/models/layoutlm/


In [None]:
from detectron2.structures import ImageList
import torch
def preprocess_image(images):
    """
    Normalize, pad and batch the input images.

    Args:
        images (list[Tensor]): images whose first dimension holds the three
            channels that the per-channel mean/std below are applied to
            (presumably (3, H, W) - confirm the channel order expected by
            the model).

    Returns:
        ImageList: the normalized images batched by
        ``ImageList.from_tensors``. The batched tensor's shape is printed as
        a side effect.
    """
    # Shape (3, 1, 1) so the statistics broadcast over the spatial
    # dimensions of a channel-first image. This replaces the former
    # permute -> normalize -> permute round-trip with the same arithmetic.
    pixel_mean = torch.tensor([103.530, 116.280, 123.675]).view(-1, 1, 1)
    pixel_std = torch.tensor([57.375, 57.120, 58.395]).view(-1, 1, 1)

    # Move the statistics to each image's device so inputs that are not on
    # the CPU are handled too.
    images = [
        (x - pixel_mean.to(x.device)) / pixel_std.to(x.device) for x in images
    ]
    images = ImageList.from_tensors(
        images,
        0,
        padding_constraints={},
    )
    print(images.tensor.shape)
    return images

In [None]:
import torch 
import numpy as np
# Dummy inputs for the tracing / export / quantization experiments below.
# A single random image with 3 channels first; torch.rand draws values in [0, 1).
input_tensor = torch.rand(3,1000,1000)
# One dict per image holding a height and a width (presumably the target
# output size consumed by the model - confirm against its forward signature).
sizes_dict =     [{'height': torch.tensor(1000),'width': torch.tensor(1000)}]
# One dict per image: three token ids with one box each.
# WordnnEmbedding.forward unpacks every box as (w_start, h_start, w_end, h_end).
input_dict = [{
    'input_ids':torch.tensor([2,444,566]),
    'bbox': torch.tensor([[112,334,444,666], [23,35,666,435], [22,66,343,432]]),
}]
# Normalize and batch the image; preprocess_image prints the batch shape.
imagelist = preprocess_image([input_tensor])

torch.Size([1, 3, 1000, 1000])


In [4]:
grid = gird_embedding(imagelist.tensor, input_dict)

NameError: name 'gird_embedding' is not defined

In [9]:
model.eval().to('cpu')

VGT(
  (backbone): GridFPN(
    (fpn_lateral2): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output2): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral3): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output3): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral4): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output4): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (fpn_lateral5): Conv2d(768, 256, kernel_size=(1, 1), stride=(1, 1))
    (fpn_output5): Conv2d(256, 256, kernel_size=(3, 3), stride=(1, 1), padding=(1, 1))
    (top_block): LastLevelMaxPool()
    (bottom_up): PTM_VIT_Backbone(
      (backbone): BEiT(
        (patch_embed): PatchEmbed(
          (proj): Conv2d(3, 768, kernel_size=(16, 16), stride=(16, 16))
        )
        (grid_patch_embed): PatchEmbed(
          (proj): Conv2d(64, 768, kernel_size=(16, 16), stride=(16, 16))
        )
 

In [None]:
torch_script = torch.jit.trace(model.forward, (imagelist, input_dict ))

In [None]:
onnx_program = torch.onnx.dynamo_export(model, imagelist, grid, sizes_dict)

In [None]:
import torch
quant =torch.ao.quantization.QuantStub()
dequant = torch.ao.quantization.DeQuantStub()
x = torch.rand(2,2)
print(x)
quant(x)

In [7]:
from torch.onnx import OperatorExportTypes
import onnx
import io
def export_onnx_model(model, inputs, output_path="model.onnx"):
    """
    Trace and export a model to onnx format.

    Args:
        model (nn.Module): the model to export; it and every submodule must
            already be in eval mode.
        inputs (tuple[args]): the model will be called by `model(*inputs)`
        output_path (str): file the exported model is saved to. Defaults to
            "model.onnx", the location that used to be hard-coded.

    Returns:
        an onnx model

    Raises:
        AssertionError: if `model` is not an `nn.Module`, or if any of its
            submodules is still in training mode.
    """
    assert isinstance(model, torch.nn.Module)

    # make sure all modules are in eval mode, onnx may change the training state
    # of the module if the states are not consistent
    def _check_eval(module):
        assert not module.training

    model.apply(_check_eval)

    # Export into an in-memory buffer, then parse the bytes back into an
    # onnx model object.
    with torch.no_grad(), io.BytesIO() as f:
        torch.onnx.export(
            model,
            inputs,
            f,
            operator_export_type=OperatorExportTypes.ONNX_ATEN_FALLBACK,
            # verbose=True,  # NOTE: uncomment this for debugging
            # export_params=True,
            input_names=['batched_inputs']
        )
        onnx_model = onnx.load_from_string(f.getvalue())

    onnx.save(onnx_model, output_path)
    return onnx_model

In [None]:
onnx_model= export_onnx_model(model, (input_dict,))

In [1]:
import onnxruntime
session = onnxruntime.InferenceSession("/home/saeed/Walnut/Quantization/model.onnx")

In [None]:
import torch
# Simple module for demonstration
class MyModule(torch.nn.Module):
    """Toy module: add a learned (3, 4) offset, apply Linear(4, 5), clamp to [0, 1]."""

    def __init__(self):
        super().__init__()
        # The parameter is created before the linear layer; keeping this
        # order means seeded runs produce the same initial values.
        self.param = torch.nn.Parameter(torch.rand(3, 4))
        self.linear = torch.nn.Linear(4, 5)

    def forward(self, x):
        """Return ``linear(x + param)`` with every value limited to [0.0, 1.0]."""
        shifted = x + self.param
        projected = self.linear(shifted)
        return projected.clamp(min=0.0, max=1.0)

module = MyModule()

In [None]:
inp = torch.rand(3,4)

In [None]:
import torch.onnx

# Call the exporter through its full name: binding torch.onnx to the name
# `onnx` would shadow the standalone `onnx` package that export_onnx_model
# relies on (onnx.load_from_string / onnx.save) for the rest of the session.
# `args` is written as a real one-element tuple; `(inp)` is just `inp`.
torch.onnx.export(module, args=(inp,), f="aba.onnx")

In [None]:
import onnxruntime
inference = onnxruntime.InferenceSession("/home/saeed/Walnut/Quantization/model.onnx", None)

In [None]:
quantized = torch.ao.quantization.quantize_dynamic(model ,  # the original model
    {torch.nn.Linear},  # a set of layers to dynamically quantize
    dtype=torch.qint8) 

In [None]:
quantized(image_batch, input_dict)

In [10]:
import torch
from torch.ao.quantization import (
  get_default_qconfig_mapping,
  get_default_qat_qconfig_mapping,
  QConfigMapping,
)
import torch.ao.quantization.quantize_fx as quantize_fx
import copy


#
# post training dynamic/weight_only quantization
#

# we need to deepcopy if we still want to keep model_fp unchanged after quantization since quantization apis change the input model
model_to_quantize = copy.deepcopy(model)
model_to_quantize.eval()
# Dynamic quantization qconfig applied globally.
qconfig_mapping = QConfigMapping().set_global(torch.ao.quantization.default_dynamic_qconfig)
# a tuple of one or more example inputs is needed to trace the model
example_input = (imagelist, grid, sizes_dict)
# prepare
# prepare_fx takes the example inputs themselves as its third argument;
# the {"": ...} dict form belongs to qconfig dictionaries, not to inputs.
# NOTE(review): the failure recorded for this cell ('len' is not supported in
# symbolic tracing) is raised while tracing the model and is presumably
# independent of this argument - the model still has to be made traceable.
model_prepared = quantize_fx.prepare_fx(
    model_to_quantize, qconfig_mapping, example_inputs=example_input
)
# no calibration needed when we only have dynamic/weight_only quantization
# quantize
model_quantized = quantize_fx.convert_fx(model_prepared)

  torch.has_cuda,
  torch.has_cudnn,
  torch.has_mps,
  torch.has_mkldnn,


RuntimeError: 'len' is not supported in symbolic tracing by default. If you want this call to be recorded, please call torch.fx.wrap('len') at module scope

In [None]:
a = list(list(model.children())[0].children())[0]

In [6]:
import torch
# Only the checkpoint's keys are inspected below, so map every tensor to the
# CPU; this also lets the load succeed on a machine without the device the
# checkpoint was saved from.
# NOTE: torch.load unpickles the file - only load checkpoints you trust.
weights = torch.load(
    "/home/saeed/Walnut/Quantization/models/model_final.pth",
    map_location="cpu",
)

In [10]:
x = list(weights['model'].keys())

In [11]:
x

['backbone.fpn_lateral2.weight',
 'backbone.fpn_lateral2.bias',
 'backbone.fpn_output2.weight',
 'backbone.fpn_output2.bias',
 'backbone.fpn_lateral3.weight',
 'backbone.fpn_lateral3.bias',
 'backbone.fpn_output3.weight',
 'backbone.fpn_output3.bias',
 'backbone.fpn_lateral4.weight',
 'backbone.fpn_lateral4.bias',
 'backbone.fpn_output4.weight',
 'backbone.fpn_output4.bias',
 'backbone.fpn_lateral5.weight',
 'backbone.fpn_lateral5.bias',
 'backbone.fpn_output5.weight',
 'backbone.fpn_output5.bias',
 'backbone.bottom_up.backbone.cls_token',
 'backbone.bottom_up.backbone.grid_token',
 'backbone.bottom_up.backbone.pos_embed',
 'backbone.bottom_up.backbone.grid_pos_embed',
 'backbone.bottom_up.backbone.patch_embed.proj.weight',
 'backbone.bottom_up.backbone.patch_embed.proj.bias',
 'backbone.bottom_up.backbone.grid_patch_embed.proj.weight',
 'backbone.bottom_up.backbone.grid_patch_embed.proj.bias',
 'backbone.bottom_up.backbone.blocks.0.gamma_1',
 'backbone.bottom_up.backbone.blocks.0.gamm

In [12]:
y = [k for k in x if 'embedding' in k]

In [13]:
y

['Wordgrid_embedding.embedding.weight',
 'Wordgrid_embedding.embedding_proj.weight']