## Import Modules

In [1]:
import warnings
warnings.simplefilter(action='ignore', category=FutureWarning)
warnings.simplefilter(action='ignore', category=UserWarning)

import os 
import glob
from transformers import MarkupLMProcessor
from transformers import MarkupLMForTokenClassification

import torch
from torch import nn
from torch.utils.data import Dataset
from torch.utils.data import DataLoader

import utils
# import input_pipeline as ip

import pandas as pd

from tqdm.auto import tqdm
import yaml

  from .autonotebook import tqdm as notebook_tqdm


In [2]:
torch.backends.mps.is_available()

True

In [3]:
device = torch.device("mps" if torch.backends.mps.is_available() else "cpu")
device = 'cpu'
print(f"Using device {device}")

mps_device = torch.device("mps")

Using device cpu


## Define Helper Functions

1. create_raw_dataset: takes the tagged csvs and creates a dict of 
    xpaths, nodes, node_labels

In [4]:
def create_raw_dataset(tagged_csv_path, id2label, label2id, is_train=True):
    """Preprocesses the tagged csvs in the format required by MarkupLM"""

    tagged_df = pd.read_csv(tagged_csv_path)

    # in train mode we expect text and xpaths that are highlighted 
    # by an annotator
    if is_train:
        col_list = ["nodes", "xpaths", "node_labels"]
        
        tagged_df["highlighted_xpaths"] = tagged_df["highlighted_xpaths"].fillna(
            tagged_df["xpaths"]
        )
        tagged_df["highlighted_segmented_text"] = tagged_df[
            "highlighted_segmented_text"
        ].fillna(tagged_df["text"])

        # drop non-ASCII chars
        tagged_df["highlighted_segmented_text"] = (
            tagged_df["highlighted_segmented_text"]
            .str.encode("ascii", errors="ignore")
            .str.decode("ascii")
        )

        # rename columns to match MarkupLM convention
        tagged_df = tagged_df.rename(
            columns={
                "highlighted_xpaths": "xpaths",
                "highlighted_segmented_text": "nodes",
                "tagged_sequence": "node_labels",
            },
        )

        # convert node labels to integer values
        tagged_df["node_labels"] = tagged_df["node_labels"].apply(
            lambda label: label2id[label]
        )
    
    else:
        col_list = ["nodes", "xpaths"]
        
        # rename columns to match MarkupLM convention
        tagged_df = tagged_df.rename(
            columns={
                "xpaths": "xpaths",
                "text": "nodes",
            },
        )     
    
    tagged_output = tagged_df.loc[:, col_list].to_dict(orient="list")

    # convert each key to a list of lists just like the MarkupLM
    # pipeline requires
    for k, v in tagged_output.items():
        tagged_output[k] = [v]

    return tagged_output

In [5]:
class MarkupLMDataset(Dataset):
    """Dataset for token classification with MarkupLM."""

    def __init__(self, data, processor=None, max_length=512, is_train=True):
        self.data = data
        self.is_train = is_train
        self.processor = processor
        self.max_length = max_length
        self.encodings = []
        self.get_encoding_windows()
        

    def get_encoding_windows(self):
        """Splits the tokenized input into windows of 512 tokens"""
                
        for item in self.data:            
            if self.is_train:
                nodes, xpaths, node_labels = (
                    item["nodes"],
                    item["xpaths"],
                    item['node_labels']
                )
            else:
                nodes, xpaths, node_labels = (
                    item["nodes"],
                    item["xpaths"],
                    None
                )                
            
            # provide encoding to processor
            encoding = self.processor(
                nodes=nodes,
                xpaths=xpaths,
                node_labels=node_labels,
                padding="max_length",
                max_length=self.max_length,
                return_tensors="pt",
                truncation=False,
                return_offsets_mapping=True
            )

            # remove batch dimension
            encoding = {k: v.squeeze() for k, v in encoding.items()}

            # chunk up the encoding sequences to that it is less than the 
            # max input length of 512 tokens
            if not self.is_train:
                #num_tokens = len(item['nodes'][0])
                num_tokens = len(encoding['input_ids'])
                
                for idx in range(0, num_tokens, self.max_length):
                    batch_encoding = {}
                    for k, v in encoding.items():
                        batch_encoding[k] = v[idx: idx + self.max_length]

                    self.encodings.append(batch_encoding)                    
                    continue
            
            else:
                if len(encoding["input_ids"]) <= self.max_length:                    
                    self.encodings.append(encoding)
                    continue

                else:
                    batch_encoding = {}

                    start_idx, end_idx = 0, self.max_length

                    while end_idx < len(encoding["input_ids"]):
                        # decrement the end_idx by 1 until the label is not -100
                        while encoding["labels"][end_idx] == -100:
                            end_idx = end_idx - 1

                            # if the end idx is equal to the start idx meaning
                            # we don't encounter a non -100 token,
                            # we set window size as the max_length
                            if end_idx == start_idx:
                                end_idx = start_idx + self.max_length
                                break

                        for k, v in encoding.items():
                            batch_encoding[k] = v[start_idx:end_idx]

                        self.encodings.append(batch_encoding)
                        batch_encoding = {}

                        # update the pointers
                        start_idx = end_idx
                        end_idx = end_idx + self.max_length

                    # collect the remaining tokens
                    for k, v in encoding.items():
                        batch_encoding[k] = v[start_idx:]

                    if batch_encoding:
                        self.encodings.append(batch_encoding)

    def __len__(self):
        return len(self.encodings)

    def __getitem__(self, idx):
        # first, get nodes, xpaths and node labels
        item = self.encodings[idx]

        # pad the encodings to max_length of 512 tokens
        padded_item = self.processor.tokenizer.pad(
            item, max_length=self.max_length, padding="max_length", return_tensors="pt"
        )

        return padded_item

## Define Inference Loop and Main Function Execution

In [6]:
def run_inference_loop(dataloader, model, device, config, processor):
    '''Runs eval loop for entire dataset

    Args:
        dataloader: torch.utils.data.DataLoader: iterator over Dataset object
        model: transformers.PreTrainedModel. fine-tuned MarkupLM model
        device: torch.device. Specifies whether GPU is available for computation
        label_list: list. List of labels used to train the MarkupLM model
        config: dict. Contains user-provided params and args

    Returns:
        None
    '''
    model.eval()
    
    results = {"nodes": [], "preds": []}
    for batch in tqdm(dataloader, desc='inference_loop'):
        # get the inputs;
        inputs = {k: v.to(device) for k, v in batch.items()}

        # if ablation mode is set to true then
        # either mask the xpaths or shuffle them
        if config["ablation"]["run_ablation"]:
            inputs = utils.ablation(config, inputs)

        # get the offset mapping. It contains the spans of the 
        # words that were split during tokenization. 
        # Info present at a token level
        offset_mapping = inputs.pop('offset_mapping').squeeze().tolist()

        # forward + backward + optimize
        outputs = model(**inputs)

        predictions = outputs.logits.argmax(dim=-1)
        #logits[1] = outputs.logits
        #print(predictions.squeeze().tolist())
        pred_labels = [model.config.id2label[idx] for idx in predictions.squeeze().tolist()]
        
        input_ids = inputs['input_ids'].detach().numpy().flatten().tolist()
        input_word_pieces = [processor.decode([id]) for id in input_ids]
        
        
        # input_ids = [x for x in input_ids if x not in special_tokens]
        # print(input_ids)
        results['nodes'].append(input_word_pieces)
        results['preds'].append(pred_labels)
                                
    return results

In [7]:
def main(config, test_data, model_ckpt_path=None, is_train=False):
    '''Main execution of script'''
    # get the  list of labels along with the label to id mapping and
    # reverse mapping
    label_list, id2label, label2id = utils.get_label_list(config)
    
    # define the processor and model
    if config["model"]["use_large_model"]:
        processor = MarkupLMProcessor.from_pretrained(
            "microsoft/markuplm-large",
            only_label_first_subword=config['model']['label_only_first_subword']
        )
        model = MarkupLMForTokenClassification.from_pretrained(
            "microsoft/markuplm-large", id2label=id2label, label2id=label2id
        )

    else:
        processor = MarkupLMProcessor.from_pretrained(
            "microsoft/markuplm-base",
            only_label_first_subword=config['model']['label_only_first_subword'],
        )
        model = MarkupLMForTokenClassification.from_pretrained(
            "microsoft/markuplm-base", id2label=id2label, label2id=label2id
        )
        
    if model_ckpt_path is not None:
        model_ckpt = torch.load(model_ckpt_path, 
                                map_location='cpu')
        
        model.load_state_dict(model_ckpt)
        

    processor.parse_html = False
    
    # convert the input dataset
    # to torch datasets. Create the dataloaders as well
    test_dataset = MarkupLMDataset(
        data=test_data,
        processor=processor,
        max_length=config["model"]["max_length"],
        is_train=is_train
    )
    test_dataloader = DataLoader(
        test_dataset,
        batch_size=1,
        shuffle=False
    )

    model.to(device)  # move to GPU if available

    print("*" * 50)
    print(f'Running Inference Loop!')
    print("*" * 50)

    # run inference loop
    results = run_inference_loop(test_dataloader, model, device, 
                                     config, processor)


    return results

# Inference Execution

## Specify the input params

In [9]:
config_path = './configs/config.yaml'

test_contract_dir = "../contracts/test"
val_contract_dir = "../contracts/val"
train_contract_dir = "../contracts/train"

# if loading from ckpt then change the line below
#model_ckpt_path = "/Users/pranabislam/Desktop/research/MarkupMnA-Markup-Based-Segmentation-of-MnA-Agreements/markup-mna/models/pretrained_models/markuplm_base_model_ablation_shuffle_num_contract_100pct_f1-0.871.pt"
model_ckpt_path = "/Users/pranabislam/Desktop/research/MarkupMnA-Markup-Based-Segmentation-of-MnA-Agreements/markup-mna/models/pretrained_models/markuplm_base_model_num_contracts-121_epoch-10_f1-0.903.pt"
max_length = 512
test_batch_size = 1

In [10]:
# read the config file 
with open(config_path, 'r') as yml:
    config = yaml.safe_load(yml)

In [11]:
label_list, id2label, label2id = utils.get_label_list(config)
num_labels = len(label2id)

In [12]:
test_contracts = glob.glob(os.path.join(test_contract_dir, "*.csv"))

#test_contracts = [test_contracts[2]]

print(f"Found {len(test_contracts)} in test dir")

Found 20 in test dir


In [13]:
test_contracts

['../contracts/test/contract_69.csv',
 '../contracts/test/contract_81.csv',
 '../contracts/test/contract_85.csv',
 '../contracts/test/contract_46.csv',
 '../contracts/test/contract_84.csv',
 '../contracts/test/contract_92.csv',
 '../contracts/test/contract_37.csv',
 '../contracts/test/contract_104.csv',
 '../contracts/test/contract_35.csv',
 '../contracts/test/contract_31.csv',
 '../contracts/test/contract_128.csv',
 '../contracts/test/contract_129.csv',
 '../contracts/test/contract_30.csv',
 '../contracts/test/contract_116.csv',
 '../contracts/test/contract_137.csv',
 '../contracts/test/contract_61.csv',
 '../contracts/test/contract_67.csv',
 '../contracts/test/contract_72.csv',
 '../contracts/test/contract_58.csv',
 '../contracts/test/contract_59.csv']

In [14]:
[x.split('_')[-1].split('.csv')[0] for x in test_contracts]

['69',
 '81',
 '85',
 '46',
 '84',
 '92',
 '37',
 '104',
 '35',
 '31',
 '128',
 '129',
 '30',
 '116',
 '137',
 '61',
 '67',
 '72',
 '58',
 '59']

In [15]:
all_test_data = [] 
for tagged_path in test_contracts:
    tagged_output = create_raw_dataset(tagged_path, 
                                       id2label=id2label, 
                                       label2id=label2id,
                                       is_train=False)

    all_test_data.append(tagged_output)

In [16]:
len(all_test_data)

20

### Run the inference pipeline

In [14]:
def create_repeat_xpaths(processor, row):
    '''
    Given a row in a dataset, create a list of repeating xpaths with length of the number of tokens
    in the text row
    '''
    enc = processor(
        nodes=[row.text],
        xpaths=[row.xpaths],
        #node_labels=node_labels,
        #padding="max_length",
        max_length=512,
        return_tensors="pt",
        truncation=False,
    )
    
    num_tokens = enc.input_ids.shape[1] - 2
    tokens = [processor.decode(x, skip_special_tokens=False) for x in enc.input_ids[0][1:-1]]
    
    return [[row.xpaths] * num_tokens, tokens]

In [18]:
processor = MarkupLMProcessor.from_pretrained(
    "microsoft/markuplm-base",
    only_label_first_subword=True
)
processor.parse_html = False

for i, test_data in enumerate(all_test_data):
    
    test_data = [test_data]
    contract_num = test_contracts[i].split('_')[-1].split('.csv')[0]
    inference_csv_path = test_contracts[i]
    
    results = main(config, test_data, model_ckpt_path=model_ckpt_path,is_train=False)
    df = pd.DataFrame.from_dict(results)
    df = df.explode(['nodes', 'preds']).reset_index(drop=True)
    
    inference_csv = pd.read_csv(inference_csv_path)[['xpaths','text']].copy()
    
    g = inference_csv.apply(lambda row: create_repeat_xpaths(processor, row), axis=1)
    
    inference_csv['xpaths_list'] = g.apply(lambda x: x[0])
    inference_csv['text_list'] = g.apply(lambda x: x[1])
    
    exploded = inference_csv.explode(['xpaths_list', 'text_list']).reset_index(drop=True)
    
    filter_df_based_on_start_and_end_token = df.query("nodes == '<s>' | nodes == '</s>'")
    start_idx = filter_df_based_on_start_and_end_token.index.min() + 1
    end_idx = filter_df_based_on_start_and_end_token.index.max()
    
    preds = df.iloc[start_idx:end_idx].copy()
    
    exploded_partial = exploded.copy()
    
    exploded_partial['nodes'] = preds['nodes'].to_list()
    exploded_partial['preds'] = preds['preds'].to_list()
    
    print(sum(exploded_partial.text_list == exploded_partial.nodes) == len(exploded_partial))
    
    print((exploded_partial['xpaths'] == exploded_partial['xpaths_list']).sum() == len(exploded_partial))
    
    exploded_partial[['xpaths','text','nodes','preds']].to_json(f'predictions_contract_{contract_num}.json',orient='records')
    print('contract done', contract_num)

Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'nrp_cls.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'nrp_cls.dense.bias', 'cls.predictions.transform.dense.bias', 'ptc_cls.bias', 'cls.predictions.decoder.weight', 'markuplm.pooler.dense.bias', 'nrp_cls.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'nrp_cls.LayerNorm.weight', 'nrp_cls.decoder.weight', 'cls.predictions.transform.dense.weight', 'nrp_cls.LayerNorm.bias', 'ptc_cls.weight', 'markuplm.pooler.dense.weight']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************
huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


inference_loop:   0%|                                   | 0/133 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 133/133 [01:17<00:00,  1.71it/s]


True
True
contract done 69


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'nrp_cls.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'nrp_cls.dense.bias', 'cls.predictions.transform.dense.bias', 'ptc_cls.bias', 'cls.predictions.decoder.weight', 'markuplm.pooler.dense.bias', 'nrp_cls.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'nrp_cls.LayerNorm.weight', 'nrp_cls.decoder.weight', 'cls.predictions.transform.dense.weight', 'nrp_cls.LayerNorm.bias', 'ptc_cls.weight', 'markuplm.pooler.dense.weight']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/208 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 208/208 [02:11<00:00,  1.59it/s]


True
True
contract done 81


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'nrp_cls.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'nrp_cls.dense.bias', 'cls.predictions.transform.dense.bias', 'ptc_cls.bias', 'cls.predictions.decoder.weight', 'markuplm.pooler.dense.bias', 'nrp_cls.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'nrp_cls.LayerNorm.weight', 'nrp_cls.decoder.weight', 'cls.predictions.transform.dense.weight', 'nrp_cls.LayerNorm.bias', 'ptc_cls.weight', 'markuplm.pooler.dense.weight']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/118 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 118/118 [01:13<00:00,  1.61it/s]


True
True
contract done 85


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'nrp_cls.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'nrp_cls.dense.bias', 'cls.predictions.transform.dense.bias', 'ptc_cls.bias', 'cls.predictions.decoder.weight', 'markuplm.pooler.dense.bias', 'nrp_cls.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'nrp_cls.LayerNorm.weight', 'nrp_cls.decoder.weight', 'cls.predictions.transform.dense.weight', 'nrp_cls.LayerNorm.bias', 'ptc_cls.weight', 'markuplm.pooler.dense.weight']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/180 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 180/180 [01:42<00:00,  1.75it/s]


True
True
contract done 46


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'nrp_cls.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'nrp_cls.dense.bias', 'cls.predictions.transform.dense.bias', 'ptc_cls.bias', 'cls.predictions.decoder.weight', 'markuplm.pooler.dense.bias', 'nrp_cls.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'nrp_cls.LayerNorm.weight', 'nrp_cls.decoder.weight', 'cls.predictions.transform.dense.weight', 'nrp_cls.LayerNorm.bias', 'ptc_cls.weight', 'markuplm.pooler.dense.weight']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/432 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 432/432 [04:19<00:00,  1.66it/s]


True
True
contract done 84


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'nrp_cls.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'nrp_cls.dense.bias', 'cls.predictions.transform.dense.bias', 'ptc_cls.bias', 'cls.predictions.decoder.weight', 'markuplm.pooler.dense.bias', 'nrp_cls.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'nrp_cls.LayerNorm.weight', 'nrp_cls.decoder.weight', 'cls.predictions.transform.dense.weight', 'nrp_cls.LayerNorm.bias', 'ptc_cls.weight', 'markuplm.pooler.dense.weight']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/159 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 159/159 [01:32<00:00,  1.71it/s]


True
True
contract done 92


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'nrp_cls.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'nrp_cls.dense.bias', 'cls.predictions.transform.dense.bias', 'ptc_cls.bias', 'cls.predictions.decoder.weight', 'markuplm.pooler.dense.bias', 'nrp_cls.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'nrp_cls.LayerNorm.weight', 'nrp_cls.decoder.weight', 'cls.predictions.transform.dense.weight', 'nrp_cls.LayerNorm.bias', 'ptc_cls.weight', 'markuplm.pooler.dense.weight']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/142 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 142/142 [01:30<00:00,  1.57it/s]


True
True
contract done 37


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'nrp_cls.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'nrp_cls.dense.bias', 'cls.predictions.transform.dense.bias', 'ptc_cls.bias', 'cls.predictions.decoder.weight', 'markuplm.pooler.dense.bias', 'nrp_cls.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'nrp_cls.LayerNorm.weight', 'nrp_cls.decoder.weight', 'cls.predictions.transform.dense.weight', 'nrp_cls.LayerNorm.bias', 'ptc_cls.weight', 'markuplm.pooler.dense.weight']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/126 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 126/126 [01:17<00:00,  1.63it/s]


True
True
contract done 104


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'nrp_cls.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'nrp_cls.dense.bias', 'cls.predictions.transform.dense.bias', 'ptc_cls.bias', 'cls.predictions.decoder.weight', 'markuplm.pooler.dense.bias', 'nrp_cls.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'nrp_cls.LayerNorm.weight', 'nrp_cls.decoder.weight', 'cls.predictions.transform.dense.weight', 'nrp_cls.LayerNorm.bias', 'ptc_cls.weight', 'markuplm.pooler.dense.weight']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/144 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 144/144 [01:33<00:00,  1.54it/s]


True
True
contract done 35


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'nrp_cls.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'nrp_cls.dense.bias', 'cls.predictions.transform.dense.bias', 'ptc_cls.bias', 'cls.predictions.decoder.weight', 'markuplm.pooler.dense.bias', 'nrp_cls.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'nrp_cls.LayerNorm.weight', 'nrp_cls.decoder.weight', 'cls.predictions.transform.dense.weight', 'nrp_cls.LayerNorm.bias', 'ptc_cls.weight', 'markuplm.pooler.dense.weight']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/144 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 144/144 [01:33<00:00,  1.54it/s]


True
True
contract done 31


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'nrp_cls.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'nrp_cls.dense.bias', 'cls.predictions.transform.dense.bias', 'ptc_cls.bias', 'cls.predictions.decoder.weight', 'markuplm.pooler.dense.bias', 'nrp_cls.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'nrp_cls.LayerNorm.weight', 'nrp_cls.decoder.weight', 'cls.predictions.transform.dense.weight', 'nrp_cls.LayerNorm.bias', 'ptc_cls.weight', 'markuplm.pooler.dense.weight']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/181 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 181/181 [01:57<00:00,  1.54it/s]


True
True
contract done 128


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'nrp_cls.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'nrp_cls.dense.bias', 'cls.predictions.transform.dense.bias', 'ptc_cls.bias', 'cls.predictions.decoder.weight', 'markuplm.pooler.dense.bias', 'nrp_cls.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'nrp_cls.LayerNorm.weight', 'nrp_cls.decoder.weight', 'cls.predictions.transform.dense.weight', 'nrp_cls.LayerNorm.bias', 'ptc_cls.weight', 'markuplm.pooler.dense.weight']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/125 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 125/125 [01:21<00:00,  1.53it/s]


True
True
contract done 129


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'nrp_cls.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'nrp_cls.dense.bias', 'cls.predictions.transform.dense.bias', 'ptc_cls.bias', 'cls.predictions.decoder.weight', 'markuplm.pooler.dense.bias', 'nrp_cls.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'nrp_cls.LayerNorm.weight', 'nrp_cls.decoder.weight', 'cls.predictions.transform.dense.weight', 'nrp_cls.LayerNorm.bias', 'ptc_cls.weight', 'markuplm.pooler.dense.weight']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/107 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 107/107 [01:08<00:00,  1.55it/s]


True
True
contract done 30


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'nrp_cls.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'nrp_cls.dense.bias', 'cls.predictions.transform.dense.bias', 'ptc_cls.bias', 'cls.predictions.decoder.weight', 'markuplm.pooler.dense.bias', 'nrp_cls.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'nrp_cls.LayerNorm.weight', 'nrp_cls.decoder.weight', 'cls.predictions.transform.dense.weight', 'nrp_cls.LayerNorm.bias', 'ptc_cls.weight', 'markuplm.pooler.dense.weight']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/123 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 123/123 [01:21<00:00,  1.51it/s]


True
True
contract done 116


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'nrp_cls.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'nrp_cls.dense.bias', 'cls.predictions.transform.dense.bias', 'ptc_cls.bias', 'cls.predictions.decoder.weight', 'markuplm.pooler.dense.bias', 'nrp_cls.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'nrp_cls.LayerNorm.weight', 'nrp_cls.decoder.weight', 'cls.predictions.transform.dense.weight', 'nrp_cls.LayerNorm.bias', 'ptc_cls.weight', 'markuplm.pooler.dense.weight']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/131 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 131/131 [01:29<00:00,  1.47it/s]


True
True
contract done 137


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'nrp_cls.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'nrp_cls.dense.bias', 'cls.predictions.transform.dense.bias', 'ptc_cls.bias', 'cls.predictions.decoder.weight', 'markuplm.pooler.dense.bias', 'nrp_cls.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'nrp_cls.LayerNorm.weight', 'nrp_cls.decoder.weight', 'cls.predictions.transform.dense.weight', 'nrp_cls.LayerNorm.bias', 'ptc_cls.weight', 'markuplm.pooler.dense.weight']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/133 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 133/133 [01:30<00:00,  1.47it/s]


True
True
contract done 61


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'nrp_cls.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'nrp_cls.dense.bias', 'cls.predictions.transform.dense.bias', 'ptc_cls.bias', 'cls.predictions.decoder.weight', 'markuplm.pooler.dense.bias', 'nrp_cls.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'nrp_cls.LayerNorm.weight', 'nrp_cls.decoder.weight', 'cls.predictions.transform.dense.weight', 'nrp_cls.LayerNorm.bias', 'ptc_cls.weight', 'markuplm.pooler.dense.weight']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/120 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 120/120 [01:16<00:00,  1.56it/s]


True
True
contract done 67


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'nrp_cls.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'nrp_cls.dense.bias', 'cls.predictions.transform.dense.bias', 'ptc_cls.bias', 'cls.predictions.decoder.weight', 'markuplm.pooler.dense.bias', 'nrp_cls.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'nrp_cls.LayerNorm.weight', 'nrp_cls.decoder.weight', 'cls.predictions.transform.dense.weight', 'nrp_cls.LayerNorm.bias', 'ptc_cls.weight', 'markuplm.pooler.dense.weight']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/127 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 127/127 [01:24<00:00,  1.51it/s]


True
True
contract done 72


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'nrp_cls.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'nrp_cls.dense.bias', 'cls.predictions.transform.dense.bias', 'ptc_cls.bias', 'cls.predictions.decoder.weight', 'markuplm.pooler.dense.bias', 'nrp_cls.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'nrp_cls.LayerNorm.weight', 'nrp_cls.decoder.weight', 'cls.predictions.transform.dense.weight', 'nrp_cls.LayerNorm.bias', 'ptc_cls.weight', 'markuplm.pooler.dense.weight']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/112 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 112/112 [01:13<00:00,  1.52it/s]


True
True
contract done 58


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.transform.LayerNorm.weight', 'nrp_cls.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'nrp_cls.dense.bias', 'cls.predictions.transform.dense.bias', 'ptc_cls.bias', 'cls.predictions.decoder.weight', 'markuplm.pooler.dense.bias', 'nrp_cls.dense.weight', 'cls.predictions.bias', 'cls.predictions.decoder.bias', 'nrp_cls.LayerNorm.weight', 'nrp_cls.decoder.weight', 'cls.predictions.transform.dense.weight', 'nrp_cls.LayerNorm.bias', 'ptc_cls.weight', 'markuplm.pooler.dense.weight']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                    | 0/97 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|███████████████████████████| 97/97 [01:02<00:00,  1.56it/s]


True
True
contract done 59


In [13]:
val_contracts = glob.glob(os.path.join(val_contract_dir, "*.csv"))

print(f"Found {len(val_contracts)} in val dir")

all_data = [] 
for tagged_path in val_contracts:
    tagged_output = create_raw_dataset(tagged_path, 
                                       id2label=id2label, 
                                       label2id=label2id,
                                       is_train=False)

    all_data.append(tagged_output)

Found 10 in val dir


In [15]:
processor = MarkupLMProcessor.from_pretrained(
    "microsoft/markuplm-base",
    only_label_first_subword=True
)
processor.parse_html = False

In [16]:
for i, data in enumerate(all_data):
    
    data = [data]
    contract_num = val_contracts[i].split('_')[-1].split('.csv')[0]
    inference_csv_path = val_contracts[i]
    
    results = main(config, data, model_ckpt_path=model_ckpt_path,is_train=False)
    df = pd.DataFrame.from_dict(results)
    df = df.explode(['nodes', 'preds']).reset_index(drop=True)
    
    inference_csv = pd.read_csv(inference_csv_path)[['xpaths','text']].copy()
    
    g = inference_csv.apply(lambda row: create_repeat_xpaths(processor, row), axis=1)
    
    inference_csv['xpaths_list'] = g.apply(lambda x: x[0])
    inference_csv['text_list'] = g.apply(lambda x: x[1])
    
    exploded = inference_csv.explode(['xpaths_list', 'text_list']).reset_index(drop=True)
    
    filter_df_based_on_start_and_end_token = df.query("nodes == '<s>' | nodes == '</s>'")
    start_idx = filter_df_based_on_start_and_end_token.index.min() + 1
    end_idx = filter_df_based_on_start_and_end_token.index.max()
    
    preds = df.iloc[start_idx:end_idx].copy()
    
    exploded_partial = exploded.copy()
    
    exploded_partial['nodes'] = preds['nodes'].to_list()
    exploded_partial['preds'] = preds['preds'].to_list()
    
    print(sum(exploded_partial.text_list == exploded_partial.nodes) == len(exploded_partial))
    
    print((exploded_partial['xpaths'] == exploded_partial['xpaths_list']).sum() == len(exploded_partial))
    
    exploded_partial[['xpaths','text','nodes','preds']].to_json(f'val_inference/predictions_contract_{contract_num}.json',orient='records')
    print('contract done', contract_num)

Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/133 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.


huggingface/tokenizers: The current process just got forked, after parallelism has already been used. Disabling parallelism to avoid deadlocks...
	- Avoid using `tokenizers` before the fork if possible
	- Explicitly set the environment variable TOKENIZERS_PARALLELISM=(true | false)


inference_loop: 100%|█████████████████████████| 133/133 [01:06<00:00,  2.01it/s]


True
True
contract done 40


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/105 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 105/105 [00:56<00:00,  1.87it/s]


True
True
contract done 55


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/156 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 156/156 [01:20<00:00,  1.93it/s]


True
True
contract done 41


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/141 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 141/141 [01:16<00:00,  1.85it/s]


True
True
contract done 52


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/123 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 123/123 [01:05<00:00,  1.88it/s]


True
True
contract done 86


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/160 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 160/160 [01:21<00:00,  1.97it/s]


True
True
contract done 79


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/118 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 118/118 [01:00<00:00,  1.96it/s]


True
True
contract done 39


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/186 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 186/186 [01:32<00:00,  2.01it/s]


True
True
contract done 76


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/107 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 107/107 [00:53<00:00,  2.00it/s]


True
True
contract done 65


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/168 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 168/168 [01:23<00:00,  2.01it/s]


True
True
contract done 140


In [18]:
train_contracts = glob.glob(os.path.join(train_contract_dir, "*.csv"))

print(f"Found {len(train_contracts)} in train dir")

all_data = [] 
for tagged_path in train_contracts:
    tagged_output = create_raw_dataset(tagged_path, 
                                       id2label=id2label, 
                                       label2id=label2id,
                                       is_train=False)

    all_data.append(tagged_output)

Found 121 in train dir


In [19]:
for i, data in enumerate(all_data):
    
    data = [data]
    contract_num = train_contracts[i].split('_')[-1].split('.csv')[0]
    inference_csv_path = train_contracts[i]
    
    results = main(config, data, model_ckpt_path=model_ckpt_path,is_train=False)
    df = pd.DataFrame.from_dict(results)
    df = df.explode(['nodes', 'preds']).reset_index(drop=True)
    
    inference_csv = pd.read_csv(inference_csv_path)[['xpaths','text']].copy()
    
    g = inference_csv.apply(lambda row: create_repeat_xpaths(processor, row), axis=1)
    
    inference_csv['xpaths_list'] = g.apply(lambda x: x[0])
    inference_csv['text_list'] = g.apply(lambda x: x[1])
    
    exploded = inference_csv.explode(['xpaths_list', 'text_list']).reset_index(drop=True)
    
    filter_df_based_on_start_and_end_token = df.query("nodes == '<s>' | nodes == '</s>'")
    start_idx = filter_df_based_on_start_and_end_token.index.min() + 1
    end_idx = filter_df_based_on_start_and_end_token.index.max()
    
    preds = df.iloc[start_idx:end_idx].copy()
    
    exploded_partial = exploded.copy()
    
    exploded_partial['nodes'] = preds['nodes'].to_list()
    exploded_partial['preds'] = preds['preds'].to_list()
    
    print(sum(exploded_partial.text_list == exploded_partial.nodes) == len(exploded_partial))
    
    print((exploded_partial['xpaths'] == exploded_partial['xpaths_list']).sum() == len(exploded_partial))
    
    exploded_partial[['xpaths','text','nodes','preds']].to_json(f'train_inference/predictions_contract_{contract_num}.json',orient='records')
    print('contract done', contract_num)

Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/114 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 114/114 [00:55<00:00,  2.04it/s]


True
True
contract done 83


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/106 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 106/106 [00:51<00:00,  2.04it/s]


True
True
contract done 97


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/163 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 163/163 [01:22<00:00,  1.98it/s]


True
True
contract done 54


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/310 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 310/310 [02:42<00:00,  1.91it/s]


True
True
contract done 68


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/121 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 121/121 [01:04<00:00,  1.88it/s]


True
True
contract done 96


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/119 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 119/119 [00:59<00:00,  2.01it/s]


True
True
contract done 82


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/183 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 183/183 [01:31<00:00,  2.00it/s]


True
True
contract done 94


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/135 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 135/135 [01:07<00:00,  1.99it/s]


True
True
contract done 80


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/174 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 174/174 [01:26<00:00,  2.02it/s]


True
True
contract done 57


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/101 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 101/101 [00:50<00:00,  1.99it/s]


True
True
contract done 43


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/155 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 155/155 [01:15<00:00,  2.05it/s]


True
True
contract done 42


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/139 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 139/139 [01:08<00:00,  2.04it/s]


True
True
contract done 56


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/142 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 142/142 [01:11<00:00,  1.97it/s]


True
True
contract done 95


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/156 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 156/156 [01:26<00:00,  1.81it/s]


True
True
contract done 91


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/127 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 127/127 [01:10<00:00,  1.79it/s]


True
True
contract done 47


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/133 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 133/133 [01:16<00:00,  1.73it/s]


True
True
contract done 53


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/145 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 145/145 [01:25<00:00,  1.69it/s]


True
True
contract done 90


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/141 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 141/141 [01:30<00:00,  1.56it/s]


True
True
contract done 45


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/169 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 169/169 [01:51<00:00,  1.52it/s]


True
True
contract done 51


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/144 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 144/144 [01:43<00:00,  1.39it/s]


True
True
contract done 148


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/184 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 184/184 [02:20<00:00,  1.31it/s]


True
True
contract done 50


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/194 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 194/194 [02:36<00:00,  1.24it/s]


True
True
contract done 44


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/112 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 112/112 [01:30<00:00,  1.23it/s]


True
True
contract done 78


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/138 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 138/138 [01:21<00:00,  1.70it/s]


True
True
contract done 93


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/107 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 107/107 [00:53<00:00,  1.99it/s]


True
True
contract done 87


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/184 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 184/184 [01:28<00:00,  2.07it/s]


True
True
contract done 23


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                    | 0/99 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|███████████████████████████| 99/99 [00:49<00:00,  2.00it/s]


True
True
contract done 106


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/101 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 101/101 [00:49<00:00,  2.04it/s]


True
True
contract done 112


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/107 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 107/107 [00:52<00:00,  2.06it/s]


True
True
contract done 113


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/133 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 133/133 [01:04<00:00,  2.05it/s]


True
True
contract done 107


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/132 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 132/132 [01:04<00:00,  2.04it/s]


True
True
contract done 36


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/139 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 139/139 [01:07<00:00,  2.06it/s]


True
True
contract done 22


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/157 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 157/157 [01:17<00:00,  2.03it/s]


True
True
contract done 34


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/160 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 160/160 [01:24<00:00,  1.89it/s]


True
True
contract done 20


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/122 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 122/122 [01:07<00:00,  1.81it/s]


True
True
contract done 139


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/173 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 173/173 [01:36<00:00,  1.79it/s]


True
True
contract done 111


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/207 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 207/207 [01:52<00:00,  1.84it/s]


True
True
contract done 105


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/157 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 157/157 [01:26<00:00,  1.82it/s]


True
True
contract done 110


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/100 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 100/100 [00:54<00:00,  1.83it/s]


True
True
contract done 138


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                    | 0/97 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|███████████████████████████| 97/97 [00:56<00:00,  1.72it/s]


True
True
contract done 21


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/141 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 141/141 [01:38<00:00,  1.44it/s]


True
True
contract done 114


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/153 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 153/153 [01:27<00:00,  1.75it/s]


True
True
contract done 19


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/146 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 146/146 [01:24<00:00,  1.73it/s]


True
True
contract done 100


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/142 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 142/142 [01:20<00:00,  1.77it/s]


True
True
contract done 25


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/175 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 175/175 [01:34<00:00,  1.85it/s]


True
True
contract done 24


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/124 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 124/124 [01:08<00:00,  1.80it/s]


True
True
contract done 101


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/140 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 140/140 [01:13<00:00,  1.90it/s]


True
True
contract done 18


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/125 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 125/125 [01:07<00:00,  1.86it/s]


True
True
contract done 115


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/132 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 132/132 [01:09<00:00,  1.89it/s]


True
True
contract done 103


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/186 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 186/186 [01:37<00:00,  1.91it/s]


True
True
contract done 8


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/148 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 148/148 [01:17<00:00,  1.90it/s]


True
True
contract done 117


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/150 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 150/150 [01:18<00:00,  1.90it/s]


True
True
contract done 26


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/106 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 106/106 [00:56<00:00,  1.88it/s]


True
True
contract done 32


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/405 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 405/405 [03:48<00:00,  1.77it/s]


True
True
contract done 33


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/199 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 199/199 [01:53<00:00,  1.76it/s]


True
True
contract done 27


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/129 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 129/129 [01:16<00:00,  1.69it/s]


True
True
contract done 9


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/111 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 111/111 [01:05<00:00,  1.70it/s]


True
True
contract done 102


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/135 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 135/135 [01:14<00:00,  1.81it/s]


True
True
contract done 4


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/136 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 136/136 [01:20<00:00,  1.68it/s]


True
True
contract done 16


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/117 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 117/117 [01:10<00:00,  1.66it/s]


True
True
contract done 127


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/112 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 112/112 [01:02<00:00,  1.80it/s]


True
True
contract done 133


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/133 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 133/133 [01:13<00:00,  1.82it/s]


True
True
contract done 132


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/157 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 157/157 [01:26<00:00,  1.82it/s]


True
True
contract done 126


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/151 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 151/151 [01:28<00:00,  1.71it/s]


True
True
contract done 17


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/197 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 197/197 [01:52<00:00,  1.75it/s]


True
True
contract done 5


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/132 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 132/132 [01:16<00:00,  1.72it/s]


True
True
contract done 15


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/182 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 182/182 [01:44<00:00,  1.74it/s]


True
True
contract done 118


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/145 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 145/145 [01:22<00:00,  1.75it/s]


True
True
contract done 7


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/129 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 129/129 [01:13<00:00,  1.75it/s]


True
True
contract done 29


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/122 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 122/122 [01:09<00:00,  1.76it/s]


True
True
contract done 130


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/111 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 111/111 [01:04<00:00,  1.73it/s]


True
True
contract done 124


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/104 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 104/104 [00:59<00:00,  1.75it/s]


True
True
contract done 125


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/142 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 142/142 [01:20<00:00,  1.77it/s]


True
True
contract done 131


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/145 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 145/145 [01:23<00:00,  1.74it/s]


True
True
contract done 28


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                    | 0/85 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|███████████████████████████| 85/85 [00:49<00:00,  1.71it/s]


True
True
contract done 119


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                    | 0/93 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|███████████████████████████| 93/93 [00:54<00:00,  1.72it/s]


True
True
contract done 6


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/139 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 139/139 [01:19<00:00,  1.74it/s]


True
True
contract done 14


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/140 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 140/140 [01:18<00:00,  1.78it/s]


True
True
contract done 135


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/169 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 169/169 [01:41<00:00,  1.67it/s]


True
True
contract done 38


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/200 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 200/200 [01:49<00:00,  1.83it/s]


True
True
contract done 121


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/151 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 151/151 [01:27<00:00,  1.73it/s]


True
True
contract done 10


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/151 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 151/151 [01:26<00:00,  1.75it/s]


True
True
contract done 109


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/165 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 165/165 [01:33<00:00,  1.76it/s]


True
True
contract done 2


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/119 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 119/119 [01:08<00:00,  1.73it/s]


True
True
contract done 3


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/171 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 171/171 [01:39<00:00,  1.71it/s]


True
True
contract done 108


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/104 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 104/104 [01:00<00:00,  1.73it/s]


True
True
contract done 11


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/153 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 153/153 [01:28<00:00,  1.73it/s]


True
True
contract done 120


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/144 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 144/144 [01:22<00:00,  1.75it/s]


True
True
contract done 134


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/139 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 139/139 [01:19<00:00,  1.74it/s]


True
True
contract done 122


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/109 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 109/109 [01:02<00:00,  1.74it/s]


True
True
contract done 136


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/141 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 141/141 [01:21<00:00,  1.73it/s]


True
True
contract done 1


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/144 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 144/144 [01:21<00:00,  1.76it/s]


True
True
contract done 13


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/127 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 127/127 [01:13<00:00,  1.73it/s]


True
True
contract done 12


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/133 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 133/133 [01:16<00:00,  1.74it/s]


True
True
contract done 0


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/149 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 149/149 [01:24<00:00,  1.76it/s]


True
True
contract done 123


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/131 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 131/131 [01:16<00:00,  1.72it/s]


True
True
contract done 75


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/136 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 136/136 [01:17<00:00,  1.76it/s]


True
True
contract done 144


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/136 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 136/136 [01:18<00:00,  1.73it/s]


True
True
contract done 49


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/164 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 164/164 [01:33<00:00,  1.74it/s]


True
True
contract done 150


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/155 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 155/155 [01:27<00:00,  1.78it/s]


True
True
contract done 151


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/136 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 136/136 [01:17<00:00,  1.75it/s]


True
True
contract done 48


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/328 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 328/328 [03:01<00:00,  1.81it/s]


True
True
contract done 145


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/126 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 126/126 [01:09<00:00,  1.81it/s]


True
True
contract done 74


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/132 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 132/132 [01:12<00:00,  1.83it/s]


True
True
contract done 60


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/130 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 130/130 [01:13<00:00,  1.78it/s]


True
True
contract done 89


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/130 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 130/130 [01:11<00:00,  1.82it/s]


True
True
contract done 62


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/171 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 171/171 [01:44<00:00,  1.63it/s]


True
True
contract done 147


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/142 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 142/142 [01:23<00:00,  1.70it/s]


True
True
contract done 146


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/121 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 121/121 [01:07<00:00,  1.78it/s]


True
True
contract done 63


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/131 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 131/131 [01:13<00:00,  1.77it/s]


True
True
contract done 77


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/152 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 152/152 [01:26<00:00,  1.77it/s]


True
True
contract done 88


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/126 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 126/126 [01:19<00:00,  1.59it/s]


True
True
contract done 98


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/143 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 143/143 [01:22<00:00,  1.74it/s]


True
True
contract done 142


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/205 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 205/205 [01:58<00:00,  1.73it/s]


True
True
contract done 73


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/164 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 164/164 [01:36<00:00,  1.70it/s]


True
True
contract done 66


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/145 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 145/145 [01:25<00:00,  1.69it/s]


True
True
contract done 143


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/158 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 158/158 [01:31<00:00,  1.72it/s]


True
True
contract done 99


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/120 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 120/120 [01:08<00:00,  1.74it/s]


True
True
contract done 141


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/117 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 117/117 [01:06<00:00,  1.75it/s]


True
True
contract done 64


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/161 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 161/161 [01:40<00:00,  1.60it/s]


True
True
contract done 70


Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['cls.predictions.bias', 'ptc_cls.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'nrp_cls.dense.weight', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.bias', 'nrp_cls.dense.bias', 'nrp_cls.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'nrp_cls.LayerNorm.weight', 'cls.predictions.decoder.weight', 'nrp_cls.decoder.bias', 'ptc_cls.bias', 'nrp_cls.decoder.weight', 'cls.predictions.transform.LayerNorm.bias']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/138 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 138/138 [01:21<00:00,  1.69it/s]


True
True
contract done 71


In [105]:
results = main(config, test_data, model_ckpt_path=model_ckpt_path,is_train=False)

Some weights of the model checkpoint at microsoft/markuplm-base were not used when initializing MarkupLMForTokenClassification: ['nrp_cls.decoder.bias', 'markuplm.pooler.dense.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.decoder.weight', 'nrp_cls.dense.bias', 'nrp_cls.dense.weight', 'nrp_cls.LayerNorm.bias', 'ptc_cls.weight', 'cls.predictions.bias', 'ptc_cls.bias', 'cls.predictions.transform.LayerNorm.bias', 'markuplm.pooler.dense.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.dense.bias', 'nrp_cls.LayerNorm.weight', 'nrp_cls.decoder.weight']
- This IS expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).
- This IS NOT expected if you are initializing MarkupLMForTokenClassification from the checkpoint of a model that 

**************************************************
Running Inference Loop!
**************************************************


inference_loop:   0%|                                   | 0/118 [00:00<?, ?it/s]You're using a MarkupLMTokenizerFast tokenizer. Please note that with a fast tokenizer, using the `__call__` method is faster than using a method to encode the text followed by a call to the `pad` method to get a padded encoding.
inference_loop: 100%|█████████████████████████| 118/118 [01:04<00:00,  1.84it/s]


In [106]:
df = pd.DataFrame.from_dict(results)

In [108]:
df = df.explode(['nodes', 'preds']).reset_index(drop=True)

## Let's try to match the predictions with the visualizer now (need some way to glue back to xpaths)

In [111]:
#inference_csv_path = '/Users/pranabislam/Desktop/research/MarkupMnA-Markup-Based-Segmentation-of-MnA-Agreements/contracts/test/contract_81.csv'
inference_csv_path = test_contracts[0]
inference_csv = pd.read_csv(inference_csv_path)[['xpaths','text']].copy()
inference_csv

Unnamed: 0,xpaths,text
0,/html/body/document/type,EX-2.1
1,/html/body/document/type/sequence,2
2,/html/body/document/type/sequence/filename,d101795dex21.htm
3,/html/body/document/type/sequence/filename/des...,EX-2.1
4,/html/body/document/type/sequence/filename/des...,EX-2.1
...,...,...
2921,/html/body/document/type/sequence/filename/des...,Transactions
2922,/html/body/document/type/sequence/filename/des...,Section 3.4(a)
2923,/html/body/document/type/sequence/filename/des...,U.S. Pension Plan
2924,/html/body/document/type/sequence/filename/des...,Section 9.4


In [113]:
processor = MarkupLMProcessor.from_pretrained(
    "microsoft/markuplm-base",
    only_label_first_subword=True
)
processor.parse_html = False

In [114]:
g = inference_csv.apply(lambda row: create_repeat_xpaths(processor, row), axis=1)

In [115]:
g

0       [[/html/body/document/type, /html/body/documen...
1              [[/html/body/document/type/sequence], [2]]
2       [[/html/body/document/type/sequence/filename, ...
3       [[/html/body/document/type/sequence/filename/d...
4       [[/html/body/document/type/sequence/filename/d...
                              ...                        
2921    [[/html/body/document/type/sequence/filename/d...
2922    [[/html/body/document/type/sequence/filename/d...
2923    [[/html/body/document/type/sequence/filename/d...
2924    [[/html/body/document/type/sequence/filename/d...
2925    [[/html/body/document/type/sequence/filename/d...
Length: 2926, dtype: object

In [116]:
inference_csv['xpaths_list'] = g.apply(lambda x: x[0])
inference_csv['text_list'] = g.apply(lambda x: x[1])

In [117]:
exploded = inference_csv.explode(['xpaths_list', 'text_list']).reset_index(drop=True)
exploded

Unnamed: 0,xpaths,text,xpaths_list,text_list
0,/html/body/document/type,EX-2.1,/html/body/document/type,EX
1,/html/body/document/type,EX-2.1,/html/body/document/type,-
2,/html/body/document/type,EX-2.1,/html/body/document/type,2
3,/html/body/document/type,EX-2.1,/html/body/document/type,.
4,/html/body/document/type,EX-2.1,/html/body/document/type,1
...,...,...,...,...
60072,/html/body/document/type/sequence/filename/des...,Annex I-4,/html/body/document/type/sequence/filename/des...,An
60073,/html/body/document/type/sequence/filename/des...,Annex I-4,/html/body/document/type/sequence/filename/des...,nex
60074,/html/body/document/type/sequence/filename/des...,Annex I-4,/html/body/document/type/sequence/filename/des...,I
60075,/html/body/document/type/sequence/filename/des...,Annex I-4,/html/body/document/type/sequence/filename/des...,-


In [118]:
df

Unnamed: 0,nodes,preds
0,<s>,o
1,EX,o
2,-,s_n
3,2,s_ssn
4,.,o
...,...,...
60411,<pad>,o
60412,<pad>,o
60413,<pad>,o
60414,<pad>,o


Looks like pad tokens are included here...

In [119]:
df.query("nodes == '<s>' | nodes == '</s>'")

Unnamed: 0,nodes,preds
0,<s>,o
60078,</s>,o


In [120]:
filter_df_based_on_start_and_end_token = df.query("nodes == '<s>' | nodes == '</s>'")
start_idx = filter_df_based_on_start_and_end_token.index.min() + 1
end_idx = filter_df_based_on_start_and_end_token.index.max()

In [121]:
start_idx, end_idx

(1, 60078)

In [122]:
preds = df.iloc[start_idx:end_idx].copy()

In [123]:
preds

Unnamed: 0,nodes,preds
1,EX,o
2,-,s_n
3,2,s_ssn
4,.,o
5,1,o
...,...,...
60073,An,s_n
60074,nex,s_n
60075,I,b_n
60076,-,e_n


In [124]:
exploded

Unnamed: 0,xpaths,text,xpaths_list,text_list
0,/html/body/document/type,EX-2.1,/html/body/document/type,EX
1,/html/body/document/type,EX-2.1,/html/body/document/type,-
2,/html/body/document/type,EX-2.1,/html/body/document/type,2
3,/html/body/document/type,EX-2.1,/html/body/document/type,.
4,/html/body/document/type,EX-2.1,/html/body/document/type,1
...,...,...,...,...
60072,/html/body/document/type/sequence/filename/des...,Annex I-4,/html/body/document/type/sequence/filename/des...,An
60073,/html/body/document/type/sequence/filename/des...,Annex I-4,/html/body/document/type/sequence/filename/des...,nex
60074,/html/body/document/type/sequence/filename/des...,Annex I-4,/html/body/document/type/sequence/filename/des...,I
60075,/html/body/document/type/sequence/filename/des...,Annex I-4,/html/body/document/type/sequence/filename/des...,-


In [125]:
#exploded_partial = exploded.iloc[:len(preds)].copy().reset_index(drop=True)
#exploded_partial
exploded_partial = exploded.copy()

In [126]:
exploded_partial['nodes'] = preds['nodes'].to_list()
exploded_partial['preds'] = preds['preds'].to_list()
exploded_partial

Unnamed: 0,xpaths,text,xpaths_list,text_list,nodes,preds
0,/html/body/document/type,EX-2.1,/html/body/document/type,EX,EX,o
1,/html/body/document/type,EX-2.1,/html/body/document/type,-,-,s_n
2,/html/body/document/type,EX-2.1,/html/body/document/type,2,2,s_ssn
3,/html/body/document/type,EX-2.1,/html/body/document/type,.,.,o
4,/html/body/document/type,EX-2.1,/html/body/document/type,1,1,o
...,...,...,...,...,...,...
60072,/html/body/document/type/sequence/filename/des...,Annex I-4,/html/body/document/type/sequence/filename/des...,An,An,s_n
60073,/html/body/document/type/sequence/filename/des...,Annex I-4,/html/body/document/type/sequence/filename/des...,nex,nex,s_n
60074,/html/body/document/type/sequence/filename/des...,Annex I-4,/html/body/document/type/sequence/filename/des...,I,I,b_n
60075,/html/body/document/type/sequence/filename/des...,Annex I-4,/html/body/document/type/sequence/filename/des...,-,-,e_n


In [127]:
sum(exploded_partial.text_list == exploded_partial.nodes) == len(exploded_partial)

True

In [128]:
(exploded_partial['xpaths'] == exploded_partial['xpaths_list']).sum() == len(exploded_partial)

True

In [129]:
#exploded_partial[['xpaths','text','nodes','preds']].to_json('testing_contract_81.json',orient='records')

In [130]:
exploded_partial[['xpaths','text','nodes','preds']].to_csv('testing_contract_85.csv', index=False)