In [1]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


In [2]:
%%capture
!pip install transformers datasets

In [3]:
from datasets import Dataset, load_dataset, load_metric
import transformers
from transformers import AutoTokenizer, AutoModelForTokenClassification, TrainingArguments, Trainer
from transformers import DataCollatorForTokenClassification
import numpy as np
import torch
import pandas as pd
import json
from tqdm import tqdm

In [4]:
task = "ner"

label_list = ['B-PREDICATE-ACT', 'B-ARGUMENT-PLACE',
              'B-ARGUMENT-PATIENT', 'B-ARGUMENT-AGENT',
              'B-PREDICATE-STATE', 'I-PREDICATE-ACT',
              'I-ARGUMENT-PLACE', 'I-ARGUMENT-AGENT',
              'I-ARGUMENT-PATIENT', 'I-PREDICATE-STATE',
              'O']
label_encoding_dict = {value: idx for idx, value in enumerate(label_list)}

In [6]:
def get_token_dataset(sentences):
    list_tokens, list_labels = [], []
    for sentence in sentences:
        list_tokens.append(sentence.split())
        list_labels.append(["O"] * len(list_tokens[-1]))
    return Dataset.from_pandas(pd.DataFrame({'tokens': list_tokens, 'ner_tags': list_labels}))

In [5]:
def tokenize_and_align_labels(examples):
    global tokenizer
    label_all_tokens = True
    tokenized_inputs = tokenizer(list(examples["tokens"]), truncation=True, is_split_into_words=True)
    all_word_ids = []
    labels = []
    for i, label in enumerate(examples[f"{task}_tags"]):
        word_ids = tokenized_inputs.word_ids(batch_index=i)
        previous_word_idx = None
        label_ids = []
        all_word_ids.append(word_ids)
        for word_idx in word_ids:
            # Special tokens have a word id that is None. We set the label to -100 so they are automatically
            # ignored in the loss function.
            if word_idx is None:
                label_ids.append(-100)
            elif label[word_idx] == '0':
                label_ids.append(0)
            # We set the label for the first token of each word.
            elif word_idx != previous_word_idx:
                label_ids.append(label_encoding_dict[label[word_idx]])
            # For the other tokens in a word, we set the label to either the current label or -100, depending on
            # the label_all_tokens flag.
            else:
                label_ids.append(label_encoding_dict[label[word_idx]] if label_all_tokens else -100)
            previous_word_idx = word_idx

        labels.append(label_ids)

    tokenized_inputs["labels"] = labels
    tokenized_inputs["word_ids"] = all_word_ids
    return tokenized_inputs

In [7]:
def predict(sentence):
    list_predictions = []
    test_dataset = get_token_dataset(sentence)
    # test_dataset = get_token_dataset([df["contexts"][data_i] for data_i in range(len(df))])
    test_tokenized_datasets = test_dataset.map(tokenize_and_align_labels, batched=True)

    trainer = Trainer(model, eval_dataset=test_tokenized_datasets, data_collator=data_collator)
    predictions, pseudo_labels, _ = trainer.predict(test_tokenized_datasets)
    predictions = np.argmax(predictions, axis=2)

    list_prediction_probabily = []
    word_ids = test_tokenized_datasets["word_ids"]
    
    tokens = test_dataset['tokens']
    for idx, (prediction, pseudo_label) in tqdm(enumerate(zip(predictions, pseudo_labels))):
        prediction_probabily = []
        for i, (p, l) in enumerate(zip(prediction, pseudo_label)):
            if l == -100: continue
            if word_ids[idx][i] == word_ids[idx][i-1]: 
                continue
            prediction_probabily.append(label_list[p])
       
        if len(prediction_probabily) != len(tokens[idx]):
            print(len(prediction_probabily), len(tokens[idx]))
            print((prediction_probabily), (tokens[idx]))
            error_files.add(idx)
            for k in range(len(prediction_probabily), len(tokens[idx])):
                prediction_probabily.append("O")
            print(len(prediction_probabily), len(tokens[idx]))

        list_predictions.append(prediction_probabily)
    return list_predictions

In [8]:
def merge(all_sentence, num_sentences_in_para): # Merge sentence into paragraph using num_sentences_in_para

    paragraphs = [] 
    previous_num_sentences = 0
    for num_sentences in num_sentences_in_para:
        para = []
        for sentence in all_sentence[previous_num_sentences:previous_num_sentences+num_sentences]:
            para.append(sentence)
        paragraphs.append(para)
        previous_num_sentences += num_sentences
    return paragraphs

In [9]:
def start_end_index(example):
    tok_to_orig_index = []
    orig_to_tok_index = []
    all_doc_tokens = []
    for i, token in enumerate(example['contexts'].split()):
        for t in range(len(token)+1):
            tok_to_orig_index.append(i)
        all_doc_tokens.append(token)
    if example['is_impossible']:
        start_index = -1
        end_index = -1
    else:
        start_index = tok_to_orig_index[int(example['start'])]
        end_index = tok_to_orig_index[int(example['end'])]
    return start_index, end_index

In [None]:
df['answer_text'] = df_org['answers']
start_end = [start_end_index(df_org.iloc[i]) for i in range(len(df))]
df['start'] = [se[0] for se in start_end]
df['end'] = [se[-1] for se in start_end]

NameError: ignored

In [None]:
df = pd.read_csv(f"/content/drive/MyDrive/KLTN/srl/data_original/train_5.csv")

In [None]:
df.head()

Unnamed: 0,id,contexts,questions,is_impossible,answers,start,end
0,uit_000001,Phạm Văn Đồng ( 1 tháng 3 năm 1906 – 29 tháng ...,Tên gọi nào được Phạm Văn Đồng sử dụng khi làm...,False,Lâm Bá Kiệt,522.0,533.0
1,uit_000002,Phạm Văn Đồng ( 1 tháng 3 năm 1906 – 29 tháng ...,Phạm Văn Đồng giữ chức vụ gì trong bộ máy Nhà ...,False,Thủ tướng,62.0,71.0
2,uit_000003,Phạm Văn Đồng ( 1 tháng 3 năm 1906 – 29 tháng ...,"Giai đoạn năm 1955 - 1976 , Phạm Văn Đồng nắm ...",False,Thủ tướng Chính phủ Việt Nam Dân chủ Cộng hòa,250.0,295.0
3,uit_000004,Phạm Văn Đồng ( 1 tháng 3 năm 1906 – 29 tháng ...,Tên gọi nào được Phạm Văn Đồng sử dụng trước k...,True,,,
4,uit_000005,Phạm Văn Đồng ( 1 tháng 3 năm 1906 – 29 tháng ...,Hồ Học Lãm giữ chức vụ gì trong bộ máy Nhà nướ...,True,,,


In [10]:
files = ["train_5", "dev_5"]
for n_cp in range(10):
    model_checkpoint = f'/content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-{n_cp+1}/checkpoint-15000'
    for f in files:
        print("model_checkpoint: ",model_checkpoint,"\nfile: ", f)
        df = pd.read_csv(f"/content/drive/MyDrive/KLTN/srl/data_original/{f}.csv")
        print("model_checkpoint: ", model_checkpoint)
        df.columns = ["id",'contexts', 'questions', "labels", "answers","start","end"]
        df['sample_id'] = range(1, len(df) + 1)
        df = df[['sample_id',"id",'contexts', 'questions', "labels", "answers","start","end"]]

        sample_ids = [df["sample_id"][i] for i in range(len(df))]

        error_files = set()
        list_predictions = []
        
        tokenizer = AutoTokenizer.from_pretrained(model_checkpoint)
        assert isinstance(tokenizer, transformers.PreTrainedTokenizerFast)
        data_collator = DataCollatorForTokenClassification(tokenizer)

        # Split all paragraphs to sentences and store in all_sentences 
        all_sentence = []
        num_sentences_in_para = [] # the list of the number of sentences in paragraphs
        for paragraph in df['contexts']:
            num_sentences_in_para.append(len(paragraph.split(" . ")))
            for sentence in paragraph.split(" . "):
                all_sentence.append(sentence)
        
        prediction_lst = []
        model = AutoModelForTokenClassification.from_pretrained(model_checkpoint, num_labels=len(label_encoding_dict))
        print("predicting...")
        print(len(all_sentence))
        predict_sentence = predict(all_sentence)

        
        print("Predict sentence completely!")
        question_pred_list = predict(df['questions'])
        print("Predict question completely!")

        lst_tag_predict = merge(predict_sentence, num_sentences_in_para)
        s_list = []
        for tag_para in lst_tag_predict:
            s = []
            for tag_sen in tag_para:
                for tag in tag_sen:
                    s.append(tag)
                s.append('O')
            s_list.append(s[:-1])
        df_tags = pd.DataFrame({"contexts":df['contexts'], 
                        "context_tags":s_list, 
                        "questions":df['questions'],
                        "question_tags":question_pred_list,
                        "labels":df['labels'] })
        df_tags.to_csv(f"/content/drive/MyDrive/KLTN/srl/data/data_prediction_{f}_{n_cp+1}.csv", index=False)

model_checkpoint:  /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-1/checkpoint-15000 
file:  train_5
model_checkpoint:  /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-1/checkpoint-15000
predicting...
117502




  0%|          | 0/118 [00:00<?, ?ba/s]

The following columns in the test set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: word_ids, tokens, ner_tags. If word_ids, tokens, ner_tags are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 117502
  Batch size = 8


65069it [00:10, 6288.81it/s]

72 74
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'] ['Cuộc', 'thám', 'diễm', 'do', 'von', 'Bellingshausen', 'và', 'Lazarev', 'dẫn', 'đầu', 'trên', 'các', 'chiếc', 'tàu', 'Vostok', 'và', 'Mirny', 'đã', 'đến', 'điểm', '32', 'km', '(', '20', 'mi', ')', 'từ', 'Queen', "Maud's", 'Land', 'và', 'ghi', 'nhận', 'việc', 'nhìn', 'thấy', 'lớp', 'băng', 'tại', '69', '°', '21′', '28', '″N', '2', '°', '14', '′', '50', '″', 'T', '\ufeff', '/', '\ufeff69,35778', '°', 'N', '2,24722', '°', 'T', '\ufeff', '/', '-', '69.35778', ';', '-', '2.24722', 'mà', 'nay', 'được', 'gọi', 'là', 'thềm', 'băng', 'Fimbul']
74 74
72 74
['O', 'O', 'O', 'O', 'O', 'O', 'O'

86313it [00:13, 6221.50it/s]

234 246
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'I-ARG

106456it [00:16, 6053.00it/s]

33 34
['B-ARGUMENT-PATIENT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'] ['Qur’an', '(', 'phát', 'âm', '/kɔrˈɑːn', '/', ';', 'tiếng', 'Ả', 'Rập', ':', 'القرآن', '\u200e', 'al-qur’ān', 'có', 'nghĩa', 'là', '"', 'sự', 'xướng', 'đọc', '"', ')', 'là', 'văn', 'bản', 'tôn', 'giáo', 'quan', 'trọng', 'nhất', 'của', 'đạo', 'Hồi']
34 34
33 34
['B-ARGUMENT-PATIENT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'] ['Qur’an', '(', 'phát', 'âm', '/kɔrˈɑːn', '/', ';', 'tiếng', 'Ả', 'Rập', ':', 'القرآن', '\u200e', 'al-qur’ān', 'có', 'nghĩa', 'là', '"', 'sự', 'xướng', 'đọc', '"', ')', 'là', 'văn', 'bản', 'tôn', 'giáo', 'quan', 'trọng', 'nhất', 'của', 'đạo', 'Hồi']
34 34
33 34
['B-ARGUMENT-PATIENT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', '

117502it [00:18, 6386.28it/s]


Predict sentence completely!


  0%|          | 0/22 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: word_ids, tokens, ner_tags. If word_ids, tokens, ner_tags are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 21234
  Batch size = 8


21234it [00:00, 39982.78it/s]


Predict question completely!
model_checkpoint:  /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-1/checkpoint-15000 
file:  dev_5


Didn't find file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-1/checkpoint-15000/sentencepiece.bpe.model. We won't load it.
Didn't find file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-1/checkpoint-15000/added_tokens.json. We won't load it.
loading file None
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-1/checkpoint-15000/tokenizer.json
loading file None
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-1/checkpoint-15000/special_tokens_map.json
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-1/checkpoint-15000/tokenizer_config.json


model_checkpoint:  /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-1/checkpoint-15000


loading configuration file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-1/checkpoint-15000/config.json
Model config XLMRobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-1/checkpoint-15000",
  "architectures": [
    "XLMRobertaForTokenClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6",
    "7": "LABEL_7",
    "8": "LABEL_8",
    "9": "LABEL_9",
    "10": "LABEL_10"
  },
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_10": 10,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4,
    "LABEL_5": 5,
    "LABEL_6": 6,
    "L

predicting...
39579


  0%|          | 0/40 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: word_ids, tokens, ner_tags. If word_ids, tokens, ner_tags are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 39579
  Batch size = 8


16495it [00:02, 6168.79it/s]

441 468
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', '

39579it [00:06, 6422.28it/s]


Predict sentence completely!


  0%|          | 0/8 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: word_ids, tokens, ner_tags. If word_ids, tokens, ner_tags are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 7223
  Batch size = 8


7223it [00:00, 18314.21it/s]


Predict question completely!
model_checkpoint:  /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-2/checkpoint-15000 
file:  train_5
model_checkpoint:  /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-2/checkpoint-15000


Didn't find file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-2/checkpoint-15000/sentencepiece.bpe.model. We won't load it.
Didn't find file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-2/checkpoint-15000/added_tokens.json. We won't load it.
loading file None
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-2/checkpoint-15000/tokenizer.json
loading file None
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-2/checkpoint-15000/special_tokens_map.json
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-2/checkpoint-15000/tokenizer_config.json
loading configuration file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-2/checkpoint-15000/config.json
Model config XLMRobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-2/checkpoint-15000",
  "arc

predicting...
117502


  0%|          | 0/118 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: word_ids, tokens, ner_tags. If word_ids, tokens, ner_tags are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 117502
  Batch size = 8


64400it [00:10, 5972.95it/s]

72 74
['O', 'B-PREDICATE-ACT', 'I-PREDICATE-ACT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE'] ['Cuộc', 'thám', 'diễm', 'do', 'von', 'Bellingshausen', 'và', 'Lazarev', 'dẫn', 'đầu', 'trên', 'các', 'chiếc', 'tàu', 'Vostok', 'và', 'Mirny', 'đã', 'đến', 'điểm', '32', 'km', '(', '20', 'mi', ')', 'từ', 'Queen', "Maud's", 'Land', 'và', 'ghi', 'nhận', 'việc', 'nhìn', 'thấy', 'lớp', 'băng', 'tại', '69', '°', '21′', '28', '″N', '2', '°', '14', '′', '50', '″', 'T', '\ufeff', '/', '\ufeff69,35778', '°', 'N', '2,24722', '°', 'T', '\ufeff', '/', '-', '69.35778', ';', '-', '2.24722', 'mà', 'nay', 'được', 'gọi', 'là', 'thềm', 'băng', 'Fimbul']
74 

86481it [00:14, 6128.67it/s]

234 246
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'O', 'B-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', '

106337it [00:17, 5847.97it/s]

33 34
['B-ARGUMENT-PATIENT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE'] ['Qur’an', '(', 'phát', 'âm', '/kɔrˈɑːn', '/', ';', 'tiếng', 'Ả', 'Rập', ':', 'القرآن', '\u200e', 'al-qur’ān', 'có', 'nghĩa', 'là', '"', 'sự', 'xướng', 'đọc', '"', ')', 'là', 'văn', 'bản', 'tôn', 'giáo', 'quan', 'trọng', 'nhất', 'của', 'đạo', 'Hồi']
34 34
33 34
['B-ARGUMENT-PATIENT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE'] ['Qur’an', '(', 'phát', 'âm', '/kɔrˈɑːn', '/', ';', 'tiếng', 'Ả', 'Rập', ':', 'القرآن', '\u200e', 'al-qur’ān', 'có', 'nghĩa', 'là', '"', 'sự', 'xướng', 'đọc', '"', ')', 'là', 'văn', 'bản', 'tôn', 'giáo', 'quan', 'trọng', 'nhất', 'của', 'đạo', 'Hồi']
34 34
33 34
['B-ARGUMENT-PATIENT', 'O', 'O', 'O', 'O', '

117502it [00:19, 6134.44it/s]


Predict sentence completely!


  0%|          | 0/22 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: word_ids, tokens, ner_tags. If word_ids, tokens, ner_tags are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 21234
  Batch size = 8


21234it [00:00, 37759.06it/s]


Predict question completely!
model_checkpoint:  /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-2/checkpoint-15000 
file:  dev_5


Didn't find file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-2/checkpoint-15000/sentencepiece.bpe.model. We won't load it.
Didn't find file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-2/checkpoint-15000/added_tokens.json. We won't load it.
loading file None
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-2/checkpoint-15000/tokenizer.json
loading file None
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-2/checkpoint-15000/special_tokens_map.json
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-2/checkpoint-15000/tokenizer_config.json


model_checkpoint:  /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-2/checkpoint-15000


loading configuration file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-2/checkpoint-15000/config.json
Model config XLMRobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-2/checkpoint-15000",
  "architectures": [
    "XLMRobertaForTokenClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6",
    "7": "LABEL_7",
    "8": "LABEL_8",
    "9": "LABEL_9",
    "10": "LABEL_10"
  },
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_10": 10,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4,
    "LABEL_5": 5,
    "LABEL_6": 6,
    "L

predicting...
39579


  0%|          | 0/40 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: word_ids, tokens, ner_tags. If word_ids, tokens, ner_tags are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 39579
  Batch size = 8


16727it [00:02, 6055.64it/s]

441 468
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', '

39579it [00:06, 6257.52it/s]


Predict sentence completely!


  0%|          | 0/8 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: word_ids, tokens, ner_tags. If word_ids, tokens, ner_tags are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 7223
  Batch size = 8


7223it [00:00, 48688.60it/s]


Predict question completely!
model_checkpoint:  /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-3/checkpoint-15000 
file:  train_5
model_checkpoint:  /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-3/checkpoint-15000


Didn't find file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-3/checkpoint-15000/sentencepiece.bpe.model. We won't load it.
Didn't find file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-3/checkpoint-15000/added_tokens.json. We won't load it.
loading file None
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-3/checkpoint-15000/tokenizer.json
loading file None
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-3/checkpoint-15000/special_tokens_map.json
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-3/checkpoint-15000/tokenizer_config.json
loading configuration file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-3/checkpoint-15000/config.json
Model config XLMRobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-3/checkpoint-15000",
  "arc

predicting...
117502


  0%|          | 0/118 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: word_ids, tokens, ner_tags. If word_ids, tokens, ner_tags are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 117502
  Batch size = 8


64951it [00:10, 6331.95it/s]

72 74
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PATIENT', 'I-ARGUMENT-PATIENT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE'] ['Cuộc', 'thám', 'diễm', 'do', 'von', 'Bellingshausen', 'và', 'Lazarev', 'dẫn', 'đầu', 'trên', 'các', 'chiếc', 'tàu', 'Vostok', 'và', 'Mirny', 'đã', 'đến', 'điểm', '32', 'km', '(', '20', 'mi', ')', 'từ', 'Queen', "Maud's", 'Land', 'và', 'ghi', 'nhận', 'việc', 'nhìn', 'thấy', 'lớp', 'băng', 'tại', '69', '°', '21′', '28', '″N', '2', '°', '14', '′', '50', '″', 'T', '\ufeff', '/', '\ufeff69,35778', '°', 'N', '2,24722', '°', 'T', '\ufeff', '/', '-', '69.35778', ';', '-', '2.24722', 'mà', 'nay', 'được', 'gọi', 'là', 'thềm', 'băng', 'Fimbul

86279it [00:13, 5962.91it/s]

234 246
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'I-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'O', 'O', '

106650it [00:18, 6065.62it/s]

33 34
['B-ARGUMENT-PATIENT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'] ['Qur’an', '(', 'phát', 'âm', '/kɔrˈɑːn', '/', ';', 'tiếng', 'Ả', 'Rập', ':', 'القرآن', '\u200e', 'al-qur’ān', 'có', 'nghĩa', 'là', '"', 'sự', 'xướng', 'đọc', '"', ')', 'là', 'văn', 'bản', 'tôn', 'giáo', 'quan', 'trọng', 'nhất', 'của', 'đạo', 'Hồi']
34 34
33 34
['B-ARGUMENT-PATIENT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'] ['Qur’an', '(', 'phát', 'âm', '/kɔrˈɑːn', '/', ';', 'tiếng', 'Ả', 'Rập', ':', 'القرآن', '\u200e', 'al-qur’ān', 'có', 'nghĩa', 'là', '"', 'sự', 'xướng', 'đọc', '"', ')', 'là', 'văn', 'bản', 'tôn', 'giáo', 'quan', 'trọng', 'nhất', 'của', 'đạo', 'Hồi']
34 34
33 34
['B-ARGUMENT-PATIENT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', '

117502it [00:20, 5734.87it/s]


Predict sentence completely!


  0%|          | 0/22 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: word_ids, tokens, ner_tags. If word_ids, tokens, ner_tags are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 21234
  Batch size = 8


21234it [00:00, 38730.53it/s]


Predict question completely!
model_checkpoint:  /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-3/checkpoint-15000 
file:  dev_5


Didn't find file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-3/checkpoint-15000/sentencepiece.bpe.model. We won't load it.
Didn't find file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-3/checkpoint-15000/added_tokens.json. We won't load it.
loading file None
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-3/checkpoint-15000/tokenizer.json
loading file None
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-3/checkpoint-15000/special_tokens_map.json
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-3/checkpoint-15000/tokenizer_config.json


model_checkpoint:  /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-3/checkpoint-15000


loading configuration file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-3/checkpoint-15000/config.json
Model config XLMRobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-3/checkpoint-15000",
  "architectures": [
    "XLMRobertaForTokenClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6",
    "7": "LABEL_7",
    "8": "LABEL_8",
    "9": "LABEL_9",
    "10": "LABEL_10"
  },
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_10": 10,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4,
    "LABEL_5": 5,
    "LABEL_6": 6,
    "L

predicting...
39579


  0%|          | 0/40 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: word_ids, tokens, ner_tags. If word_ids, tokens, ner_tags are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 39579
  Batch size = 8


16382it [00:04, 5892.53it/s]

441 468
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', '

39579it [00:08, 4805.52it/s]


Predict sentence completely!


  0%|          | 0/8 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: word_ids, tokens, ner_tags. If word_ids, tokens, ner_tags are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 7223
  Batch size = 8


7223it [00:00, 43328.31it/s]


Predict question completely!
model_checkpoint:  /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-4/checkpoint-15000 
file:  train_5
model_checkpoint:  /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-4/checkpoint-15000


Didn't find file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-4/checkpoint-15000/sentencepiece.bpe.model. We won't load it.
Didn't find file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-4/checkpoint-15000/added_tokens.json. We won't load it.
loading file None
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-4/checkpoint-15000/tokenizer.json
loading file None
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-4/checkpoint-15000/special_tokens_map.json
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-4/checkpoint-15000/tokenizer_config.json
loading configuration file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-4/checkpoint-15000/config.json
Model config XLMRobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-4/checkpoint-15000",
  "arc

predicting...
117502


  0%|          | 0/118 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: word_ids, tokens, ner_tags. If word_ids, tokens, ner_tags are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 117502
  Batch size = 8


64769it [00:10, 6195.01it/s]

72 74
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE'] ['Cuộc', 'thám', 'diễm', 'do', 'von', 'Bellingshausen', 'và', 'Lazarev', 'dẫn', 'đầu', 'trên', 'các', 'chiếc', 'tàu', 'Vostok', 'và', 'Mirny', 'đã', 'đến', 'điểm', '32', 'km', '(', '20', 'mi', ')', 'từ', 'Queen', "Maud's", 'Land', 'và', 'ghi', 'nhận', 'việc', 'nhìn', 'thấy', 'lớp', 'băng', 'tại', '69', '°', '21′', '28', '″N', '2', '°', '14', '′', '50', '″', 'T', '\ufeff', '/', '\ufeff69,35778', '°', 'N', '2,24722', '°', 'T', '\ufeff', '/', '-', '69.35778', ';', '-', '2.24722', 'mà', 'nay', 'được', 'gọi', 'là', 'thềm', 'băng', 'Fimbul']
74 74
72 74
['O', 'O', 'O', 'O'

85911it [00:13, 5916.71it/s]

234 246
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', '

106467it [00:16, 5721.82it/s]

33 34
['B-ARGUMENT-PATIENT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'] ['Qur’an', '(', 'phát', 'âm', '/kɔrˈɑːn', '/', ';', 'tiếng', 'Ả', 'Rập', ':', 'القرآن', '\u200e', 'al-qur’ān', 'có', 'nghĩa', 'là', '"', 'sự', 'xướng', 'đọc', '"', ')', 'là', 'văn', 'bản', 'tôn', 'giáo', 'quan', 'trọng', 'nhất', 'của', 'đạo', 'Hồi']
34 34
33 34
['B-ARGUMENT-PATIENT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'] ['Qur’an', '(', 'phát', 'âm', '/kɔrˈɑːn', '/', ';', 'tiếng', 'Ả', 'Rập', ':', 'القرآن', '\u200e', 'al-qur’ān', 'có', 'nghĩa', 'là', '"', 'sự', 'xướng', 'đọc', '"', ')', 'là', 'văn', 'bản', 'tôn', 'giáo', 'quan', 'trọng', 'nhất', 'của', 'đạo', 'Hồi']
34 34
33 34
['B-ARGUMENT-PATIENT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', '

117502it [00:18, 6318.60it/s]


Predict sentence completely!


  0%|          | 0/22 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: word_ids, tokens, ner_tags. If word_ids, tokens, ner_tags are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 21234
  Batch size = 8


21234it [00:00, 37732.44it/s]


Predict question completely!
model_checkpoint:  /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-4/checkpoint-15000 
file:  dev_5
model_checkpoint:  /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-4/checkpoint-15000


Didn't find file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-4/checkpoint-15000/sentencepiece.bpe.model. We won't load it.
Didn't find file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-4/checkpoint-15000/added_tokens.json. We won't load it.
loading file None
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-4/checkpoint-15000/tokenizer.json
loading file None
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-4/checkpoint-15000/special_tokens_map.json
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-4/checkpoint-15000/tokenizer_config.json
loading configuration file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-4/checkpoint-15000/config.json
Model config XLMRobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-4/checkpoint-15000",
  "arc

predicting...
39579


  0%|          | 0/40 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: word_ids, tokens, ner_tags. If word_ids, tokens, ner_tags are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 39579
  Batch size = 8


16329it [00:02, 5820.65it/s]

441 468
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', '

39579it [00:06, 6167.29it/s]


Predict sentence completely!


  0%|          | 0/8 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: word_ids, tokens, ner_tags. If word_ids, tokens, ner_tags are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 7223
  Batch size = 8


7223it [00:00, 49308.77it/s]


Predict question completely!
model_checkpoint:  /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-5/checkpoint-15000 
file:  train_5
model_checkpoint:  /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-5/checkpoint-15000


Didn't find file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-5/checkpoint-15000/sentencepiece.bpe.model. We won't load it.
Didn't find file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-5/checkpoint-15000/added_tokens.json. We won't load it.
loading file None
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-5/checkpoint-15000/tokenizer.json
loading file None
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-5/checkpoint-15000/special_tokens_map.json
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-5/checkpoint-15000/tokenizer_config.json
loading configuration file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-5/checkpoint-15000/config.json
Model config XLMRobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-5/checkpoint-15000",
  "arc

predicting...
117502


  0%|          | 0/118 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: word_ids, tokens, ner_tags. If word_ids, tokens, ner_tags are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 117502
  Batch size = 8


64778it [00:10, 5974.09it/s]

72 74
['O', 'B-PREDICATE-ACT', 'I-PREDICATE-ACT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PATIENT', 'I-ARGUMENT-PATIENT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE'] ['Cuộc', 'thám', 'diễm', 'do', 'von', 'Bellingshausen', 'và', 'Lazarev', 'dẫn', 'đầu', 'trên', 'các', 'chiếc', 'tàu', 'Vostok', 'và', 'Mirny', 'đã', 'đến', 'điểm', '32', 'km', '(', '20', 'mi', ')', 'từ', 'Queen', "Maud's", 'Land', 'và', 'ghi', 'nhận', 'việc', 'nhìn', 'thấy', 'lớp', 'băng', 'tại', '69', '°', '21′', '28', '″N', '2', '°', '14', '′', '50', '″', 'T', '\ufeff', '/', '\ufeff69,35778', '°', 'N', '2,24722', '°', 'T', '\ufeff', '/', '-', '69.35778', ';', '-', '2.24722', 'mà', 'nay', 'được', 'gọi', '

86332it [00:14, 5975.20it/s]

234 246
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', '

106883it [00:17, 5800.49it/s]

33 34
['B-ARGUMENT-PATIENT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'I-ARGUMENT-PLACE'] ['Qur’an', '(', 'phát', 'âm', '/kɔrˈɑːn', '/', ';', 'tiếng', 'Ả', 'Rập', ':', 'القرآن', '\u200e', 'al-qur’ān', 'có', 'nghĩa', 'là', '"', 'sự', 'xướng', 'đọc', '"', ')', 'là', 'văn', 'bản', 'tôn', 'giáo', 'quan', 'trọng', 'nhất', 'của', 'đạo', 'Hồi']
34 34
33 34
['B-ARGUMENT-PATIENT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'I-ARGUMENT-PLACE'] ['Qur’an', '(', 'phát', 'âm', '/kɔrˈɑːn', '/', ';', 'tiếng', 'Ả', 'Rập', ':', 'القرآن', '\u200e', 'al-qur’ān', 'có', 'nghĩa', 'là', '"', 'sự', 'xướng', 'đọc', '"', ')', 'là', 'văn', 'bản', 'tôn', 'giáo', 'quan', 'trọng', 'nhất', 'của', 'đạo', 'Hồi']
34 34
33 34
['B-ARGUMENT-PATIENT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', '

117502it [00:19, 6016.35it/s]


Predict sentence completely!


  0%|          | 0/22 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: word_ids, tokens, ner_tags. If word_ids, tokens, ner_tags are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 21234
  Batch size = 8


21234it [00:00, 38163.02it/s]


Predict question completely!
model_checkpoint:  /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-5/checkpoint-15000 
file:  dev_5


Didn't find file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-5/checkpoint-15000/sentencepiece.bpe.model. We won't load it.
Didn't find file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-5/checkpoint-15000/added_tokens.json. We won't load it.
loading file None
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-5/checkpoint-15000/tokenizer.json
loading file None
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-5/checkpoint-15000/special_tokens_map.json
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-5/checkpoint-15000/tokenizer_config.json


model_checkpoint:  /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-5/checkpoint-15000


loading configuration file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-5/checkpoint-15000/config.json
Model config XLMRobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-5/checkpoint-15000",
  "architectures": [
    "XLMRobertaForTokenClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6",
    "7": "LABEL_7",
    "8": "LABEL_8",
    "9": "LABEL_9",
    "10": "LABEL_10"
  },
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_10": 10,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4,
    "LABEL_5": 5,
    "LABEL_6": 6,
    "L

predicting...
39579


  0%|          | 0/40 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: word_ids, tokens, ner_tags. If word_ids, tokens, ner_tags are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 39579
  Batch size = 8


16720it [00:03, 5865.61it/s]

441 468
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', '

39579it [00:08, 4487.35it/s]


Predict sentence completely!


  0%|          | 0/8 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: word_ids, tokens, ner_tags. If word_ids, tokens, ner_tags are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 7223
  Batch size = 8


7223it [00:00, 48089.86it/s]


Predict question completely!
model_checkpoint:  /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-6/checkpoint-15000 
file:  train_5
model_checkpoint:  /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-6/checkpoint-15000


Didn't find file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-6/checkpoint-15000/sentencepiece.bpe.model. We won't load it.
Didn't find file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-6/checkpoint-15000/added_tokens.json. We won't load it.
loading file None
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-6/checkpoint-15000/tokenizer.json
loading file None
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-6/checkpoint-15000/special_tokens_map.json
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-6/checkpoint-15000/tokenizer_config.json
loading configuration file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-6/checkpoint-15000/config.json
Model config XLMRobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-6/checkpoint-15000",
  "arc

predicting...
117502


  0%|          | 0/118 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: word_ids, tokens, ner_tags. If word_ids, tokens, ner_tags are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 117502
  Batch size = 8


64966it [00:10, 6013.83it/s]

72 74
['O', 'O', 'I-PREDICATE-ACT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE'] ['Cuộc', 'thám', 'diễm', 'do', 'von', 'Bellingshausen', 'và', 'Lazarev', 'dẫn', 'đầu', 'trên', 'các', 'chiếc', 'tàu', 'Vostok', 'và', 'Mirny', 'đã', 'đến', 'điểm', '32', 'km', '(', '20', 'mi', ')', 'từ', 'Queen', "Maud's", 'Land', 'và', 'ghi', 'nhận', 'việc', 'nhìn', 'thấy', 'lớp', 'băng', 'tại', '69', '°', '21′', '28', '″N', '2', '°', '14', '′', '50', '″', 'T', '\ufeff', '/', '\ufeff69,35778', '°', 'N', '2,24722', '°', 'T', '\ufeff', '/', '-', '69.35778', ';', '-', '2.24722', 'mà', 'nay', 'được', 'gọi', 'là', 'thềm', 'băng', 'Fimbul']
74 74
72 74
['O',

86579it [00:13, 6130.09it/s]

234 246
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', '

106909it [00:17, 5995.59it/s]

33 34
['B-ARGUMENT-PATIENT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'] ['Qur’an', '(', 'phát', 'âm', '/kɔrˈɑːn', '/', ';', 'tiếng', 'Ả', 'Rập', ':', 'القرآن', '\u200e', 'al-qur’ān', 'có', 'nghĩa', 'là', '"', 'sự', 'xướng', 'đọc', '"', ')', 'là', 'văn', 'bản', 'tôn', 'giáo', 'quan', 'trọng', 'nhất', 'của', 'đạo', 'Hồi']
34 34
33 34
['B-ARGUMENT-PATIENT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'] ['Qur’an', '(', 'phát', 'âm', '/kɔrˈɑːn', '/', ';', 'tiếng', 'Ả', 'Rập', ':', 'القرآن', '\u200e', 'al-qur’ān', 'có', 'nghĩa', 'là', '"', 'sự', 'xướng', 'đọc', '"', ')', 'là', 'văn', 'bản', 'tôn', 'giáo', 'quan', 'trọng', 'nhất', 'của', 'đạo', 'Hồi']
34 34
33 34
['B-ARGUMENT-PATIENT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', '

117502it [00:18, 6217.21it/s]


Predict sentence completely!


  0%|          | 0/22 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: word_ids, tokens, ner_tags. If word_ids, tokens, ner_tags are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 21234
  Batch size = 8


21234it [00:00, 37348.12it/s]


Predict question completely!
model_checkpoint:  /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-6/checkpoint-15000 
file:  dev_5
model_checkpoint:  /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-6/checkpoint-15000


Didn't find file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-6/checkpoint-15000/sentencepiece.bpe.model. We won't load it.
Didn't find file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-6/checkpoint-15000/added_tokens.json. We won't load it.
loading file None
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-6/checkpoint-15000/tokenizer.json
loading file None
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-6/checkpoint-15000/special_tokens_map.json
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-6/checkpoint-15000/tokenizer_config.json
loading configuration file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-6/checkpoint-15000/config.json
Model config XLMRobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-6/checkpoint-15000",
  "arc

predicting...
39579


  0%|          | 0/40 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: word_ids, tokens, ner_tags. If word_ids, tokens, ner_tags are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 39579
  Batch size = 8


16545it [00:02, 5879.73it/s]

441 468
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', '

39579it [00:06, 6065.51it/s]


Predict sentence completely!


  0%|          | 0/8 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: word_ids, tokens, ner_tags. If word_ids, tokens, ner_tags are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 7223
  Batch size = 8


7223it [00:00, 48687.97it/s]


Predict question completely!
model_checkpoint:  /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-7/checkpoint-15000 
file:  train_5
model_checkpoint:  /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-7/checkpoint-15000


Didn't find file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-7/checkpoint-15000/sentencepiece.bpe.model. We won't load it.
Didn't find file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-7/checkpoint-15000/added_tokens.json. We won't load it.
loading file None
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-7/checkpoint-15000/tokenizer.json
loading file None
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-7/checkpoint-15000/special_tokens_map.json
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-7/checkpoint-15000/tokenizer_config.json
loading configuration file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-7/checkpoint-15000/config.json
Model config XLMRobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-7/checkpoint-15000",
  "arc

predicting...
117502


  0%|          | 0/118 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: word_ids, tokens, ner_tags. If word_ids, tokens, ner_tags are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 117502
  Batch size = 8


65007it [00:10, 6211.11it/s]

72 74
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'B-ARGUMENT-PLACE'] ['Cuộc', 'thám', 'diễm', 'do', 'von', 'Bellingshausen', 'và', 'Lazarev', 'dẫn', 'đầu', 'trên', 'các', 'chiếc', 'tàu', 'Vostok', 'và', 'Mirny', 'đã', 'đến', 'điểm', '32', 'km', '(', '20', 'mi', ')', 'từ', 'Queen', "Maud's", 'Land', 'và', 'ghi', 'nhận', 'việc', 'nhìn', 'thấy', 'lớp', 'băng', 'tại', '69', '°', '21′', '28', '″N', '2', '°', '14', '′', '50', '″', 'T', '\ufeff', '/', '\ufeff69,35778', '°', 'N', '2,24722', '°', 'T', '\ufeff', '/', '-', '69.35778', ';', '-', '2.24722', 'mà', 'nay', 'được', 'gọi', 'là', 'thềm', 'băng', 'Fimbul']
74 74
72 74
['O', 'O', 'O', 'O', 'O', 'O', 'O'

86075it [00:14, 5807.29it/s]

234 246
['O', 'O', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'I-ARGUMENT-PLAC

106669it [00:17, 5890.97it/s]

33 34
['B-ARGUMENT-PATIENT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'] ['Qur’an', '(', 'phát', 'âm', '/kɔrˈɑːn', '/', ';', 'tiếng', 'Ả', 'Rập', ':', 'القرآن', '\u200e', 'al-qur’ān', 'có', 'nghĩa', 'là', '"', 'sự', 'xướng', 'đọc', '"', ')', 'là', 'văn', 'bản', 'tôn', 'giáo', 'quan', 'trọng', 'nhất', 'của', 'đạo', 'Hồi']
34 34
33 34
['B-ARGUMENT-PATIENT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'] ['Qur’an', '(', 'phát', 'âm', '/kɔrˈɑːn', '/', ';', 'tiếng', 'Ả', 'Rập', ':', 'القرآن', '\u200e', 'al-qur’ān', 'có', 'nghĩa', 'là', '"', 'sự', 'xướng', 'đọc', '"', ')', 'là', 'văn', 'bản', 'tôn', 'giáo', 'quan', 'trọng', 'nhất', 'của', 'đạo', 'Hồi']
34 34
33 34
['B-ARGUMENT-PATIENT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', '

117502it [00:19, 5970.90it/s]


Predict sentence completely!


  0%|          | 0/22 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: word_ids, tokens, ner_tags. If word_ids, tokens, ner_tags are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 21234
  Batch size = 8


21234it [00:00, 37582.11it/s]


Predict question completely!
model_checkpoint:  /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-7/checkpoint-15000 
file:  dev_5


Didn't find file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-7/checkpoint-15000/sentencepiece.bpe.model. We won't load it.
Didn't find file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-7/checkpoint-15000/added_tokens.json. We won't load it.
loading file None
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-7/checkpoint-15000/tokenizer.json
loading file None
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-7/checkpoint-15000/special_tokens_map.json
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-7/checkpoint-15000/tokenizer_config.json


model_checkpoint:  /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-7/checkpoint-15000


loading configuration file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-7/checkpoint-15000/config.json
Model config XLMRobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-7/checkpoint-15000",
  "architectures": [
    "XLMRobertaForTokenClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6",
    "7": "LABEL_7",
    "8": "LABEL_8",
    "9": "LABEL_9",
    "10": "LABEL_10"
  },
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_10": 10,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4,
    "LABEL_5": 5,
    "LABEL_6": 6,
    "L

predicting...
39579


  0%|          | 0/40 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: word_ids, tokens, ner_tags. If word_ids, tokens, ner_tags are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 39579
  Batch size = 8


16701it [00:03, 5873.94it/s]

441 468
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', '

39579it [00:06, 5768.64it/s]


Predict sentence completely!


  0%|          | 0/8 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: word_ids, tokens, ner_tags. If word_ids, tokens, ner_tags are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 7223
  Batch size = 8


7223it [00:00, 45662.76it/s]


Predict question completely!
model_checkpoint:  /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-8/checkpoint-15000 
file:  train_5
model_checkpoint:  /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-8/checkpoint-15000


Didn't find file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-8/checkpoint-15000/sentencepiece.bpe.model. We won't load it.
Didn't find file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-8/checkpoint-15000/added_tokens.json. We won't load it.
loading file None
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-8/checkpoint-15000/tokenizer.json
loading file None
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-8/checkpoint-15000/special_tokens_map.json
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-8/checkpoint-15000/tokenizer_config.json
loading configuration file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-8/checkpoint-15000/config.json
Model config XLMRobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-8/checkpoint-15000",
  "arc

predicting...
117502


  0%|          | 0/118 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: word_ids, tokens, ner_tags. If word_ids, tokens, ner_tags are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 117502
  Batch size = 8


64453it [00:10, 5724.67it/s]

72 74
['O', 'B-PREDICATE-ACT', 'I-PREDICATE-ACT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'I-PREDICATE-ACT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE'] ['Cuộc', 'thám', 'diễm', 'do', 'von', 'Bellingshausen', 'và', 'Lazarev', 'dẫn', 'đầu', 'trên', 'các', 'chiếc', 'tàu', 'Vostok', 'và', 'Mirny', 'đã', 'đến', 'điểm', '32', 'km', '(', '20', 'mi', ')', 'từ', 'Queen', "Maud's", 'Land', 'và', 'ghi', 'nhận', 'việc', 'nhìn', 'thấy', 'lớp', 'băng', 'tại', '69', '°', '21′', '28', '″N', '2', '°', '14', '′', '50', '″', 'T', '\ufeff', '/', '\ufeff69,35778', '°', 'N', '2,24722', '°', 'T', '\ufeff', '/', '-', '69.35778', ';', '-', '2.24722', 'mà', 'nay', 'được', 'gọi', 'là', 'thềm', 'băng',

85915it [00:14, 5860.24it/s]

234 246
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', '

106441it [00:17, 5700.22it/s]

33 34
['B-ARGUMENT-PATIENT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'I-ARGUMENT-PLACE'] ['Qur’an', '(', 'phát', 'âm', '/kɔrˈɑːn', '/', ';', 'tiếng', 'Ả', 'Rập', ':', 'القرآن', '\u200e', 'al-qur’ān', 'có', 'nghĩa', 'là', '"', 'sự', 'xướng', 'đọc', '"', ')', 'là', 'văn', 'bản', 'tôn', 'giáo', 'quan', 'trọng', 'nhất', 'của', 'đạo', 'Hồi']
34 34
33 34
['B-ARGUMENT-PATIENT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'I-ARGUMENT-PLACE'] ['Qur’an', '(', 'phát', 'âm', '/kɔrˈɑːn', '/', ';', 'tiếng', 'Ả', 'Rập', ':', 'القرآن', '\u200e', 'al-qur’ān', 'có', 'nghĩa', 'là', '"', 'sự', 'xướng', 'đọc', '"', ')', 'là', 'văn', 'bản', 'tôn', 'giáo', 'quan', 'trọng', 'nhất', 'của', 'đạo', 'Hồi']
34 34
33 34
['B-ARGUMENT-PATIENT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', '

117502it [00:19, 6067.63it/s]


Predict sentence completely!


  0%|          | 0/22 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: word_ids, tokens, ner_tags. If word_ids, tokens, ner_tags are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 21234
  Batch size = 8


21234it [00:00, 36878.09it/s]


Predict question completely!
model_checkpoint:  /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-8/checkpoint-15000 
file:  dev_5


Didn't find file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-8/checkpoint-15000/sentencepiece.bpe.model. We won't load it.
Didn't find file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-8/checkpoint-15000/added_tokens.json. We won't load it.
loading file None
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-8/checkpoint-15000/tokenizer.json
loading file None
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-8/checkpoint-15000/special_tokens_map.json
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-8/checkpoint-15000/tokenizer_config.json


model_checkpoint:  /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-8/checkpoint-15000


loading configuration file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-8/checkpoint-15000/config.json
Model config XLMRobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-8/checkpoint-15000",
  "architectures": [
    "XLMRobertaForTokenClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6",
    "7": "LABEL_7",
    "8": "LABEL_8",
    "9": "LABEL_9",
    "10": "LABEL_10"
  },
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_10": 10,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4,
    "LABEL_5": 5,
    "LABEL_6": 6,
    "L

predicting...
39579


  0%|          | 0/40 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: word_ids, tokens, ner_tags. If word_ids, tokens, ner_tags are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 39579
  Batch size = 8


16837it [00:02, 5844.05it/s]

441 468
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', '

39579it [00:06, 5853.30it/s]


Predict sentence completely!


  0%|          | 0/8 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: word_ids, tokens, ner_tags. If word_ids, tokens, ner_tags are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 7223
  Batch size = 8


7223it [00:00, 46216.41it/s]


Predict question completely!
model_checkpoint:  /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-9/checkpoint-15000 
file:  train_5
model_checkpoint:  /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-9/checkpoint-15000


Didn't find file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-9/checkpoint-15000/sentencepiece.bpe.model. We won't load it.
Didn't find file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-9/checkpoint-15000/added_tokens.json. We won't load it.
loading file None
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-9/checkpoint-15000/tokenizer.json
loading file None
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-9/checkpoint-15000/special_tokens_map.json
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-9/checkpoint-15000/tokenizer_config.json
loading configuration file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-9/checkpoint-15000/config.json
Model config XLMRobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-9/checkpoint-15000",
  "arc

predicting...
117502


  0%|          | 0/118 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: word_ids, tokens, ner_tags. If word_ids, tokens, ner_tags are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 117502
  Batch size = 8


64689it [00:10, 5930.33it/s]

72 74
['O', 'O', 'I-PREDICATE-ACT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'B-PREDICATE-ACT', 'I-PREDICATE-ACT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE'] ['Cuộc', 'thám', 'diễm', 'do', 'von', 'Bellingshausen', 'và', 'Lazarev', 'dẫn', 'đầu', 'trên', 'các', 'chiếc', 'tàu', 'Vostok', 'và', 'Mirny', 'đã', 'đến', 'điểm', '32', 'km', '(', '20', 'mi', ')', 'từ', 'Queen', "Maud's", 'Land', 'và', 'ghi', 'nhận', 'việc', 'nhìn', 'thấy', 'lớp', 'băng', 'tại', '69', '°', '21′', '28', '″N', '2', '°', '14', '′', '50', '″', 'T', '\ufeff', '/', '\ufeff69,35778', '°', 'N', '2,24722', '°', 'T', '\ufeff', '/', '-', '69.35778', ';', '-', '2.24722', 'mà', 'nay', 'được', 'gọi', 'là', 'thềm', 'băng',

86014it [00:14, 5999.61it/s]

234 246
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', '

106660it [00:18, 5661.82it/s]

33 34
['B-ARGUMENT-PATIENT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-PREDICATE-ACT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'] ['Qur’an', '(', 'phát', 'âm', '/kɔrˈɑːn', '/', ';', 'tiếng', 'Ả', 'Rập', ':', 'القرآن', '\u200e', 'al-qur’ān', 'có', 'nghĩa', 'là', '"', 'sự', 'xướng', 'đọc', '"', ')', 'là', 'văn', 'bản', 'tôn', 'giáo', 'quan', 'trọng', 'nhất', 'của', 'đạo', 'Hồi']
34 34
33 34
['B-ARGUMENT-PATIENT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-PREDICATE-ACT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'] ['Qur’an', '(', 'phát', 'âm', '/kɔrˈɑːn', '/', ';', 'tiếng', 'Ả', 'Rập', ':', 'القرآن', '\u200e', 'al-qur’ān', 'có', 'nghĩa', 'là', '"', 'sự', 'xướng', 'đọc', '"', ')', 'là', 'văn', 'bản', 'tôn', 'giáo', 'quan', 'trọng', 'nhất', 'của', 'đạo', 'Hồi']
34 34
33 34
['B-ARGUMENT-PATIENT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'

117502it [00:20, 5856.91it/s]


Predict sentence completely!


  0%|          | 0/22 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: word_ids, tokens, ner_tags. If word_ids, tokens, ner_tags are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 21234
  Batch size = 8


21234it [00:00, 37630.68it/s]


Predict question completely!
model_checkpoint:  /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-9/checkpoint-15000 
file:  dev_5


Didn't find file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-9/checkpoint-15000/sentencepiece.bpe.model. We won't load it.
Didn't find file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-9/checkpoint-15000/added_tokens.json. We won't load it.
loading file None
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-9/checkpoint-15000/tokenizer.json
loading file None
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-9/checkpoint-15000/special_tokens_map.json
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-9/checkpoint-15000/tokenizer_config.json


model_checkpoint:  /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-9/checkpoint-15000


loading configuration file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-9/checkpoint-15000/config.json
Model config XLMRobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-9/checkpoint-15000",
  "architectures": [
    "XLMRobertaForTokenClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6",
    "7": "LABEL_7",
    "8": "LABEL_8",
    "9": "LABEL_9",
    "10": "LABEL_10"
  },
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_10": 10,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4,
    "LABEL_5": 5,
    "LABEL_6": 6,
    "L

predicting...
39579


  0%|          | 0/40 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: word_ids, tokens, ner_tags. If word_ids, tokens, ner_tags are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 39579
  Batch size = 8


16358it [00:03, 5639.86it/s]

441 468
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', '

39579it [00:06, 5712.40it/s]


Predict sentence completely!


  0%|          | 0/8 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: word_ids, tokens, ner_tags. If word_ids, tokens, ner_tags are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 7223
  Batch size = 8


7223it [00:00, 45325.95it/s]


Predict question completely!
model_checkpoint:  /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-10/checkpoint-15000 
file:  train_5
model_checkpoint:  /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-10/checkpoint-15000


Didn't find file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-10/checkpoint-15000/sentencepiece.bpe.model. We won't load it.
Didn't find file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-10/checkpoint-15000/added_tokens.json. We won't load it.
loading file None
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-10/checkpoint-15000/tokenizer.json
loading file None
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-10/checkpoint-15000/special_tokens_map.json
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-10/checkpoint-15000/tokenizer_config.json
loading configuration file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-10/checkpoint-15000/config.json
Model config XLMRobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-10/checkpoint-15000",

predicting...
117502


  0%|          | 0/118 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: word_ids, tokens, ner_tags. If word_ids, tokens, ner_tags are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 117502
  Batch size = 8


64793it [00:10, 5937.52it/s]

72 74
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE'] ['Cuộc', 'thám', 'diễm', 'do', 'von', 'Bellingshausen', 'và', 'Lazarev', 'dẫn', 'đầu', 'trên', 'các', 'chiếc', 'tàu', 'Vostok', 'và', 'Mirny', 'đã', 'đến', 'điểm', '32', 'km', '(', '20', 'mi', ')', 'từ', 'Queen', "Maud's", 'Land', 'và', 'ghi', 'nhận', 'việc', 'nhìn', 'thấy', 'lớp', 'băng', 'tại', '69', '°', '21′', '28', '″N', '2', '°', '14', '′', '50', '″', 'T', '\ufeff', '/', '\ufeff69,35778', '°', 'N', '2,24722', '°', 'T', '\ufeff', '/', '-', '69.35778', ';', '-', '2.24722', 'mà', 'nay', 'được', 'gọi', 'là', 'thềm', 'băng', 'Fimbul']
74 74
72 74
['O', 'O', 'O', 'O'

86041it [00:14, 5652.45it/s]

234 246
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'I-ARGUMENT-PLACE', '

106360it [00:17, 5608.58it/s]

33 34
['B-ARGUMENT-PATIENT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'I-ARGUMENT-PLACE'] ['Qur’an', '(', 'phát', 'âm', '/kɔrˈɑːn', '/', ';', 'tiếng', 'Ả', 'Rập', ':', 'القرآن', '\u200e', 'al-qur’ān', 'có', 'nghĩa', 'là', '"', 'sự', 'xướng', 'đọc', '"', ')', 'là', 'văn', 'bản', 'tôn', 'giáo', 'quan', 'trọng', 'nhất', 'của', 'đạo', 'Hồi']
34 34
33 34
['B-ARGUMENT-PATIENT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'I-ARGUMENT-PLACE'] ['Qur’an', '(', 'phát', 'âm', '/kɔrˈɑːn', '/', ';', 'tiếng', 'Ả', 'Rập', ':', 'القرآن', '\u200e', 'al-qur’ān', 'có', 'nghĩa', 'là', '"', 'sự', 'xướng', 'đọc', '"', ')', 'là', 'văn', 'bản', 'tôn', 'giáo', 'quan', 'trọng', 'nhất', 'của', 'đạo', 'Hồi']
34 34
33 34
['B-ARGUMENT-PATIENT', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', '

117502it [00:20, 5804.36it/s]


Predict sentence completely!


  0%|          | 0/22 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: word_ids, tokens, ner_tags. If word_ids, tokens, ner_tags are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 21234
  Batch size = 8


21234it [00:00, 25334.85it/s]


Predict question completely!
model_checkpoint:  /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-10/checkpoint-15000 
file:  dev_5


Didn't find file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-10/checkpoint-15000/sentencepiece.bpe.model. We won't load it.
Didn't find file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-10/checkpoint-15000/added_tokens.json. We won't load it.
loading file None
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-10/checkpoint-15000/tokenizer.json
loading file None
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-10/checkpoint-15000/special_tokens_map.json
loading file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-10/checkpoint-15000/tokenizer_config.json


model_checkpoint:  /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-10/checkpoint-15000


loading configuration file /content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-10/checkpoint-15000/config.json
Model config XLMRobertaConfig {
  "_name_or_path": "/content/drive/MyDrive/KLTN/srl/SRL_final/xlm-roberta-large-finetuned-ner-10/checkpoint-15000",
  "architectures": [
    "XLMRobertaForTokenClassification"
  ],
  "attention_probs_dropout_prob": 0.1,
  "bos_token_id": 0,
  "classifier_dropout": null,
  "eos_token_id": 2,
  "hidden_act": "gelu",
  "hidden_dropout_prob": 0.1,
  "hidden_size": 1024,
  "id2label": {
    "0": "LABEL_0",
    "1": "LABEL_1",
    "2": "LABEL_2",
    "3": "LABEL_3",
    "4": "LABEL_4",
    "5": "LABEL_5",
    "6": "LABEL_6",
    "7": "LABEL_7",
    "8": "LABEL_8",
    "9": "LABEL_9",
    "10": "LABEL_10"
  },
  "initializer_range": 0.02,
  "intermediate_size": 4096,
  "label2id": {
    "LABEL_0": 0,
    "LABEL_1": 1,
    "LABEL_10": 10,
    "LABEL_2": 2,
    "LABEL_3": 3,
    "LABEL_4": 4,
    "LABEL_5": 5,
    "LABEL_6": 6,
    

predicting...
39579


  0%|          | 0/40 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: word_ids, tokens, ner_tags. If word_ids, tokens, ner_tags are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 39579
  Batch size = 8


16593it [00:02, 5832.12it/s]

441 468
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'B-ARGUMENT-PLACE', 'I-ARGUMENT-PLACE', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', '

39579it [00:07, 5623.60it/s]


Predict sentence completely!


  0%|          | 0/8 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `XLMRobertaForTokenClassification.forward` and have been ignored: word_ids, tokens, ner_tags. If word_ids, tokens, ner_tags are not expected by `XLMRobertaForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 7223
  Batch size = 8


7223it [00:00, 45392.44it/s]


Predict question completely!


In [None]:
df = pd.read_csv("/content/drive/MyDrive/KLTN/srl/data_original/dev_5.csv")
df.columns = ['contexts', 'questions', "labels"]
df['sample_id'] = range(1, len(df) + 1)
df = df[['sample_id', 'contexts', "questions","labels"]]

In [None]:
df.head(5)

Unnamed: 0,sample_id,contexts,questions,labels
0,1,"Nhôm ( bắt nguồn từ tiếng Pháp : aluminium , p...",Nhôm nằm ở ô số mấy trong bảng tuần hoàn hóa h...,False
1,2,"Nhôm ( bắt nguồn từ tiếng Pháp : aluminium , p...","Trong vỏ trái đất , nguyên tố nào chiếm thành ...",False
2,3,"Nhôm ( bắt nguồn từ tiếng Pháp : aluminium , p...",Nhiệt độ cực tiểu để làm nhôm chuyển từ thể rắ...,False
3,4,"Nhôm ( bắt nguồn từ tiếng Pháp : aluminium , p...",Nhôm thường tồn tại ở dạng nào trong lòng đất ?,False
4,5,"Nhôm ( bắt nguồn từ tiếng Pháp : aluminium , p...",Nguyên liệu chính dùng để sản xuất nhôm là gì ?,False


In [None]:
sample_ids = [df["sample_id"][i] for i in range(len(df))]

error_files = set()
list_predictions = []

In [None]:
# Split all paragraphs to sentences and store in all_sentences 
all_sentence = []
num_sentences_in_para = [] # the list of the number of sentences in paragraphs
for paragraph in df['contexts']:
    num_sentences_in_para.append(len(paragraph.split(" . ")))
    for sentence in paragraph.split(" . "):
        all_sentence.append(sentence)

In [None]:
prediction_lst = []
model = AutoModelForTokenClassification.from_pretrained(model_checkpoint, num_labels=len(label_encoding_dict))
predict_sentence = predict(all_sentence)

All model checkpoint weights were used when initializing RemBertForTokenClassification.

All the weights of RemBertForTokenClassification were initialized from the model checkpoint at /content/drive/MyDrive/KLTN/srl/SRL_train_4/rembert-finetuned-ner-1/checkpoint-15000.
If your task is similar to the task the model of the checkpoint was trained on, you can already use RemBertForTokenClassification for predictions without further training.


  0%|          | 0/15 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `RemBertForTokenClassification.forward` and have been ignored: tokens, word_ids, ner_tags. If tokens, word_ids, ner_tags are not expected by `RemBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 14202
  Batch size = 8


4754it [00:00, 11464.11it/s]

170 171
['O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O', 'O'] ['Các', 'món', 'xôi', 'thường', 'thấy', 'là', 'xôi', 'vò', '(', 'xôi', 'trộn', 'đậu', 'xanh', 'giã', 'mịn', ',', 'làm', 'tơi', 'từng', 'hạt',

14202it [00:01, 11917.18it/s]


In [None]:
lst_tag_predict = merge(predict_sentence, num_sentences_in_para)

In [None]:
s_list = []
for tag_para in lst_tag_predict:
    s = []
    for tag_sen in tag_para:
        for tag in tag_sen:
            s.append(tag)
        s.append('O')
    s_list.append(s[:-1])

In [None]:
question_pred_list = predict(df['questions'])

  0%|          | 0/3 [00:00<?, ?ba/s]

No `TrainingArguments` passed, using `output_dir=tmp_trainer`.
PyTorch: setting up devices
The default value for the training argument `--report_to` will change in v5 (from all installed integrations to none). In v5, you will need to use `--report_to all` to get the same behavior as now. You should start updating your code and make this info disappear :-).
The following columns in the test set don't have a corresponding argument in `RemBertForTokenClassification.forward` and have been ignored: tokens, word_ids, ner_tags. If tokens, word_ids, ner_tags are not expected by `RemBertForTokenClassification.forward`,  you can safely ignore this message.
***** Running Prediction *****
  Num examples = 2717
  Batch size = 8


2717it [00:00, 45354.20it/s]


In [None]:
df_tags = pd.DataFrame({"contexts":df['contexts'], 
                        "context_tags":s_list, 
                        "questions":df['questions'],
                        "question_tags":question_pred_list,
                        "labels":df['labels'] })
df_tags.to_csv("/content/drive/MyDrive/KLTN/srl/data/data_prediction_10.csv", index=False)

In [None]:
/content/drive/MyDrive/KLTN/srl/data/ 